In [None]:
import time

import mediapipe as mp
import cv2

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, InputLayer
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import pandas as pd
import numpy as np
import os
import string
import pickle
import matplotlib.pyplot as plt

import random


# Project root: the notebook changes into this directory before reading the
# relative ASL64/, ASL64_train/ and ASL64_test/ folders.
# Set the ASL_BASE_DIR environment variable to point somewhere else; when the
# chosen directory does not exist on this machine the current working
# directory is used instead, so the notebook is not tied to one user's disk.
base_dir = os.environ.get('ASL_BASE_DIR', '/Users/matthewkwee/Metis')
if not os.path.isdir(base_dir):
    base_dir = os.getcwd()


In [None]:
#SETUP FOR get_landmarks(). RUN FIRST

def setup_hands(confidence=0.75, nhands=1):
    """(Re)create the shared MediaPipe hand detector.

    Publishes three module-level globals used by the landmark helpers:
    ``mpHands`` (the MediaPipe hands module), ``hands`` (the detector
    instance) and ``mpDraw`` (MediaPipe's drawing utilities).

    Parameters
    ----------
    confidence : passed to the detector as ``min_detection_confidence``.
    nhands : passed to the detector as ``max_num_hands``.
    """
    global mpHands, hands, mpDraw
    mpDraw = mp.solutions.drawing_utils
    mpHands = mp.solutions.hands
    hands = mpHands.Hands(
        min_detection_confidence=confidence,
        max_num_hands=nhands,
    )

# Build the detector with the default settings so the `hands` global exists
# before any landmark helper is called.
setup_hands()


def get_landmarks(image, draw=False, err=False):
    """Detect a hand in a BGR image and return its landmarks as pixel coordinates.

    Parameters
    ----------
    image : BGR image array (as produced by ``cv2.imread``). ``None`` (which
        is what ``cv2.imread`` returns for an unreadable file) is treated as
        "no hand detected" instead of crashing.
    draw : when True, the detected hand skeleton is drawn onto ``image`` in place.
    err : when True, the failure message is printed as well as returned.

    Returns
    -------
    A list of ``[x, y]`` integer pixel pairs, one per landmark reported by the
    detector (landmarks of every detected hand are concatenated), or the
    string ``'Error: Hand not detected'`` when nothing was found. Callers tell
    the two apart by checking for ``str``.
    """
    not_found = 'Error: Hand not detected'

    result = None
    if image is not None:
        # ndarray.shape is (rows, cols, ...) == (height, width, ...). MediaPipe's
        # normalized x must be scaled by the width and y by the height; the
        # previous code had the two swapped, which only went unnoticed because
        # the training images are square.
        height, width = image.shape[:2]
        result = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if result is None or not result.multi_hand_landmarks:
        if err:
            print(not_found)
        return not_found

    landmarks = []
    for handslms in result.multi_hand_landmarks:
        for lm in handslms.landmark:
            landmarks.append([int(lm.x * width), int(lm.y * height)])
        if draw:
            # Drawing landmarks on frames
            mpDraw.draw_landmarks(image, handslms, mpHands.HAND_CONNECTIONS)
    return landmarks

In [None]:
def get_landmarks_3d(image, draw=False, err=False):
    """Detect a hand in a BGR image and return landmarks with MediaPipe's depth value.

    Parameters
    ----------
    image : BGR image array (as produced by ``cv2.imread``). ``None`` is
        treated as "no hand detected" instead of crashing.
    draw : when True, the detected hand skeleton is drawn onto ``image`` in place.
    err : when True, the failure message is printed as well as returned.

    Returns
    -------
    A list of ``[x, y, z]`` entries, one per landmark reported by the detector:
    ``x`` and ``y`` are integer pixel coordinates, ``z`` is the landmark's raw
    ``z`` attribute (left unscaled). Returns the string
    ``'Error: Hand not detected'`` when nothing was found.
    """
    not_found = 'Error: Hand not detected'

    result = None
    if image is not None:
        # shape is (height, width, ...): scale normalized x by the width and
        # y by the height (the two used to be swapped).
        height, width = image.shape[:2]
        result = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    if result is None or not result.multi_hand_landmarks:
        if err:
            print(not_found)
        return not_found

    landmarks = []
    for handslms in result.multi_hand_landmarks:
        for lm in handslms.landmark:
            landmarks.append([int(lm.x * width), int(lm.y * height), lm.z])
        if draw:
            # Drawing landmarks on frames
            mpDraw.draw_landmarks(image, handslms, mpHands.HAND_CONNECTIONS)
    return landmarks

In [None]:
# Quick check of the 3D helper on a sample image (path is relative to the
# current working directory).
get_landmarks_3d(cv2.imread('WD_I.png'))

In [None]:
# Used to scale all landmarks to the palm of the hand


def distance2d(a, b):
    """Return the Euclidean distance between two points in the x/y plane.

    Only the first two components of ``a`` and ``b`` are read, so longer
    sequences (e.g. ``[x, y, z]``) are accepted and the extras are ignored.
    """
    delta_x, delta_y = a[0] - b[0], a[1] - b[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5



# Scale all landmarks to the size of the palm of the hand
# Accepts a Pandas row
def reg_lm(landmarks):
    """Express hand landmarks relative to the wrist point, scaled by the palm.

    Parameters
    ----------
    landmarks : pandas Series whose labels ``0..20`` each hold an ``[x, y]``
        pair. Any further entries (e.g. ``'category'``, ``'name'``) are
        carried over untouched.

    Returns
    -------
    A copy of ``landmarks`` in which every point ``0..20`` is replaced by
    ``[dx / palm_width, -dy / palm_height]``, where ``dx, dy`` is the offset
    from point 0, the palm width is the distance between points 5 and 17, and
    the palm height is the distance from point 0 to the midpoint of points 5
    and 17. The y axis is negated. The input row is not modified.

    Notes
    -----
    When the palm is degenerate (width or height of zero) the division fails;
    as before, the row is then returned with only the points processed so far
    normalized. Only ``ZeroDivisionError`` is tolerated now: the former bare
    ``except`` also hid real errors and swallowed ``KeyboardInterrupt`` during
    long dataset loops.
    """
    out_row = landmarks.copy()
    p0, p5, p17 = landmarks[0], landmarks[5], landmarks[17]
    p5_p17_mp = [(p5[0] + p17[0]) / 2, (p5[1] + p17[1]) / 2]
    palmh = distance2d(p0, p5_p17_mp)
    palmw = distance2d(p5, p17)
    try:
        for i in range(21):
            x_out = out_row[i][0] - p0[0]
            y_out = out_row[i][1] - p0[1]
            # A zero offset is left as-is so that 0/0 is never attempted.
            if float(x_out) != 0.0:
                x_out /= palmw
            if float(y_out) != 0.0:
                y_out /= palmh
            out_row.at[i] = [x_out, -1 * y_out]
    except ZeroDivisionError:
        # Degenerate palm: keep the best-effort (partially normalized) row.
        pass
    return out_row


def to_sk_flat(image):
    """Turn an image into the flat, palm-normalized landmark vector.

    Runs ``get_landmarks`` on ``image``; when that reports a failure (a
    string) the string ``'NA'`` is returned. Otherwise the landmarks are
    wrapped into a one-row DataFrame, normalized with ``reg_lm`` and flattened
    into a 1-D numpy array.
    """
    detected = get_landmarks(image)
    if type(detected) is str:
        return 'NA'
    as_row = pd.DataFrame([detected]).iloc[0]
    normalized = reg_lm(as_row)
    return np.array(normalized.to_list()).flatten()
    

In [None]:
def predict_image(model,img,letter=False):
    """Classify the hand sign shown in a single image.

    Parameters
    ----------
    model : object with a Keras-style ``predict(batch)`` method that returns
        one score vector per input row.
    img : BGR image passed on to ``to_sk_flat``.
    letter : when True the class is returned as its ``mid_paths`` label,
        otherwise as the class index.

    Returns
    -------
    ``(class, score)`` for the highest-scoring class, or the string
    ``'Hand not detected'`` when no landmarks could be extracted.
    """
    lmks=to_sk_flat(img)
    if type(lmks)==str:
        return('Hand not detected')

    # Predict on a single-row batch. The earlier version padded the input to
    # 32 rows of zeros and discarded 31 of the 32 results on every call.
    single_batch=np.asarray(lmks, dtype=float).reshape(1, -1)
    pred_matrix=model.predict(single_batch)[0]
    pred=pred_matrix.argmax()

    label=mid_paths[pred] if letter else pred
    return(label,pred_matrix[pred])

# Format dataset for easy usage

In [None]:
def _list_class_dirs(root, classes, into):
    """Fill ``into`` with ``{class_name: os.listdir(root + class_name)}`` for each class."""
    for class_name in classes:
        into[class_name] = os.listdir(root + class_name)


def index_images():
    """Index the image files of every dataset folder into module-level globals.

    Always set:
      * ``base_filepath`` (``"ASL/"``), ``mid_paths`` (class names: ``A``-``Z``
        without ``J``, followed by ``space``) and ``all_paths`` (class name ->
        file names). Note that ``"ASL/"`` is listed relative to the working
        directory *before* the function changes into ``base_dir``.

    Set on a best-effort basis (these folders may not have been created yet):
      * ``base_filepath_64`` / ``all_paths_64`` for ``ASL64/``
      * ``train_filepath_64`` / ``train_paths_64`` for ``ASL64_train/``
      * ``test_filepath_64`` / ``test_paths_64`` for ``ASL64_test/``

    Indexing stops at the first folder that cannot be listed; globals assigned
    up to that point keep their (possibly partial) contents. Only ``OSError``
    is tolerated there -- the former bare ``except`` hid every other error too.

    Side effect: the working directory becomes ``base_dir``.
    """
    global base_filepath, mid_paths, all_paths
    global base_filepath_64, all_paths_64
    global train_filepath_64, train_paths_64
    global test_filepath_64, test_paths_64

    #Index all filepaths in initial dataset
    base_filepath="ASL/"
    mid_paths=[chara for chara in string.ascii_uppercase if chara != 'J']
    mid_paths.append('space')

    all_paths={}
    _list_class_dirs(base_filepath, mid_paths, all_paths)

    try:
        os.chdir(base_dir)

        # Index all 64x64 images
        base_filepath_64="ASL64/"
        all_paths_64={}
        _list_class_dirs(base_filepath_64, mid_paths, all_paths_64)

        # Index training 64x64 images
        train_filepath_64="ASL64_train/"
        train_paths_64={}
        _list_class_dirs(train_filepath_64, mid_paths, train_paths_64)

        # Index testing 64x64 images
        test_filepath_64="ASL64_test/"
        test_paths_64={}
        _list_class_dirs(test_filepath_64, mid_paths, test_paths_64)
    except OSError as exc:
        # Expected until the 64x64 / train / test folders have been generated.
        print(f'index_images: 64x64 folders not fully indexed ({exc})')

# Populate mid_paths / all_paths (and the *_64 indexes when those folders exist).
index_images()

In [None]:
RUN_CODE=False
if RUN_CODE:
    #Create directory for 64x64 images (one sub-folder per class).
    # makedirs(exist_ok=True) keeps the cell re-runnable; os.mkdir raised
    # FileExistsError on the second run.
    os.chdir(base_dir)
    for m_path in mid_paths:
        os.makedirs(os.path.join(base_dir, 'ASL64', m_path), exist_ok=True)

In [None]:
%%time

RUN_CODE=False
if RUN_CODE:
    # Resize all images to 64x64 - easier to store, and easier for network to recognize
    # There's distortion in images that gets ignored at lower resolutions
    os.chdir(base_dir)
    for m_path in mid_paths:
        print(m_path,end='...')
        out_dir=os.path.join(base_dir, 'ASL64', m_path)
        for file_name in all_paths[m_path]:
            image_full=cv2.imread(base_filepath+m_path+'/'+file_name)
            if image_full is None:
                # cv2.imread returns None for anything it cannot decode
                # (e.g. hidden system files); skip instead of crashing in resize.
                continue
            # Written by full path under the original file name, so no chdir
            # per file is needed.
            cv2.imwrite(os.path.join(out_dir, file_name), cv2.resize(image_full, (64,64)))
    print('Done!')

In [None]:
# Re-index so the *_64 listings reflect folders created since the first call.
index_images()

# Create Train/Test split directories

In [None]:
RUN_CODE=False
if RUN_CODE:
    #Create train/test directories
    # os.mkdir is kept on purpose: it fails when the folders already exist,
    # which prevents layering a second random split on top of an earlier one.
    os.chdir(base_dir)
    for split_dir in ('ASL64_train', 'ASL64_test'):
        os.mkdir(os.path.join(base_dir, split_dir))
        for m_path in mid_paths:
            os.mkdir(os.path.join(base_dir, split_dir, m_path))

    # Send roughly 20% of each class to the test folder, the rest to train.
    for m_path in mid_paths:
        print(m_path,end='...')
        for file_name in all_paths_64[m_path]:
            img_array=cv2.imread(base_filepath_64+m_path+'/'+file_name)
            split_dir='ASL64_test' if random.random()<0.2 else 'ASL64_train'
            if img_array is None:
                # unreadable / non-image file: nothing to copy
                continue
            # Written by full path; the earlier version chdir'ed into the
            # target folder and left the notebook's working directory there.
            cv2.imwrite(os.path.join(base_dir, split_dir, m_path, file_name), img_array)
    print('Done!')
            


# Create CNN model

In [None]:
# Training images, read from the per-class sub-folders of ASL64_train with
# one-hot ("categorical") labels.
images = image_dataset_from_directory(
    'ASL64_train',
    label_mode="categorical",
    image_size=(64, 64),
)

# Convolutional classifier: four conv stages with growing kernel size and
# filter count, global average pooling, then a small dense head ending in a
# 26-way softmax.
CCNN = Sequential([
    InputLayer(input_shape=(64, 64, 3)),
    Conv2D(filters=8, kernel_size=3, activation='relu', padding='same'),
    MaxPooling2D(),
    Conv2D(filters=16, kernel_size=5, activation='relu', padding='same'),
    MaxPooling2D(),
    Conv2D(filters=24, kernel_size=7, activation='relu', padding='same'),
    MaxPooling2D(),
    Conv2D(filters=32, kernel_size=9, activation='relu', padding='same'),
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(26, activation='softmax'),
])

CCNN.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

CCNN.summary()

In [None]:
# Train for at most 128 epochs; stop early once the training loss has not
# improved for 5 consecutive epochs.
callback = EarlyStopping(monitor='loss', patience=5)
CCNN.fit(images, epochs=128, callbacks=[callback])

In [None]:
CCNN.trainable=False
CCNN.save(base_dir)

In [None]:
CCNN=tf.keras.models.load_model(base_dir)

# CNN Scoring

In [None]:
def score_model(model, batches):
    """Estimate image-classifier accuracy on randomly drawn, non-repeating images.

    Every batch holds 32 images. For each slot a class is drawn uniformly from
    ``mid_paths`` and then a file uniformly from ``all_paths_64`` for that
    class; a file that was already used is drawn again (counted in the
    "re-samples" figure printed at the end). A prediction whose top score is
    below 0.25 is recorded as ``-1``, i.e. it always counts as wrong.

    NOTE(review): files are taken from ``all_paths_64`` / ``base_filepath_64``
    (the full 64x64 listing), not from the test split -- presumably that
    includes images the model was trained on; confirm this is intended.

    Parameters
    ----------
    model : object with a Keras-style ``predict(batch)`` method.
    batches : number of 32-image batches to score.

    Returns
    -------
    The accuracy computed by ``accuracy_score`` over all drawn images.

    Raises
    ------
    ValueError : when ``batches * 32`` exceeds the number of indexed images.
        Previously that situation made the re-draw loop spin forever.
    """
    requested=batches*32
    available=sum(len(all_paths_64[label]) for label in mid_paths)
    if requested>available:
        raise ValueError(
            f'Cannot draw {requested} distinct images: only {available} are indexed')

    print(f'Scoring model with {batches} batches of 32 - total {batches*32} images')
    actuals=[]
    preds=[]
    seen=set()  # set membership is O(1); the list version was O(n) per draw

    failcounter=0
    for b_num in range(batches):
        test_batch=np.zeros((32,64,64,3))
        print(b_num+1,end='.')
        for i_num in range(32):

            # Re-draw until we hit a file that has not been scored yet.
            while True:
                m_path=random.randint(0,len(mid_paths)-1)
                m_path2=mid_paths[m_path]
                i_n=random.randint(0,len(all_paths_64[m_path2])-1)
                fullpath=base_filepath_64+m_path2+'/'+all_paths_64[m_path2][i_n]
                if fullpath in seen:
                    failcounter+=1
                else:
                    break

            seen.add(fullpath)
            # color_mode alone selects RGB; the deprecated `grayscale` keyword
            # is no longer passed.
            testim=load_img(fullpath, color_mode="rgb", target_size=None, interpolation="nearest")
            test_batch[i_num]=np.array(testim)
            actuals.append(m_path)  # class index, comparable with argmax below

        preds_soft=model.predict(test_batch)
        for p_num in range(32):
            preds_hard=preds_soft[p_num].argmax()
            if preds_soft[p_num][preds_hard]<0.25:
                preds_hard=-1
            preds.append(preds_hard)
    acc=accuracy_score(actuals,preds)
    print(f'\nFinished scoring model - {failcounter} re-samples that were replaced.')
    return(acc)

In [None]:
# Score the CNN on a single batch of 32 images.
score_model(CCNN,1)


In [None]:
# Build a 32-row batch that is all zeros except for one real image in row 0.
test_batch=np.zeros((32,64,64,3))
fullpath=base_filepath_64+'A/'+all_paths_64['A'][0]
# color_mode alone selects RGB; the deprecated `grayscale` keyword is dropped.
image=load_img(fullpath, color_mode="rgb", target_size=None, interpolation="nearest")
#image=cv2.resize(image, (64,64))
test_batch[0]=np.array(image)

In [None]:
# Hard predictions for the hand-built batch: the arg-max class per row, or -1
# when the winning score is below 0.25.
preds=[]
preds_soft=CCNN.predict(test_batch)
for p_num in range(32):
    row_scores=preds_soft[p_num]
    preds_hard=row_scores.argmax()
    preds.append(-1 if row_scores[preds_hard]<0.25 else preds_hard)

# Reformat photos into groups of landmarks.

In [None]:
# Run the hand detector over every training image and collect the landmarks of
# the images in which a hand was found, together with class and file name.
category, landmarks, name = [], [], []
failure = total = 0

train_path_64='ASL64_train/'

for m_path in mid_paths:
    # Progress: class name followed by the running detection-failure rate.
    print(m_path,end='...')
    print(failure/(total+1))
    for i, file_name in enumerate(train_paths_64[m_path]):
        if i%50==0:
            print(i,end='.')
        fullpath=train_path_64+m_path+'/'+file_name
        img=cv2.imread(fullpath)
        lmarks=get_landmarks(img, False, False)
        total+=1
        if type(lmarks)==str:
            # get_landmarks signals "no hand" with a string
            failure+=1
            continue
        landmarks.append(lmarks)
        name.append(file_name)
        category.append(m_path)

# One row per image, one column per landmark, plus the two label columns.
df_landmarks_train=pd.DataFrame(landmarks)
df_landmarks_train['category']=category
df_landmarks_train['name']=name

In [None]:
RUN_CODE=False
if RUN_CODE:
    # `with` closes the file even when pickling fails part-way through.
    with open('df_landmarks_train','wb') as f:
        pickle.dump(df_landmarks_train,f)

In [None]:
# Same extraction as for the training split, applied to the test images.
category, landmarks, name = [], [], []
failure = total = 0

test_path_64='ASL64_test/'

for m_path in mid_paths:
    # Progress: class name followed by the running detection-failure rate.
    print(m_path,end='...')
    print(failure/(total+1))
    for i, file_name in enumerate(test_paths_64[m_path]):
        if i%50==0:
            print(i,end='.')
        fullpath=test_path_64+m_path+'/'+file_name
        img=cv2.imread(fullpath)
        lmarks=get_landmarks(img, False, False)
        total+=1
        if type(lmarks)==str:
            # get_landmarks signals "no hand" with a string
            failure+=1
            continue
        landmarks.append(lmarks)
        name.append(file_name)
        category.append(m_path)

# One row per image, one column per landmark, plus the two label columns.
df_landmarks_test=pd.DataFrame(landmarks)
df_landmarks_test['category']=category
df_landmarks_test['name']=name

In [None]:
RUN_CODE=False
if RUN_CODE:
    # `with` closes the file even when pickling fails part-way through.
    with open('df_landmarks_test','wb') as f:
        pickle.dump(df_landmarks_test,f)

In [None]:
# Path of one hand-picked training image. The earlier first assignment (first
# listed file of class A) was overwritten immediately and has been removed.
p=train_path_64+'A'+'/'+'A25.jpg'
print(p)
#p='WD_I.png'

### Shape of palm is measured by points 0, 5, 17

In [None]:
# Display the raw landmark table built from the training images.
df_landmarks_train

In [None]:
# Reformat train data

RUN_CODE=False
if RUN_CODE:
    # Palm-normalize every training row with reg_lm; progress every 1000 rows.
    lmscale_rows=[]
    for i, (row_label, raw_row) in enumerate(df_landmarks_train.iterrows()):
        if i%1000==0:
            print(i,end='.')
        lmscale_rows.append(reg_lm(raw_row))

    df_lmscale_train=pd.DataFrame(lmscale_rows)

In [None]:
RUN_CODE=False
if RUN_CODE:
    # `with` closes the file even when pickling fails part-way through.
    with open('df_lmscale_train','wb') as f:
        pickle.dump(df_lmscale_train,f)

In [None]:
# Reformat test data

RUN_CODE=False
if RUN_CODE:
    # Palm-normalize every test row with reg_lm; progress every 1000 rows.
    lmscale_rows=[]
    for i, (row_label, raw_row) in enumerate(df_landmarks_test.iterrows()):
        if i%1000==0:
            print(i,end='.')
        lmscale_rows.append(reg_lm(raw_row))

    df_lmscale_test=pd.DataFrame(lmscale_rows)

In [None]:
RUN_CODE=False
if RUN_CODE:
    # `with` closes the file even when pickling fails part-way through.
    with open('df_lmscale_test','wb') as f:
        pickle.dump(df_lmscale_test,f)

# Change sets to numpy arrays for network

In [None]:
RUN_CODE=False
if RUN_CODE:

    # NOTE: pickle.load can execute code embedded in the file -- only load
    # files that this notebook wrote itself.
    with open('df_lmscale_train','rb') as f:
        df_lmscale_train=pickle.load(f)
    with open('df_lmscale_test','rb') as f:
        df_lmscale_test=pickle.load(f)

    # Shuffle with fixed integer seeds. The former seeds were hash(<str>)%2**32,
    # but str hashes are salted per interpreter start, so the "fixed" shuffle
    # actually changed on every kernel restart.
    df_lmscale_train=df_lmscale_train.sample(frac=1,random_state=20211)
    df_lmscale_test=df_lmscale_test.sample(frac=1,random_state=20212)

    # X: the first 21 columns hold one [x, y] pair each -> array of shape (n, 21, 2).
    # y: position of the row's category within mid_paths.
    X_train=np.array(df_lmscale_train.iloc[:,0:21].values.tolist())
    y_train=np.array([mid_paths.index(label) for label in df_lmscale_train['category']])

    X_test=np.array(df_lmscale_test.iloc[:,0:21].values.tolist())
    y_test=np.array([mid_paths.index(label) for label in df_lmscale_test['category']])

In [None]:
RUN_CODE=False
if RUN_CODE:
    # Persist the four arrays, one pickle file each (file name == variable name).
    # `with` closes each file even when pickling fails.
    for file_name, array in (('X_train', X_train), ('y_train', y_train),
                             ('X_test', X_test), ('y_test', y_test)):
        with open(file_name,'wb') as f:
            pickle.dump(array,f)

In [None]:
RUN_CODE=True
if RUN_CODE:
    # Reload the cached arrays. NOTE: pickle.load can execute code embedded in
    # the file -- only load files that this notebook wrote itself.
    with open('X_train','rb') as f:
        X_train=pickle.load(f)
    with open('y_train','rb') as f:
        y_train=pickle.load(f)
    with open('X_test','rb') as f:
        X_test=pickle.load(f)
    with open('y_test','rb') as f:
        y_test=pickle.load(f)

In [None]:
# One-hot encode the integer labels.
# * num_classes is fixed to the number of classes so train and test always get
#   the same width, even if the highest class is missing from one of them.
# * The ndim guard makes the cell safe to re-run: encoding an already one-hot
#   array a second time would produce a wrongly shaped target.
if y_train.ndim==1:
    y_train=to_categorical(y_train, num_classes=len(mid_paths))
if y_test.ndim==1:
    y_test=to_categorical(y_test, num_classes=len(mid_paths))

In [None]:
# Flatten each sample's landmark pairs into one 42-value feature vector.
X_train=np.reshape(X_train, (len(X_train), 42))
X_test=np.reshape(X_test, (len(X_test), 42))

# Create neural network

In [None]:
# Dense classifier over the 42 flattened landmark coordinates, ending in a
# 26-way softmax.
LMNN = Sequential()
# input_shape must be a tuple: `(42)` is just the integer 42, which Keras
# versions that call tuple(input_shape) reject. `(42,)` works everywhere.
LMNN.add(InputLayer(input_shape=(42,)))
LMNN.add(Dense(96, activation='relu'))
LMNN.add(Dense(256, activation='relu'))
LMNN.add(Dense(128, activation='relu'))
LMNN.add(Dense(64, activation='relu'))
LMNN.add(Dense(26, activation='softmax'))
LMNN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
#LMNN.summary()

In [None]:
RUN_CODE=False
if RUN_CODE:
    # Train for at most 256 epochs with 20% of the training data held out for
    # validation; stop once the training loss has not improved for 5 epochs.
    # NOTE(review): a validation split is provided but early stopping watches
    # 'loss' rather than 'val_loss' -- confirm that is intended.
    callback = EarlyStopping(monitor='loss', patience=5)
    LMNN.fit(X_train, y_train, batch_size=32,callbacks=[callback], verbose=True, validation_split=0.2, epochs=256)

In [None]:
RUN_CODE=False
if RUN_CODE:
    # NOTE(review): this saves into base_dir itself (the project root) rather
    # than a model-specific folder; any other model saved to the same path
    # would overwrite this one.
    LMNN.save(base_dir)

In [None]:
# Loads whatever model was last saved at base_dir.
# NOTE(review): confirm that this is the landmark network and not another model.
LMNN=tf.keras.models.load_model(base_dir)

# Run neural network using standardized skeleton positions

In [None]:
def score_skeleton(model, batches):
    """Estimate landmark-classifier accuracy on random rows of the test arrays.

    For every batch, 32 row indices are drawn from ``X_test`` with
    ``random.randint`` (the same row may be drawn more than once). The true
    class of a row is the arg-max of its ``y_test`` entry. A prediction whose
    top score is below 0.25 is recorded as ``-1``.

    Parameters
    ----------
    model : object with a Keras-style ``predict(batch)`` method.
    batches : number of 32-row batches to score.

    Returns
    -------
    ``(accuracy, actuals, preds)`` -- the accuracy from ``accuracy_score`` and
    the two label lists it was computed from.
    """
    print(f'Scoring model with {batches} batches of 32 - total {batches*32} skeletons')
    actuals, preds = [], []

    # Never incremented here (rows are not de-duplicated); kept because the
    # closing message reports it.
    failcounter = 0

    for b_num in range(batches):
        print(b_num+1,end='.')

        picks = [random.randint(0,len(X_test)-1) for slot in range(32)]
        test_batch = np.zeros((32,42))
        for slot, idx in enumerate(picks):
            test_batch[slot] = X_test[idx]
        actuals.extend(y_test[idx].argmax() for idx in picks)

        preds_soft = model.predict(test_batch)
        for p_num in range(32):
            scores = preds_soft[p_num]
            best = scores.argmax()
            preds.append(-1 if scores[best]<0.25 else best)

    acc = accuracy_score(actuals,preds)
    print(f'\nFinished scoring model - {failcounter} re-samples that were replaced.')
    return(acc,actuals,preds)
    

In [None]:
# Score the landmark network on 512 batches; data = (accuracy, actuals, preds).
data=score_skeleton(LMNN,512)

In [None]:
# data[1] (actual classes) becomes the single data column, named 0;
# data[2] (predicted classes) is passed as the index and turned into the
# 'index' column by reset_index().
data_df=pd.DataFrame(data[1],data[2]).reset_index()

In [None]:
# Display the prediction ('index') / actual (0) table.
data_df

In [None]:
# Per-class hit rate: among the rows *predicted* as class i ('index' column),
# the share whose actual class (column 0) is also i.
# NOTE(review): because the rows are selected by prediction this is a
# precision-style figure per class -- confirm that is the intended metric.
accuracy={}
accuracyL=[]
for i in range(26):
    d2df=data_df[data_df['index']==i]
    if len(d2df)==0:
        # Class never predicted: the ratio is undefined (used to raise
        # ZeroDivisionError).
        hit_rate=float('nan')
    else:
        hit_rate=len(d2df[d2df['index']==d2df[0]])/len(d2df['index'])
    accuracy[mid_paths[i]]=hit_rate
    accuracyL.append(hit_rate)
accuracy

In [None]:
# Bar chart of the per-class figures; the y axis is clipped to [0.7, 1] and the
# x axis to the 26 class positions.
plt.figure(figsize=(12,8))
plt.bar(mid_paths,accuracyL)
plt.axis([-0.75,25.75,0.7,1])

In [None]:
# Smallest remaining value (and its position in the trimmed list) once two
# values recorded from an earlier scoring run are left out.
# NOTE(review): the two literals come from one specific random run --
# presumably its two lowest classes; confirm.
# Work on a copy and only drop the literals when present: the original removed
# them from accuracyL in place, which raised ValueError on any other run and
# on every re-run of this cell.
trimmed=list(accuracyL)
for known_outlier in (0.821608040201005, 0.8892215568862275):
    if known_outlier in trimmed:
        trimmed.remove(known_outlier)
min(trimmed),trimmed.index(min(trimmed))

In [None]:
# UVDEQ
# NOTE(review): this was a bare, undefined name and raised NameError when the
# cell ran. It looks like a note -- presumably the letters U, V, D, E, Q that
# scored worst above; confirm and move to a markdown cell.

In [None]:
# Hit rate for whatever subset `d2df` currently holds (after the per-class
# loop above that is the last class processed).
len(d2df[d2df['index']==d2df[0]])/len(d2df['index'])

In [None]:
capture = cv2.VideoCapture(1)

# Hold the background frame for background subtraction.
background = None
# Hold the hand's data so all its details are in one place.
hand = None
# Variables to count how many frames have passed and to set the size of the window.
frames_elapsed = 0
FRAME_HEIGHT = 720
FRAME_WIDTH = 1280
# Humans come in a ton of beautiful shades and colors.
# Try editing these if your program has trouble recognizing your skin tone.
CALIBRATION_TIME = 30
BG_WEIGHT = 0.5
OBJ_THRESHOLD = 18

i=0

# Region of the camera frame that is analysed (size and row/column offsets).
sq_size=512
sqh_off=500
sqv_off=500

try:
    while True:
        i+=1

        # Grab a frame; stop cleanly when the camera delivers nothing
        # (previously this crashed on `None[...]`).
        ret, frame = capture.read()
        if not ret or frame is None:
            print('Error: could not read a frame from the camera')
            break

        # Crop the region of interest, then reverse both axes (slicing kept
        # exactly as before).
        frame = frame[sqh_off:sqh_off+sq_size, sqv_off:sqv_off+sq_size]
        frame=frame[len(frame):0:-1,len(frame):0:-1]
        # The reversed slice is a negative-stride view; make it contiguous so
        # OpenCV can draw on it in place.
        frame = np.ascontiguousarray(frame)
        #frame = cv2.resize(frame, (FRAME_WIDTH, FRAME_HEIGHT))
        frame_copy=frame.copy()
        p=predict_image(LMNN,frame_copy, True)
        if type(p)==str:
            # predict_image returns a plain message when no hand is found;
            # indexing it like a (label, score) tuple used to print 'Ha'.
            text_to_draw=p
        else:
            text_to_draw=str(p[0]+str(p[1]))

        get_landmarks(frame, True)
        cv2.putText(frame,text_to_draw,(25,25),0,1,0)
        cv2.imshow("Camera Input", frame)


        # Check if user wants to exit.
        if (cv2.waitKey(1) & 0xFF == ord('x')):
            break
finally:
    # When we exit the loop (or hit an error), stop the capture and close the
    # window so the camera is not left locked.
    capture.release()
    cv2.destroyAllWindows()

In [None]:
# Rebuild the shared detector with a lower detection-confidence threshold (0.2).
setup_hands(0.2)

In [None]:
# Sample image used by the cells below (cv2.imread returns BGR, or None if the
# file cannot be read).
frame=cv2.imread('WD_I.png')


In [None]:
# Canvas image that the hand skeleton is drawn onto further down.
blank=cv2.imread('blank400.png')

In [None]:
# Classify the sample image; returns (letter, score) or 'Hand not detected'.
predict_image(LMNN,frame, True)

In [None]:
# Write the current frame to disk for inspection.
cv2.imwrite('tes2.png',frame)

In [None]:
# `frame` comes from cv2.imread and is therefore BGR; matplotlib expects RGB,
# so convert before showing (otherwise red and blue appear swapped).
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

In [None]:
framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# shape is (height, width, ...): normalized x scales with the width and y with
# the height (the two were swapped before).
height, width = frame.shape[:2]

# Get hand landmark prediction
result = hands.process(framergb)
if not result.multi_hand_landmarks:
    # Fail with a clear message instead of "'NoneType' object is not iterable".
    raise ValueError('Error: Hand not detected')

# post process the result: pixel coordinates of every landmark, and the
# skeleton drawn onto the separate `blank` canvas.
landmarks = []
for handslms in result.multi_hand_landmarks:
    for lm in handslms.landmark:
        landmarks.append([int(lm.x * width), int(lm.y * height)])
    # Drawing landmarks on frames
    mpDraw.draw_landmarks(blank, handslms, mpHands.HAND_CONNECTIONS)


In [None]:
# Save the canvas with the drawn hand skeleton.
cv2.imwrite('hand400.png',blank)

In [None]:
# Scatter the three landmarks (0, 5 and 17) that reg_lm uses as the palm
# reference.
pointsx=[landmarks[0][0], landmarks[5][0], landmarks[17][0]]
pointsy=[landmarks[0][1], landmarks[5][1], landmarks[17][1]]

plt.scatter(pointsx, pointsy)

In [None]:
# Flat normalized landmark vector for the sample image: [x0, y0, x1, y1, ...]
# (or the string 'NA' when no hand is found).
a=to_sk_flat(frame)

In [None]:
# Plot the normalized hand: all points faintly, then the palm triangle.
# In the flat vector landmark k sits at a[2k], a[2k+1], so a[0:2] is point 0,
# a[10:12] is point 5 and a[34:36] is point 17.
plt.figure(figsize=[12,8])
plt.axis([-2,1,-0.1,2.5])
plt.scatter(a[0::2],a[1::2], color='#f00',alpha=0.25)
plt.scatter(a[0],a[1], color='#f00')
plt.scatter(a[10],a[11], color='#f00')
plt.scatter(a[34],a[35], color='#f00')
plt.plot([a[0],a[10]],[a[1],a[11]],color='#f00')
plt.plot([a[0],a[34]],[a[1],a[35]],color='#f00')
plt.plot([a[10],a[34]],[a[11],a[35]],color='#f00')

# In case 2D recognition doesn't work, create standard "bone lengths" using my own hand.


In [None]:
imt=cv2.imread('WD_I.png')
#imt=cv2.imread(p)
#imt=cv2.resize(imt, (64,64))
lmsl=get_landmarks(imt)
if type(lmsl)==str:
    # get_landmarks reports failure with a string; appending to it would fail
    # with an unrelated AttributeError, so stop with the real reason.
    raise ValueError(lmsl)
# Append a category and a file name so the row has the same layout as the
# dataset rows (landmarks first, then the two labels).
lmsl.append('WDI')
lmsl.append('WD_I.png')
lms=pd.DataFrame([lmsl])

#lms=lms.transpose()
r=reg_lm(lms.iloc[0])
# cv2 images are BGR; convert so matplotlib shows the true colours.
plt.imshow(cv2.cvtColor(imt, cv2.COLOR_BGR2RGB))
# Overlay each landmark (one scatter call per point, as before).
for item in lmsl[0:21]:
    plt.scatter(item[0],item[1])