# 1. Importing Libraries

In [1]:
import cv2 #opencv
import numpy as np
import os   #helps with path
from matplotlib import pyplot as plt #to use plt.imshow()
import time              #to measure time between frames 
import mediapipe as mp   


# 2 . drawingutil and hands module

In [2]:
mpDraw=mp.solutions.drawing_utils
mpHands=mp.solutions.hands

# 3. Function to detect points on hands and then drawing on the hands

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB because by default opencv use bgr but we need rgb for mediapipe to process image
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [4]:
def draw_landmarks(image, results):
    if results.multi_hand_landmarks:
        for num, handsLms in enumerate(results.multi_hand_landmarks):
            mpDraw.draw_landmarks(image,handsLms, mpHands.HAND_CONNECTIONS,
                             mpDraw.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mpDraw.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2) )

# 4. function to get handedness ie left or right and to extract point to numpy array

In [5]:
def get_label(index,results):
    label = None
    for idx, classification in enumerate(results.multi_handedness):
        if classification.classification[0].index == index:
            label = classification.classification[0].label
#         print(index,idx,label)
    if label:
        return label
    else:
        if index == 1:
            return get_label(0,results)
        elif index == 0:
            return get_label(1,results)
        else:
            return label
def extract_keypoints(results):
    hands = [np.zeros(21*3),np.zeros(21*3)]
    if results.multi_hand_landmarks:
        for num, handsLms in enumerate(results.multi_hand_landmarks):
            label = get_label(num,results)
            if label == 'Right':
                hands[0] = np.array([[res.x, res.y, res.z] for res in handsLms.landmark]).flatten()
            if label == 'Left':
                hands[1] = np.array([[res.x, res.y, res.z] for res in handsLms.landmark]).flatten()
            
    return np.concatenate(hands)

# 5. Number of videos of dataset and videolength

In [6]:
# Thirty videos worth of data
no_videos = 200

# Videos are going to be 30 frames in length
video_length = 30

# 6. defining path of dataset and also path where we will store the processed data ie folder p1data in current directory

dataset link <a href="https://drive.google.com/drive/folders/1RZaXXy3pr7YLSv1jKFXDgmNC2nzkgV1P?usp=sharing">DATASET<a>

In [25]:
cwd = os.getcwd()
print(cwd)

C:\Users\91963\Desktop\HandSignRecognition\DEMO


In [8]:
datapath = os.path.join('KgData/asl4g/train/')
os.listdir(datapath)

['A',
 'B',
 'C',
 'D',
 'del',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'nothing',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'space',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z']

In [9]:
#path for numpy array of data
DATA_PATH = os.path.join('p1Data')

In [10]:
os.listdir(cwd)

['.ipynb_checkpoints',
 'HandGestureRecognition.ipynb',
 'HandGestureRecognitionNeuralNetwork.ipynb',
 'KgData',
 'model1.h5',
 'p1Data',
 'trainLogsdLstm']

# 7. defining the categories of signs 

In [7]:
categories = np.array(['A','B','C','D','E','F','G','H','I','J','del','space','nothing'])

# 8. making directory to store the processed data in p1Data folder

In [39]:
for category in categories: 
    for i in range(no_videos):
        try: 
            os.makedirs(os.path.join(DATA_PATH,category, str(i)))
        except:
            pass

In [25]:
# os.walk()

# 9. checking data is properly fetching  and also the camera feed

In [36]:
for category in categories:
    path = os.path.join(datapath,category)
    for imgpath in os.listdir(path):
        img_array = cv2.imread(os.path.join(path,imgpath))
        img_array = cv2.resize(img_array,(480,640))
        img_array2 = cv2.flip(img_array,1)
        cv2.imshow("frame",img_array)
        cv2.imshow("frame2",img_array2)
        if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
cv2.destroyAllWindows()

In [15]:
cap = cv2.VideoCapture(0) #creating video capture object
## Set mediapipe model

while cap.isOpened():
    #reading feed current frame
    ret,frame = cap.read() 
    flipframe = cv2.flip(frame,1)
    cv2.imshow("Open Cv1",flipframe) 
    cv2.imshow('o2',frame)
    if cv2.waitKey(10) & 0xFF == ord('q'): #wait for 34 milli second to check if q is pressed on keyboard
        break 
cap.release() #release our webcame
cv2.destroyAllWindows()

# 10. Extracting keypoints using extract_keypoint funtion and for every frame   #  for every category we are creating 200 videos and every video contains 30 frames.

In [40]:
with mpHands.Hands() as model:
    for category in categories:
        path = os.path.join(datapath,category)
        adl = os.listdir(path)
        cdl = 0
        for i in range(no_videos):
            b = False
            for j in range(video_length):
                image = cv2.imread(os.path.join(path,adl[cdl]))
#                 image = cv2.flip(image,1)
                image = cv2.resize(image,(640,480))
                image, results = mediapipe_detection(image, model)
                draw_landmarks(image, results)
#                 print(results.multi_handedness)
                cdl += 1
                cv2.putText(image,'f no {} Vno {} c {}'.format(j,i,category), (10,20), 
                                   cv2.FONT_HERSHEY_SIMPLEX,0.5, (150, 0, 255), 1, cv2.LINE_AA)
                cv2.imshow("frame",image)
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, category, str(i), str(j))
                np.save(npy_path, keypoints)
                
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    b = True
                    break
            if b:
                break


    cv2.destroyAllWindows()

# 11. converting categories using one hot encoding and splitting data into train and test 

In [26]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
label_map = {label:num for num, label in enumerate(categories)}

In [27]:
label_map

{'A': 0,
 'B': 1,
 'C': 2,
 'D': 3,
 'E': 4,
 'F': 5,
 'G': 6,
 'H': 7,
 'I': 8,
 'J': 9,
 'del': 10,
 'space': 11,
 'nothing': 12}

In [28]:
videos, labels = [], []
for category in categories:
    for video in range(no_videos):
        window = []
        for frame_num in range(video_length):
            res = np.load(os.path.join(DATA_PATH, category, str(video), "{}.npy".format(frame_num)))
            window.append(res)
        videos.append(window)
        labels.append(label_map[category])

In [29]:
X = np.array(videos)

In [30]:
X.shape

(2600, 30, 126)

In [31]:
y = to_categorical(labels).astype(int) #one hot encoding to convert categorial variable

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [33]:
y_test.shape

(520, 13)

In [34]:
X_train.shape

(2080, 30, 126)

# 12. Building model and training

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense,Flatten
from tensorflow.keras.callbacks import TensorBoard

In [9]:
import tensorflow as tf
ACCURACY_THRESHOLD = 0.97
class myCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        # print(logs.get('acc'))
        # print(logs.get('categorical_accuracy'))
        if(logs.get('categorical_accuracy') > ACCURACY_THRESHOLD):
            # print(logs.get('acc'))
            # print(logs.get('categorical_accuracy'))
            print("\nReached %2.2f%% accuracy, so stopping training!!" %(ACCURACY_THRESHOLD*100))
            self.model.stop_training = True

# Instantiate a callback object
callbacks = myCallback()

In [10]:
log_dir = os.path.join('trainLogsdNeuralNetwork2')
tb_callback = TensorBoard(log_dir=log_dir)

In [11]:
mycallbacks = [callbacks,tb_callback]

In [12]:
model = Sequential()
model.add(Dense(64,activation='relu',input_shape=(30,126)))
model.add(Dense(64, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Flatten())
model.add(Dense(categories.shape[0], activation='softmax'))
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [57]:
model.fit(X_train, y_train, epochs=500, callbacks=mycallbacks)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500


Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500
Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 

Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500


Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1ba23f34c88>

In [58]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_34 (Dense)             (None, 30, 64)            8128      
_________________________________________________________________
dense_35 (Dense)             (None, 30, 64)            4160      
_________________________________________________________________
dense_36 (Dense)             (None, 30, 128)           8320      
_________________________________________________________________
dense_37 (Dense)             (None, 30, 256)           33024     
_________________________________________________________________
dense_38 (Dense)             (None, 30, 512)           131584    
_________________________________________________________________
dense_39 (Dense)             (None, 30, 512)           262656    
_________________________________________________________________
dense_40 (Dense)             (None, 30, 256)          

# 13. Make prediction 

In [59]:
res = model.predict(X_test)

In [60]:
categories[np.argmax(res[0])]

'nothing'

In [61]:
categories[np.argmax(y_test[0])]

'nothing'

# 14. Save Model

In [14]:
model.save('modelnn3.h5')

# Load Model

In [13]:
model.load_weights('modelnn2.h5')

# 15. Evaluation using Confusion Matrix and Accuracy


In [14]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [64]:
yhat = model.predict(X_test)

In [65]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [66]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[465,   2],
        [ 10,  43]],

       [[480,   2],
        [  2,  36]],

       [[477,   6],
        [  2,  35]],

       [[477,   3],
        [  4,  36]],

       [[484,   4],
        [  4,  28]],

       [[482,   1],
        [  2,  35]],

       [[482,   0],
        [  0,  38]],

       [[486,   0],
        [  2,  32]],

       [[473,   2],
        [  5,  40]],

       [[468,   0],
        [  3,  49]],

       [[483,   1],
        [  8,  28]],

       [[480,   4],
        [  7,  29]],

       [[454,  24],
        [  0,  42]]], dtype=int64)

In [16]:
accuracy_score(ytrue, yhat)

# 16. Testing in real Time

In [33]:
colors = [(245,117,16), (117,245,16), (16,117,245),(166,107,245),(106,167,245),(106,117,205),(161,107,25),(130,187,245),(106,117,205),(106,100,245),(196,107,245), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [34]:
back = cv2.imread('board.jpg')
back = cv2.resize(back,(800,800))
# cv2.imshow('back',back)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

In [16]:
# 1. New detection variables
sequence = []
sentence = []
threshold = 0.8

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mpHands.Hands() as hand:
    while cap.isOpened():
        board = back
        # Read feed
        ret, frame = cap.read()
        # Make detections
        image, results = mediapipe_detection(frame,hand)

        
        # Draw landmarks
        draw_landmarks(image, results)
        # 2. Prediction logic
        keypoints = extract_keypoints(results)

        sequence.append(keypoints)
#         print(sequence)
        sequence = sequence[-30:]
#         print(sequence)
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
        #3. Viz logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if categories[np.argmax(res)] != sentence[-1]:
                        sentence.append(categories[np.argmax(res)])
                else:
                    sentence.append(categories[np.argmax(res)])

            if len(sentence) > 10: 
                sentence = sentence[-20:]

            # Viz probabilities
            board = prob_viz(res, categories,board, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ''.join(sentence).replace('-',' '), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)
        cv2.imshow('viz',board)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'back' is not defined

In [18]:
cv2.destroyAllWindows()
cap.release()

In [None]:
# sequence = []
# sentence = []
# threshold = 0.8
# with mpHands.Hands() as model:
#     for category in categories:
#         path = os.path.join(datapath,category)
#         adl = os.listdir(path)
#         cdl = 0
#         for i in range(no_videos):
#             b = False
#             for j in range(video_length):
#                 image = cv2.imread(os.path.join(path,adl[cdl]))
# #                 image = cv2.flip(image,1)
#                 image = cv2.resize(image,(640,480))
#                 image, results = mediapipe_detection(image, model)
#                 draw_landmarks(image, results)
# #                 print(results.multi_handedness)
#                 cdl += 1
#                 cv2.putText(image,'f no {} Vno {} c {}'.format(j,i,category), (10,20), 
#                                    cv2.FONT_HERSHEY_SIMPLEX,0.5, (150, 0, 255), 1, cv2.LINE_AA)
#                 cv2.imshow("frame",image)
#                 # NEW Export keypoints
#                 keypoints = extract_keypoints(results)
#                 npy_path = os.path.join(DATA_PATH, category, str(i), str(j))
#                 if cv2.waitKey(1) & 0xFF == ord('q'):
#                     b = True
#                     break
#             if b:
#                 break


#     cv2.destroyAllWindows()

In [None]:
#adding more layer to neural network