In [1]:
#Importing Libraries

import cv2 
import numpy as np
import os 
import matplotlib.pyplot as plt
import time 
import mediapipe as mp


In [2]:
# Short aliases for the two MediaPipe solution modules used by the cells below:
# `mp_holistic` provides the Holistic model class and the landmark connection
# sets; `mp_drawing` provides draw_landmarks and DrawingSpec.
mp_holistic=  mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [3]:
def mediapipe_detection(image, model):
    """Run a detection model on a single BGR frame.

    The frame is converted to RGB, flagged read-only while ``model.process``
    runs, flagged writeable again, and converted back to BGR.

    Args:
        image: Frame in BGR channel order (the webcam cells pass frames read
            from ``cv2.VideoCapture``).
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            ``mp_holistic.Holistic`` instance.

    Returns:
        tuple: ``(bgr_image, results)`` -- the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Read-only only for the duration of the model call.
    # NOTE(review): presumably lets MediaPipe avoid copying the frame -- confirm.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [4]:
##rendering Landmarks 
def draw_landmarks(image, results):
    """Render face, pose and both hand landmark sets with default styling.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        results: Object providing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (attribute on `results`, connection set on `mp_holistic`), in draw order.
    layers = (
        ("face_landmarks", "FACEMESH_TESSELATION"),
        ("pose_landmarks", "POSE_CONNECTIONS"),
        ("left_hand_landmarks", "HAND_CONNECTIONS"),
        ("right_hand_landmarks", "HAND_CONNECTIONS"),
    )
    for landmarks_attr, connections_attr in layers:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, landmarks_attr),
            getattr(mp_holistic, connections_attr),
        )
    

In [5]:
def draw_landmarks2(image, result):
    """Render face, pose and hand landmarks with custom colours and thickness.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        result: Detection output providing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    def spec(color, thickness):
        # Every layer uses circle_radius=1; only colour and thickness vary.
        return mp_drawing.DrawingSpec(color=color, thickness=thickness, circle_radius=1)

    # All landmark sets are read from the `result` argument, never from a
    # notebook-level `results` variable, so the function works on a fresh
    # kernel and always draws what the caller passed in.
    # For each call the two specs are (landmark style, connection style).
    mp_drawing.draw_landmarks(
        image, result.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
        spec((80, 110, 10), 1), spec((80, 110, 10), 1),
    )
    mp_drawing.draw_landmarks(
        image, result.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        spec((80, 22, 10), 2), spec((80, 44, 10), 2),
    )
    mp_drawing.draw_landmarks(
        image, result.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec((80, 22, 10), 2), spec((80, 44, 10), 2),
    )
    mp_drawing.draw_landmarks(
        image, result.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        spec((80, 117, 10), 2), spec((80, 66, 10), 2),
    )
    
    

In [60]:
##Tracking Face and Hands
# Live webcam preview: run holistic detection on every frame and show the
# styled landmarks until 'q' is pressed or the camera stops delivering frames.
cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed, so `frame` is not a usable image; stop
                # here instead of passing it on to cvtColor.
                break

            #Using Mediapipe
            image, results = mediapipe_detection(frame, holistic)

            #Drawing Landmarks
            draw_landmarks2(image, results)

            cv2.imshow("frame", image)
            if cv2.waitKey(1) == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails mid-loop.
    cap.release()
    cv2.destroyAllWindows()

In [6]:
##Extracting Landmarks 
"""pose =[]
for res in results.pose_landmarks.landmark:
    test = np.array([res.x,res.y,res.z,res.visibility])
    pose.append(test)"""

def extract_keypoints(results):
    """Flatten one detection result into a single 1-D feature vector.

    Args:
        results: Object providing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``. Each is
            either falsy (nothing detected) or has a ``landmark`` iterable
            whose items carry ``x``, ``y``, ``z`` (plus ``visibility`` for
            pose).

    Returns:
        numpy.ndarray: pose, face, left-hand and right-hand values
        concatenated in that order. A missing group is replaced by zeros of
        length 33*4, 468*3, 21*3 and 21*3 respectively.
    """
    def flatten(group, fields, fallback_size):
        # A falsy group means no detection: emit the zero placeholder.
        if not group:
            return np.zeros(fallback_size)
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    pose = flatten(results.pose_landmarks, xyz + ("visibility",), 33 * 4)
    face = flatten(results.face_landmarks, xyz, 468 * 3)
    lh = flatten(results.left_hand_landmarks, xyz, 21 * 3)
    rh = flatten(results.right_hand_landmarks, xyz, 21 * 3)
    return np.concatenate([pose, face, lh, rh])

# Shape check on the latest detection. Relies on a `results` variable left
# behind by one of the webcam cells; without it this cell raises NameError.
test = extract_keypoints(results)
np.shape(test)

NameError: name 'results' is not defined

In [7]:
# Dataset layout and size settings shared by the collection and loading cells.
data_path = "mp_data/"                                # root folder for the saved keypoint arrays
actions = np.array(["Thankyou", "Hello", "Which"])    # class labels
no_sequences = 30       # 30 videos for each action
sequence_length = 30    # 30 frames per video


In [89]:
#Making Folders
# One directory per (action, sequence index): <data_path>/<action>/<s>.
for action in actions:
    for s in range(no_sequences):
        # exist_ok=True keeps re-runs harmless when the folder is already there,
        # while any other failure (e.g. permissions) is raised instead of being
        # silently swallowed.
        os.makedirs(os.path.join(data_path, action, str(s)), exist_ok=True)


In [90]:
##collecting images
# Record `no_sequences` clips of `sequence_length` frames for every action and
# save each frame's keypoint vector under <data_path>/<action>/<sequence>/<frame>.
# Pressing 'q' (or a failed camera read) stops the whole collection; the
# sequence in progress is then left incomplete.
stop_requested = False

cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
        for action in actions:
            for s in range(no_sequences):
                for i in range(sequence_length):
                    ret, frame = cap.read()
                    if not ret:
                        # No frame was grabbed; `frame` cannot be processed.
                        print("Camera returned no frame; stopping collection.")
                        stop_requested = True
                        break

                    #Using Mediapipe
                    image, results = mediapipe_detection(frame, holistic)

                    #Drawing Landmarks
                    draw_landmarks2(image, results)

                    cv2.putText(image, "Collecting Frames for  {} Video Number {}".format(action, s), (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    if i == 0:
                        cv2.putText(image, "Starting", (120,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 4, cv2.LINE_AA)

                    # Show the frame BEFORE the pause so the "Starting" banner is
                    # actually on screen during the one-second break.
                    cv2.imshow("frame", image)
                    if i == 0:
                        cv2.waitKey(1000)

                    keypoints = extract_keypoints(results)
                    npy_path = os.path.join(data_path, action, str(s), str(i))
                    np.save(npy_path, keypoints)

                    if cv2.waitKey(1) == ord('q'):
                        stop_requested = True
                        break

                # A plain `break` only leaves the innermost loop, so propagate
                # the stop request through the two outer loops as well.
                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the window, even on an error.
    cap.release()
    cv2.destroyAllWindows()

In [19]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Map each action name to its integer class index (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))

label_map

{'Thankyou': 0, 'Hello': 1, 'Which': 2}

In [22]:
# Read the saved frames back: one window (list of per-frame arrays) per
# recorded sequence, plus the class index of its action.
sequences, labels = [], []
for action in actions:
    for seq in range(no_sequences):
        seq_dir = os.path.join(data_path, action, str(seq))
        window = [
            np.load(os.path.join(seq_dir, "{}.npy".format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])

np.array(sequences).shape

(90, 30, 1662)

In [27]:
# Feature tensor (sequences x frames x keypoints) and one-hot integer labels.
X = np.array(sequences)
y = to_categorical(labels).astype(int)
X.shape

(90, 30, 1662)

In [30]:
##Splitting Data
# Hold out 5% of the sequences for testing. The fixed random_state makes the
# split, and therefore every downstream number, reproducible across kernel
# restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

y_test.shape



(5, 3)

In [34]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [35]:
# TensorBoard callback; training logs are written to the "logs" directory.
log_dir = "logs"
tb_callback = TensorBoard(log_dir=log_dir)



In [44]:
# Stacked-LSTM classifier over windows of 30 frames x 1662 keypoint values,
# ending in a softmax with one unit per action.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30, 1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=['categorical_accuracy'],
)
model.fit(X_train, y_train, epochs=75, callbacks=[tb_callback])

Train on 85 samples
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoch 14/75
Epoch 15/75
Epoch 16/75
Epoch 17/75
Epoch 18/75
Epoch 19/75
Epoch 20/75
Epoch 21/75
Epoch 22/75
Epoch 23/75
Epoch 24/75
Epoch 25/75
Epoch 26/75
Epoch 27/75
Epoch 28/75
Epoch 29/75
Epoch 30/75
Epoch 31/75
Epoch 32/75
Epoch 33/75
Epoch 34/75
Epoch 35/75
Epoch 36/75
Epoch 37/75
Epoch 38/75
Epoch 39/75
Epoch 40/75
Epoch 41/75
Epoch 42/75
Epoch 43/75
Epoch 44/75
Epoch 45/75
Epoch 46/75
Epoch 47/75
Epoch 48/75
Epoch 49/75
Epoch 50/75
Epoch 51/75
Epoch 52/75
Epoch 53/75
Epoch 54/75
Epoch 55/75
Epoch 56/75
Epoch 57/75
Epoch 58/75
Epoch 59/75
Epoch 60/75
Epoch 61/75
Epoch 62/75
Epoch 63/75
Epoch 64/75
Epoch 65/75
Epoch 66/75
Epoch 67/75
Epoch 68/75
Epoch 69/75
Epoch 70/75
Epoch 71/75
Epoch 72/75
Epoch 73/75


Epoch 74/75
Epoch 75/75


<tensorflow.python.keras.callbacks.History at 0x1e89cf89988>

In [45]:
# Predicted class probabilities for the held-out sequences (one row per
# sample, one column per action). `res` is read again by the next cell.
res = model.predict(X_test)
print(res)


[[5.4557160e-07 9.9999702e-01 2.3331006e-06]
 [4.3197298e-01 1.0195378e-04 5.6792504e-01]
 [4.7548665e-06 2.1823414e-15 9.9999523e-01]
 [3.2116025e-04 7.7560718e-12 9.9967885e-01]
 [2.8446260e-07 9.9999928e-01 4.9735797e-07]]


In [58]:
# Predicted vs. true label for held-out sample 4.
print(actions[res[4].argmax()])

print(actions[y_test[4].argmax()])

Hello
Hello


In [59]:
# Write the trained model to "model1.h5" in the current working directory.
model.save("model1.h5")

In [64]:
##MODEL EVALUATION

from sklearn.metrics import multilabel_confusion_matrix,accuracy_score
# NOTE: predictions are made on X_train, so the confusion matrices below
# describe fit on the training set, not held-out performance.
train_probs = model.predict(X_train)

# Collapse one-hot targets / probability rows to class indices.
ytrue = y_train.argmax(axis=1).tolist()
yhat = train_probs.argmax(axis=1).tolist()

multilabel_confusion_matrix(ytrue, yhat)


array([[[55,  1],
        [ 0, 29]],

       [[57,  0],
        [ 1, 27]],

       [[57,  0],
        [ 0, 28]]], dtype=int64)

In [65]:
# Accuracy for the same labels/predictions as the confusion matrix cell
# (`ytrue` is derived from y_train there).
accuracy_score(y_true=ytrue, y_pred=yhat)

0.9882352941176471

In [86]:
##Final testing
# Real-time recognition: keep a rolling window of the latest `sequence_length`
# keypoint vectors, classify it on every frame, and append the label to the
# on-screen sentence once the prediction is both stable and confident.
sequences = []    # rolling window of per-frame keypoint vectors
sentences = []    # recognised labels shown in the banner (last 5 kept)
predictions = []  # most recent predicted class indices (last 10 kept)
threshold = 0.7   # minimum probability required to accept a prediction

cap = cv2.VideoCapture(0)
try:
    with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed; `frame` cannot be processed.
                break

            #Using Mediapipe
            image, results = mediapipe_detection(frame, holistic)

            #Drawing Landmarks
            draw_landmarks2(image, results)

            ##making predictions
            keypoints = extract_keypoints(results)
            sequences.append(keypoints)
            sequences = sequences[-sequence_length:]

            if len(sequences) == sequence_length:
                res = model.predict(np.expand_dims(sequences, axis=0))[0]
                best = int(np.argmax(res))
                predictions.append(best)
                # Only the last 10 predictions are ever inspected, so cap the
                # history instead of letting it grow for the whole session.
                predictions = predictions[-10:]

                # Stable = every recent prediction agrees with the current one.
                # (np.unique(...)[0] would only compare against the smallest
                # recent class index, which is not a stability test.)
                is_stable = all(p == best for p in predictions)
                if is_stable and res[best] > threshold:
                    label = actions[best]
                    # Append only when the label changes, to avoid repeats.
                    if not sentences or sentences[-1] != label:
                        sentences.append(label)
                        print(label)
                sentences = sentences[-5:]

            # Banner spans the full frame width; words are space-separated.
            cv2.rectangle(image, (0,0), (image.shape[1],40), (245,117,16), -1)
            cv2.putText(image, " ".join(sentences), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

            cv2.imshow("frame", image)
            if cv2.waitKey(1) == ord('q'):
                break
finally:
    # Always free the camera and close the window, even on an error.
    cap.release()
    cv2.destroyAllWindows()

Thankyou
Which
Thankyou
Which
Thankyou
Hello
Thankyou
Hello
