In [198]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

import tensorflow as tf
from tensorflow import keras,lite

from keras.models import load_model
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard

from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [199]:
# Display the installed TensorFlow version string so the run can be reproduced.
tf.__version__

'2.9.1'

In [201]:
mp_hands = mp.solutions.hands # MediaPipe Hands solution module (hand landmarks only, not the Holistic model)
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def mediapipe_detection(image, model):
    """Run `model` on a BGR frame and hand back the frame together with the result.

    Args:
        image: frame in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before being given to the model.
        model: object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)``: the frame converted back with
        ``cv2.COLOR_RGB2BGR`` and whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is flagged read-only only while the model is processing it.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results


def draw_styled_landmarks(image, results):
    """Draw every detected hand onto `image` in place, using custom colours.

    Nothing is drawn when ``results.multi_hand_landmarks`` is empty or None.

    Args:
        image: image array that `mp_drawing.draw_landmarks` draws on.
        results: detection result exposing ``multi_hand_landmarks``.
    """
    detected_hands = results.multi_hand_landmarks
    if detected_hands:
        # The same two specs are used for every hand: landmarks first, connections second.
        landmark_style = mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4)
        connection_style = mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
        for hand_landmarks in detected_hands:
            mp_drawing.draw_landmarks(
                image,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                landmark_style,
                connection_style,
            )
    
def draw_landmarks(image, results):
    """Draw every detected hand onto `image` in place with the default drawing style.

    Nothing is drawn when ``results.multi_hand_landmarks`` is empty or None.
    """
    for hand_landmarks in results.multi_hand_landmarks or ():
        mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
    
def extract_keypoints(results):
    """Flatten the detected hand landmarks into a fixed-length feature vector.

    The vector always has 2 * 21 * 3 = 126 entries, laid out hand by hand:
    entries 0-62 hold (x, y, z) for the 21 landmarks of the first detected
    hand, entries 63-125 hold the same for the second detected hand. The slot
    of a hand that was not detected stays zero-filled, so every index has the
    same meaning whether one or two hands are visible.

    NOTE: earlier versions of this function wrote a single detected hand twice
    and interleaved two hands landmark by landmark. Frames saved with that
    layout are not comparable with vectors produced here and should be
    re-collected before retraining.

    Args:
        results: object exposing ``multi_hand_landmarks`` -- None/empty when
            nothing was detected, otherwise an iterable of hands whose
            ``landmark`` sequence yields 21 items with ``x``, ``y``, ``z``.

    Returns:
        1-D float ``np.ndarray`` of length 126.
    """
    max_hands, landmarks_per_hand = 2, 21
    keypoints = np.zeros((max_hands, landmarks_per_hand, 3), dtype=float)

    detected_hands = results.multi_hand_landmarks or []
    # zip() stops after `max_hands` slots, so extra detections can never
    # change the length of the returned vector.
    for hand_slot, hand in zip(keypoints, detected_hands):
        hand_slot[:] = [[lm.x, lm.y, lm.z] for lm in hand.landmark]

    return keypoints.flatten()


In [202]:
# Root folder that receives the exported keypoint arrays (.npy files)
DATA_PATH = 'MP_Data'

# Gesture labels the classifier is trained to recognise
actions = np.array([
    'hello', 'play', 'okay', 'nice', 'iloveu',
    'peace', 'promise', 'why', 'hungry', 'temple',
])

# Number of recorded videos (sequences) per action
no_sequences = 50

# Number of frames in each video
sequence_length = 30

# Index of the first sequence folder
start_folder = 0

In [203]:
# Create one folder per (action, sequence) pair under DATA_PATH.
# exist_ok=True makes re-running the cell harmless while still surfacing real
# failures (e.g. permission errors) instead of silently swallowing them.
for action in actions:
    for sequence in range(no_sequences):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [127]:
def _collect_keypoint_sequences(cap, hands):
    """Record `no_sequences` clips of `sequence_length` frames for every action.

    For each frame the hand landmarks are detected, drawn on a preview window
    and the extracted keypoint vector is saved under
    ``DATA_PATH/<action>/<sequence>/<frame_num>`` (``np.save`` appends ``.npy``).

    Args:
        cap: opened ``cv2.VideoCapture`` to read frames from.
        hands: MediaPipe Hands instance passed to `mediapipe_detection`.

    Returns:
        True when every frame was recorded, False when the user pressed 'q'.

    Raises:
        RuntimeError: if the camera does not deliver a frame.
    """
    for action in actions:
        for sequence in range(no_sequences):
            for frame_num in range(sequence_length):
                ret, frame = cap.read()
                if not ret:
                    # Fail with a clear message instead of an opaque OpenCV
                    # error on the empty frame further down.
                    raise RuntimeError('Could not read a frame from the camera')

                image, results = mediapipe_detection(frame, hands)
                draw_styled_landmarks(image, results)

                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                cv2.imshow('OpenCV Feed', image)
                if frame_num == 0:
                    # Short pause at the start of each clip so the signer can get ready.
                    cv2.waitKey(500)

                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # 'q' stops the whole collection, not just the current clip.
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    return False
    return True


# Open the camera before the try block so `cap` always exists in `finally`.
cap = cv2.VideoCapture(0)
try:
    with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
        _collect_keypoint_sequences(cap, hands)
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()

In [128]:
# Manual cleanup: release the camera handle `cap` and close all OpenCV windows.
# NOTE(review): relies on `cap` having been created by the collection cell.
cap.release()
cv2.destroyAllWindows()

In [204]:
# Map every action name to its integer class index (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))
print(label_map)

{'hello': 0, 'play': 1, 'okay': 2, 'nice': 3, 'iloveu': 4, 'peace': 5, 'promise': 6, 'why': 7, 'hungry': 8, 'temple': 9}


In [205]:
# Load every recorded sequence from disk.
# `sequences` gets one list of `sequence_length` keypoint arrays per clip and
# `labels` the matching integer class index from `label_map`.
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    # Only numerically named entries are sequence folders; skipping anything
    # else (e.g. hidden files created by the OS or editor) avoids an int()
    # conversion error. Sorting makes the sample order deterministic.
    sequence_ids = sorted(int(name) for name in os.listdir(action_dir) if name.isdigit())
    for sequence in sequence_ids:
        window = [
            np.load(os.path.join(action_dir, str(sequence), "{}.npy".format(frame_num)))
            for frame_num in range(sequence_length)
        ]
        sequences.append(window)
        labels.append(label_map[action])


In [206]:
# Stack the clips into one array: (num_sequences, sequence_length, num_features).
X = np.array(sequences)
print(X.shape)
print(np.array(labels).shape)
print(X.shape)

# One-hot encode the labels. num_classes is pinned to the number of actions so
# the width does not depend on which labels happen to be present in the data.
y = to_categorical(labels, num_classes=len(actions)).astype(int)

# Fixed seed -> the same split on every run; stratify -> every action keeps
# the same share of samples in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=labels
)
print(y_test.shape)

(500, 30, 126)
(500,)
(500, 30, 126)
(100, 10)


In [207]:
# TensorBoard callback writing its event files to the 'Logs' folder.
log_dir = 'Logs'
tb_callback = TensorBoard(log_dir=log_dir)

# Three stacked LSTM layers followed by a dense head; the final softmax layer
# has one unit per action. Input is (sequence_length, features-per-frame).
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu',
         input_shape=(sequence_length, X.shape[2])),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [133]:
# del model

In [208]:
# Print the layer-by-layer overview (output shapes and parameter counts) of `model`.
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_30 (LSTM)              (None, 30, 64)            48896     
                                                                 
 lstm_31 (LSTM)              (None, 30, 128)           98816     
                                                                 
 lstm_32 (LSTM)              (None, 64)                49408     
                                                                 
 dense_30 (Dense)            (None, 64)                4160      
                                                                 
 dense_31 (Dense)            (None, 32)                2080      
                                                                 
 dense_32 (Dense)            (None, 10)                330       
                                                                 
Total params: 203,690
Trainable params: 203,690
Non-t

In [213]:
# Number of training epochs: 30 per action class.
epoch_size = 30 * len(actions)
# Fit on the training split; progress is also logged through the TensorBoard callback.
model.fit(x=X_train, y=y_train, epochs=epoch_size, callbacks=[tb_callback])

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.callbacks.History at 0x21590759670>

In [193]:
# Predict class probabilities for the held-out test clips; each row of `res`
# holds one softmax output (one column per class the model was trained on).
# NOTE(review): the recorded output of this cell has 7 columns while `actions`
# currently lists 10 classes -- it looks like it comes from an earlier run; re-run to refresh.
res = model.predict(X_test)
res



array([[3.4782941e-23, 9.6471022e-14, 6.2078011e-06, 9.0691560e-13,
        9.1811089e-04, 1.3946699e-10, 9.9907565e-01],
       [2.1336741e-20, 2.0603297e-06, 9.9859065e-01, 3.1032695e-07,
        2.9453687e-07, 5.1226839e-17, 1.4066478e-03],
       [9.9997830e-01, 1.1146246e-08, 9.3881370e-13, 7.5086770e-11,
        1.3565147e-09, 2.1722286e-05, 2.3130159e-14],
       [1.6896460e-12, 2.3246844e-08, 1.4076614e-06, 1.3652085e-10,
        9.9809307e-01, 8.8526373e-05, 1.8169510e-03],
       [2.5645099e-11, 1.8920156e-07, 5.0391695e-06, 1.7216041e-09,
        9.9581122e-01, 3.0834746e-04, 3.8753452e-03],
       [1.1686056e-20, 1.8306340e-06, 9.9887019e-01, 2.2442886e-07,
        2.3564442e-07, 2.9057012e-17, 1.1275080e-03],
       [3.9090379e-04, 2.5148990e-03, 6.7230983e-04, 9.9522841e-01,
        3.1506186e-04, 1.9373845e-05, 8.5893471e-04],
       [5.1978355e-17, 9.9992406e-01, 7.5639349e-05, 2.7877712e-07,
        2.9553653e-15, 1.9163626e-23, 1.1827558e-11],
       [9.9956650e-01, 2

In [194]:
# Side-by-side listing: sample index, predicted action, true action.
print(res.shape)
predicted_classes = np.argmax(res, axis=1)
true_classes = np.argmax(y_test, axis=1)
for i in range(res.shape[0]):
    print(i, "  ->  ", actions[predicted_classes[i]], "   ", actions[true_classes[i]])

(70, 7)
0   ->   promise     promise
1   ->   okay     okay
2   ->   hello     hello
3   ->   iloveu     iloveu
4   ->   iloveu     promise
5   ->   okay     okay
6   ->   nice     nice
7   ->   play     play
8   ->   hello     hello
9   ->   iloveu     iloveu
10   ->   promise     promise
11   ->   nice     nice
12   ->   play     play
13   ->   nice     nice
14   ->   play     play
15   ->   peace     peace
16   ->   promise     promise
17   ->   promise     promise
18   ->   play     play
19   ->   play     play
20   ->   play     play
21   ->   promise     promise
22   ->   nice     nice
23   ->   okay     okay
24   ->   promise     promise
25   ->   play     play
26   ->   peace     peace
27   ->   iloveu     iloveu
28   ->   iloveu     iloveu
29   ->   iloveu     iloveu
30   ->   peace     peace
31   ->   peace     peace
32   ->   hello     hello
33   ->   promise     promise
34   ->   play     play
35   ->   promise     promise
36   ->   play     play
37   ->   nice     nice
38 

In [197]:
# Persist the trained model to the HDF5 file "action.h5".
model.save("action.h5")
# del model
# Load the weights back from the file that was just written into the in-memory model.
model.load_weights("action.h5")

In [139]:
# Reload the saved Keras model so the export works from the on-disk artefact.
model = load_model('action.h5')

# Configure the TensorFlow Lite converter: default optimizations, and both the
# built-in TFLite op set and the select-TF-ops set as allowed targets.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]

tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed
# deterministically; the byte count is still shown as the cell output.
with open("action.tflite", "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
bytes_written

INFO:tensorflow:Assets written to: C:\Users\Mayurdhvajsinh\AppData\Local\Temp\tmplh1afnvq\assets


INFO:tensorflow:Assets written to: C:\Users\Mayurdhvajsinh\AppData\Local\Temp\tmplh1afnvq\assets


235208

In [195]:
# Turn one-hot targets and softmax outputs into class indices, then show one
# 2x2 confusion matrix per class.
ytrue = y_test.argmax(axis=1).tolist()
yhat = model.predict(X_test).argmax(axis=1).tolist()

multilabel_confusion_matrix(ytrue, yhat)



array([[[63,  0],
        [ 0,  7]],

       [[60,  0],
        [ 0, 10]],

       [[59,  0],
        [ 0, 11]],

       [[60,  0],
        [ 0, 10]],

       [[60,  1],
        [ 0,  9]],

       [[59,  0],
        [ 0, 11]],

       [[58,  0],
        [ 1, 11]]], dtype=int64)

In [196]:
# Fraction of test clips whose predicted class index equals the true one.
accuracy_score(ytrue, yhat)

0.9857142857142858