In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import numpy as np
import os

dataPath = os.path.join("Data")
actions = np.array(['scrollDown', 'scrollUp', 'increaseVol', 'decreaseVol'])
noSequences = 30
sequenceLength = 30

label_map = {label:num for num, label in enumerate(actions)}

In [None]:
label_map

In [None]:
os.listdir()

In [5]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(dataPath, action))).astype(int):
        window = []
        for frameNum in range(sequenceLength):
            res = np.load(os.path.join(dataPath, action, str(sequence), "{}.npy".format(frameNum)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [6]:
x = np.array(sequences)
y = to_categorical(labels).astype(int)
x = np.reshape(x, (120, 30, 63))
x.shape

(120, 30, 63)

In [7]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.05)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir=log_dir)
es_callback = EarlyStopping(monitor='val_loss', patience=3)

In [28]:
model = Sequential()
model.add(LSTM(64, return_sequences = True, activation='relu', input_shape=(30, 63)))
model.add(LSTM(128, return_sequences = True, activation='relu'))
model.add(LSTM(64, return_sequences = False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [38]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy', 'accuracy'])
model.fit(x_train, y_train, epochs=2000, callbacks=[tb_callback, es_callback])

Epoch 1/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 260ms/step - accuracy: 0.8898 - categorical_accuracy: 0.8898 - loss: 0.3823
Epoch 2/2000
[1m3/4[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m0s[0m 42ms/step - accuracy: 0.8299 - categorical_accuracy: 0.8299 - loss: 0.4221 

  current = self.get_monitor_value(logs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 0.8453 - categorical_accuracy: 0.8453 - loss: 0.3936
Epoch 3/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.8824 - categorical_accuracy: 0.8824 - loss: 0.3323
Epoch 4/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 5s/step - accuracy: 0.8721 - categorical_accuracy: 0.8721 - loss: 0.3114 
Epoch 5/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 136ms/step - accuracy: 0.9347 - categorical_accuracy: 0.9347 - loss: 0.2270
Epoch 6/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.9320 - categorical_accuracy: 0.9320 - loss: 0.2092
Epoch 7/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step - accuracy: 0.9247 - categorical_accuracy: 0.9247 - loss: 0.2213
Epoch 8/2000
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step - accuracy: 0.9239 - cate

KeyboardInterrupt: 

In [39]:
model.summary()

In [None]:
res = model.predict(x_test)
actions[np.argmax(res[2])]

In [None]:
actions[np.argmax(y_test[2])]

In [14]:
model.save("trainedModel.h5")



In [40]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

yhat = model.predict(x_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

multilabel_confusion_matrix(ytrue, yhat)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 522ms/step


array([[[5, 1],
        [0, 0]],

       [[3, 0],
        [1, 2]],

       [[5, 0],
        [1, 0]],

       [[3, 1],
        [0, 2]]], dtype=int64)

In [42]:
accuracy_score(ytrue, yhat)

0.6666666666666666

In [43]:
import mediapipe as mp
import numpy as np
import cv2

def handDetection(image, handDetector):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Converting to RGB because that's the color scheme mp uses
    image.flags.writeable = False
    results = handDetector.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Going back to BGR for opencv

    return image, results

def drawLandmarks(mpDrawing, image, hand_landmarks, mpHands):
    mpDrawing.draw_landmarks(image, hand_landmarks, mpHands.HAND_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(0, 0, 216), thickness=2, circle_radius=5),
                             mpDrawing.DrawingSpec(color=(0, 204, 255), thickness=2, circle_radius=1))

In [46]:
sequence = []

# Initializing Camera
cam = cv2.VideoCapture(1)
cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Loading the Hand Gesture Model
mpHands = mp.solutions.hands
handDetector = mpHands.Hands(
    static_image_mode="store_true",
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7
)
mpDrawing = mp.solutions.drawing_utils

while True:

    key = cv2.waitKey(1)
    if key == ord('q'):
        break

    ret, frame = cam.read()

    # Mirroring the frame TRY TO CHECK WHAT HAPPENS WITHOUT THIS LINE
    image = cv2.flip(frame, 1)

    # Mediapipe Hand Detection
    image, results = handDetection(image, handDetector)

    # Likely a bug in PyCharm for not recognizing this attribute
    # If a hand is detected
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Drawing Landmarks
            drawLandmarks(mpDrawing, image, hand_landmarks, mpHands)

            # Extracting key datapoints
            landmarks = np.array([[res.x, res.y, res.z] for res in hand_landmarks.landmark])
            # print(landmarks.shape)


            sequence.append(landmarks.flatten())
            sequence = sequence[-30:]           # Grab the last 30 frames

            if len(sequence) == 30:
                # sequence = np.expand_dims(np.reshape(sequence, (30, 63)), axis=0)
                # print(sequence.shape)
                # res = action(sequence)
                # print(res)
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                sequence = []
                print(res)
                print(actions[np.argmax(res)])

    cv2.imshow("Camera", image)