In [47]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [48]:
# Gesture classes to recognise; the position in this list is the class id.
actions = ['chop', 'pinch']

# Root folder for the extracted per-frame keypoint arrays.
DATA_PATH = 'data_npy'

# Create one folder per (action, sequence) pair: DATA_PATH/<action>/<sequence>.
# exist_ok=True keeps the cell safe to re-run when the folders already exist,
# while genuine failures (permission denied, a file occupying the path, ...)
# now raise instead of being hidden by a bare `except: pass`.
for action in actions:
    for sequence in range(30):
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

In [49]:
# Short handles to the two MediaPipe modules used by the rest of the notebook:
#   mp_holistic - provides the Holistic model and the HAND_CONNECTIONS set
#   mp_drawing  - provides draw_landmarks / DrawingSpec for rendering
mp_holistic, mp_drawing = (
    mp.solutions.holistic,
    mp.solutions.drawing_utils,
)

In [50]:
def mediapipe_detection(image, holistic_model):
    """Run ``holistic_model`` on a single BGR frame.

    Args:
        image: Frame in BGR channel order (it is converted BGR -> RGB before
            being handed to the model).
        holistic_model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``holistic_model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only while the model runs and writeable again after.
    # NOTE(review): presumably this lets MediaPipe avoid a copy - confirm.
    rgb_frame.flags.writeable = False
    results = holistic_model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [51]:
def draw_styled_landmarks(image, results):
    """Render the left- and right-hand landmarks of ``results`` onto ``image``.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` as the drawing target.
        results: Detection output exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None.
    """
    # Per hand: (landmarks, colour of the first DrawingSpec, colour of the second).
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )

    for hand_landmarks, first_color, second_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=first_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=second_color, thickness=2, circle_radius=2),
        )

In [52]:
def extract_keypoints(results):
    """Flatten both hands' landmarks into one 1-D feature vector.

    Args:
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy (hand missing) or
            has a ``landmark`` iterable whose items carry ``x``, ``y``, ``z``.

    Returns:
        1-D ``numpy`` array: left-hand values followed by right-hand values.
        A missing hand contributes ``21 * 3`` zeros; a present hand contributes
        ``x, y, z`` for each of its landmarks, in order.
    """
    def _hand_vector(hand):
        # Zero padding keeps the layout stable when a hand is not available.
        if not hand:
            return np.zeros(21 * 3)
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coords).flatten()

    left = _hand_vector(results.left_hand_landmarks)
    right = _hand_vector(results.right_hand_landmarks)
    return np.concatenate([left, right])

In [53]:

# Extract hand keypoints from up to 30 evenly spaced frames of every recorded
# video and save each frame's feature vector to
# DATA_PATH/<action>/<sequence>/<frame>.npy.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    try:
        for action in actions:
            for sequence in range(30):
                video_path = f'data_backup/{action}/video_{sequence}.avi'

                # Open each video exactly once. Previously a new VideoCapture was
                # created for every sampled frame and only the last one was
                # released, leaking the other handles.
                cap = cv2.VideoCapture(video_path)
                try:
                    if not cap.isOpened():
                        # Surface the problem here; otherwise it only shows up
                        # later as missing .npy files.
                        print(f'{action}: could not open {video_path}, skipping')
                        continue

                    # Loop-invariant per video: sampling stride that spreads the
                    # 30 samples across the clip (at least 1 frame apart).
                    framesCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    skipFrame = max(int(framesCount/30), 1)

                    for frameCounter in range(30):
                        # Seek to the next sampled frame.
                        cap.set(cv2.CAP_PROP_POS_FRAMES, frameCounter * skipFrame)

                        ret, frame = cap.read()
                        if not ret:
                            # No more readable frames in this video.
                            break

                        image, results = mediapipe_detection(frame, holistic)
                        draw_styled_landmarks(image, results)
                        cv2.imshow('OpenCV Feed', image)

                        keypoints = extract_keypoints(results)
                        # np.save appends the ".npy" extension to this path.
                        npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frameCounter))
                        np.save(npy_path, keypoints)

                        print(f'{action}: saving frame {frameCounter} of video {sequence}')

                        # 'q' stops the current video only; the remaining videos
                        # are still processed (same as before).
                        if cv2.waitKey(10) & 0xFF == ord('q'):
                            break
                finally:
                    # Always free the capture, even if processing a frame raised.
                    cap.release()
    finally:
        # Close the preview window on normal completion and on errors alike.
        cv2.destroyAllWindows()



chop: saving frame 0 of video 0
chop: saving frame 1 of video 0
chop: saving frame 2 of video 0
chop: saving frame 3 of video 0
chop: saving frame 4 of video 0
chop: saving frame 5 of video 0
chop: saving frame 6 of video 0
chop: saving frame 7 of video 0
chop: saving frame 8 of video 0
chop: saving frame 9 of video 0
chop: saving frame 10 of video 0
chop: saving frame 11 of video 0
chop: saving frame 12 of video 0
chop: saving frame 13 of video 0
chop: saving frame 14 of video 0
chop: saving frame 15 of video 0
chop: saving frame 16 of video 0
chop: saving frame 17 of video 0
chop: saving frame 18 of video 0
chop: saving frame 19 of video 0
chop: saving frame 20 of video 0
chop: saving frame 21 of video 0
chop: saving frame 22 of video 0
chop: saving frame 23 of video 0
chop: saving frame 24 of video 0
chop: saving frame 25 of video 0
chop: saving frame 26 of video 0
chop: saving frame 27 of video 0
chop: saving frame 28 of video 0
chop: saving frame 29 of video 0
chop: saving frame 0

In [54]:
# Map each action name to its integer class id (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))
label_map

{'chop': 0, 'pinch': 1}

In [55]:
# Assemble the dataset: for every action and every recorded sequence, load the
# 30 saved per-frame keypoint arrays into one window and record the class id.
sequences, labels = [], []
for action in actions:
    for sequence in range(30):
        sequence_dir = os.path.join('data_npy', action, str(sequence))
        window = [
            np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            for frame_num in range(30)
        ]
        sequences.append(window)
        labels.append(label_map[action])

In [56]:
# Stack the loaded windows into one feature array and the class ids into a vector.
X = np.asarray(sequences)
y = np.asarray(labels)

# Show both shapes, one per line.
print(X.shape, y.shape, sep='\n')

# Replace the integer class ids with their one-hot encoding.
y = to_categorical(labels).astype(int)

(60, 30, 126)
(60,)


In [57]:
# Hold out 5% of the sequences as a small test set.
# - random_state pins the shuffle so a fresh "Restart & Run All" reproduces the
#   same split (and therefore comparable training results).
# - stratify uses the integer class ids recovered from the one-hot rows so the
#   few held-out samples are not all drawn from a single class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
    stratify=y.argmax(axis=1),
)
y_test.shape

(3, 2)

In [58]:
# log_dir = os.path.join('Logs')
# tb_callback = TensorBoard(log_dir=log_dir)

In [59]:
# Sequence classifier: three stacked LSTM layers (the first two return the full
# sequence, the last returns only its final output), followed by two dense
# layers and a 2-unit softmax output. Input windows are 30 steps x 126 features.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,126)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

model.summary()

In [60]:
# The network ends in a softmax over the classes and is trained on one-hot
# targets, so categorical cross-entropy is the matching loss. With exactly two
# classes it yields the same value as the previous binary cross-entropy, but it
# stays correct if more actions are added.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [61]:
# Train for 150 epochs with shuffling enabled; validation_split=0.2 reserves a
# fifth of the training samples for the per-epoch validation metrics.
model.fit(
    x=X_train,
    y=y_train,
    epochs=150,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 310ms/step - accuracy: 0.3292 - loss: 0.6949 - val_accuracy: 0.9167 - val_loss: 0.6785
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9347 - loss: 0.6741 - val_accuracy: 0.9167 - val_loss: 0.6484
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9347 - loss: 0.6375 - val_accuracy: 0.9167 - val_loss: 0.5519
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.9451 - loss: 0.5301 - val_accuracy: 0.9167 - val_loss: 0.3857
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9199 - loss: 0.4340 - val_accuracy: 1.0000 - val_loss: 0.4616
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.9600 - loss: 0.3774 - val_accuracy: 0.9167 - val_loss: 0.3715
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x22bc63c92e0>

In [64]:
# Predict on the held-out samples, then show the action name whose score is
# highest for the test sample at index 2.
res = model.predict(X_test)
actions[res[2].argmax()]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


'pinch'