In [None]:
# pip install tensorflow opencv-python mediapipe sklearn matplotlib

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import os
import time
from matplotlib import pyplot as plt

In [3]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [6]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)) 

In [7]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [19]:
import json

with open('archive/WLASL.json') as f:
    data = json.load(f)

counter2 = 0
for i in data:
    gloss = i['gloss']
    counter = 0
    for inst in i['instances']:
        videoID = inst['video_id']
        # check if video exists
        if os.path.isfile('archive/videos/' + videoID + '.mp4'):
            print("Video ID : ", videoID)
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
                cap = cv2.VideoCapture('archive/videos/' + videoID + '.mp4')
                frameRate = cap.get(2)
                filename = 0
                totalFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                print("Total Frames: ", totalFrames)

                frameCounter = 0
                while (cap.isOpened()):
                    if frameCounter == 30:
                        break
                    frameId = cap.get(1)
                    ret, frame = cap.read()
                    if ret != True:
                        break
                    if (frameId % (int(totalFrames/30)) == 0):
                        image, results = mediapipe_detection(frame, holistic)
                        draw_styled_landmarks(image, results)
                        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                        keypoints = extract_keypoints(results)
                        frameId = int(frameId)
                        npy_path = os.path.join("train", gloss, str(counter) ,str(frameCounter) + ".npy")
                        if not os.path.exists(os.path.dirname(npy_path)):
                            os.makedirs(os.path.dirname(npy_path))
                        np.save(npy_path, keypoints)
                        frameCounter+=1
                counter+=1
        else:
            print("Video ID : ", videoID, " does not exist")
                    
    cap.release()
    cv2.destroyAllWindows()
    
    if counter2 == 2:
        break
    counter2+=1


Video ID :  69241
Total Frames:  75.0
Video ID :  65225  does not exist
Video ID :  68011  does not exist
Video ID :  68208  does not exist
Video ID :  68012  does not exist
Video ID :  70212  does not exist
Video ID :  70266  does not exist
Video ID :  07085  does not exist
Video ID :  07086  does not exist
Video ID :  07087  does not exist
Video ID :  07069
Total Frames:  30.0
Video ID :  07088  does not exist
Video ID :  07089  does not exist
Video ID :  07090  does not exist
Video ID :  07091  does not exist
Video ID :  07092  does not exist
Video ID :  07093  does not exist
Video ID :  07068
Total Frames:  68.0
Video ID :  07094  does not exist
Video ID :  07095  does not exist
Video ID :  07096  does not exist
Video ID :  07097  does not exist
Video ID :  07070
Total Frames:  86.0
Video ID :  07098  does not exist
Video ID :  07099
Total Frames:  87.0
Video ID :  07071  does not exist
Video ID :  07072  does not exist
Video ID :  07073  does not exist
Video ID :  67424  does not 

In [20]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [21]:
actions = np.array(["book", "computer", "drink"])
label_map = {label:num for num, label in enumerate(actions)}
label_map

{'book': 0, 'computer': 1, 'drink': 2}

In [37]:
sequences, labels = [], []
DATA_PATH = os.path.join('train') 
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        # check frames in each folder (sequence)
        frameCount = len(os.listdir(os.path.join(DATA_PATH, action, str(sequence))))
        for frameNum in range(frameCount):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), str(frameNum) + '.npy'))
            window.append(res) 
        sequences.append(window)
        labels.append(label_map[action])

In [38]:
np.array(sequences).shape

(30, 30, 1662)

In [39]:
np.array(labels).shape

(30,)

In [40]:
X = np.array(sequences)

In [41]:
y = to_categorical(labels).astype(int)

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

Build and Train LSTM Neural Network

In [43]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [44]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [48]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [49]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [50]:
model.fit(X_train, y_train, epochs=100, callbacks=[tb_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x24a122f1d00>

In [51]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 30, 64)            442112    
                                                                 
 lstm_7 (LSTM)               (None, 30, 128)           98816     
                                                                 
 lstm_8 (LSTM)               (None, 64)                49408     
                                                                 
 dense_6 (Dense)             (None, 64)                4160      
                                                                 
 dense_7 (Dense)             (None, 32)                2080      
                                                                 
 dense_8 (Dense)             (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

In [52]:
res = model.predict(X_test)



In [53]:
actions[np.argmax(res[4])]

IndexError: index 4 is out of bounds for axis 0 with size 2

In [None]:
actions[np.argmax(y_test[4])]

In [54]:
model.save('action.h5')

In [55]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [56]:
yhat = model.predict(X_test)



In [57]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [58]:
accuracy_score(ytrue, yhat)

0.5