In [None]:
# pip install tensorflow opencv-python mediapipe sklearn matplotlib

In [6]:
import cv2
import mediapipe as mp
import numpy as np
import os
import time
from matplotlib import pyplot as plt

In [7]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [8]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [9]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)) 

In [10]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [44]:
import json

with open('archive/WLASL.json') as f:
    data = json.load(f)

counter2 = 0
for i in data:
    gloss = i['gloss']
    counter = 0
    for inst in i['instances']:
        videoID = inst['video_id']
        # check if video exists
        if os.path.isfile('archive/videos/' + videoID + '.mp4'):
            print("Video ID : ", videoID)
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
                cap = cv2.VideoCapture('archive/videos/' + videoID + '.mp4')
                frameRate = cap.get(2)
                filename = 0
                while (cap.isOpened()):
                    frameId = cap.get(1)
                    ret, frame = cap.read()
                    if ret != True:
                        break
                    # take only 30 frames
                    print("Frame Rate: ", frameRate)
                    print("Frame ID: ", frameId)
                    if (frameId % ((int(frameRate)+1)) == 0):
                        image, results = mediapipe_detection(frame, holistic)
                        draw_styled_landmarks(image, results)
                        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                        keypoints = extract_keypoints(results)
                        frameId = int(frameId)
                        npy_path = os.path.join("train", gloss, str(counter) ,str(frameId) + ".npy")
                        if not os.path.exists(os.path.dirname(npy_path)):
                            os.makedirs(os.path.dirname(npy_path))
                        np.save(npy_path, keypoints)
                counter+=1
        else:
            print("Video ID : ", videoID, " does not exist")
                    
    cap.release()
    cv2.destroyAllWindows()
    
    if counter2 == 2:
        break
    counter2+=1


Video ID :  69241
Frame Rate:  3.3333333333333335e-05
Frame ID:  0.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  1.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  2.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  3.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  4.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  5.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  6.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  7.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  8.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  9.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  10.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  11.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  12.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  13.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  14.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  15.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  16.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  17.0
Frame Rate:  3.3333333333333335e-05
Frame ID:  18.0
Fram

KeyboardInterrupt: 

In [36]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [37]:
actions = np.array(["book", "computer", "drink"])
label_map = {label:num for num, label in enumerate(actions)}
label_map

{'book': 0, 'computer': 1, 'drink': 2}

In [41]:
sequences, labels = [], []
DATA_PATH = os.path.join('train') 
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        # check frames in each folder (sequence)
        frameCount = len(os.listdir(os.path.join(DATA_PATH, action, str(sequence))))
        for frameNum in range(frameCount):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), str(frameNum) + '.npy'))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [42]:
np.array(sequences).shape

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (35,) + inhomogeneous part.

In [19]:
np.array(labels).shape

(35,)

In [21]:
X = np.array(sequences)

In [22]:
y = to_categorical(labels).astype(int)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

Build and Train LSTM Neural Network

In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [25]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [30]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [31]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [32]:
model.fit(X_train, y_train, epochs=100, callbacks=[tb_callback])

Epoch 1/100


ValueError: in user code:

    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\engine\training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\moham\AppData\Roaming\Python\Python39\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 30, 1662), found shape=(None, 1662)
