In [28]:
%pip install tensorflow opencv-python mediapipe matplotlib scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [29]:
import cv2 as cv
import os
import time
import numpy as np
import mediapipe as mp
from matplotlib import pyplot as plt
import tensorflow as tf 

In [30]:
mpHolistic = mp.solutions.holistic
mpDrawing = mp.solutions.drawing_utils

In [31]:
def mediapipeDetection(image, model):
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
  image.flags.writeable = False
  results = model.process(image)
  image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
  image.flags.writeable = True
  return image, results

In [32]:
def drawLandmarks(image, results):
    FACE_CONNECTIONS = [
    (10, 338), (338, 297), (297, 332), (332, 284), (284, 251), (251, 389), (389, 356), (356, 454), 
    (454, 323), (323, 361), (361, 288), (288, 397), (397, 365), (365, 379), (379, 378), (378, 400), 
    (400, 377), (377, 152), (152, 148), (148, 176), (176, 149), (149, 150), (150, 136), (136, 172), 
    (172, 58), (58, 132), (132, 93), (93, 234), (234, 127), (127, 162), (162, 21), (21, 54), (54, 103), 
    (103, 67), (67, 109), (109, 10)
    ]
    mpDrawing.draw_landmarks(image, results.face_landmarks,FACE_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.pose_landmarks, mpHolistic.POSE_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS)

In [33]:
def drawStyledLandmarks(image, results):
    FACE_CONNECTIONS = [
    (10, 338), (338, 297), (297, 332), (332, 284), (284, 251), (251, 389), (389, 356), (356, 454), 
    (454, 323), (323, 361), (361, 288), (288, 397), (397, 365), (365, 379), (379, 378), (378, 400), 
    (400, 377), (377, 152), (152, 148), (148, 176), (176, 149), (149, 150), (150, 136), (136, 172), 
    (172, 58), (58, 132), (132, 93), (93, 234), (234, 127), (127, 162), (162, 21), (21, 54), (54, 103), 
    (103, 67), (67, 109), (109, 10)
    ]
    mpDrawing.draw_landmarks(image, results.face_landmarks,FACE_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             mpDrawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    mpDrawing.draw_landmarks(image, results.pose_landmarks, mpHolistic.POSE_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
    mpDrawing.draw_landmarks(image, results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
    mpDrawing.draw_landmarks(image, results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

In [34]:
cap = cv.VideoCapture(0)
with mpHolistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
  while cap.isOpened():
    ret, frame = cap.read()
    image, results = mediapipeDetection(frame, holistic)
    drawStyledLandmarks(image, results)
    cv.imshow("OpenCV Feed", image)
    if cv.waitKey(10) & 0xFF == ord('q'):
      break
  cap.release()
  cv.destroyAllWindows()

In [35]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(132)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
    return np.concatenate([pose,face,lh,rh])

In [36]:
extract_keypoints(results).shape

(1662,)

In [37]:
DATA_PATH = os.path.join('MP_Data')
actions = np.array(['hello', 'thanks','i love you'])
noSequences = 30
sequenceLength = 30

In [38]:
for action in actions:
    for sequence in range(noSequences):
        try:
            os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
        except:
            pass

In [40]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, activation="relu", input_shape = ()),
    tf.keras.layers.LSTM(128, return_sequences=True, activation="relu"),
    tf.keras.layers.LSTM(64, return_sequences=False, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(actions.shape[0], activation="softmax")
])



  super().__init__(**kwargs)


ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=1. Full shape received: (None,)

In [41]:
import pandas as pd
import json 
import re

In [42]:
main_path = "A:\\archive (1)\\"
wlasl_df = pd.read_json(main_path + "WLASL_v0.3.json")

In [43]:
wlasl_df.shape

(2000, 2)

In [44]:
def getVideoIDs(jsonList):
    videoIDs = []
    for i in jsonList:
        videoID = i['video_id']
        if os.path.exists(f'{main_path}videos/{videoID}.mp4'):
            videoIDs.append(videoID)
    return videoIDs

In [45]:
with open(main_path+'WLASL_v0.3.json', 'r') as dataFile:
    jsonData = dataFile.read()

instance_json = json.loads(jsonData)

In [46]:
getVideoIDs(instance_json[0]['instances']) 

['69241', '07069', '07068', '07070', '07099', '07074']

In [47]:
wlasl_df["video_ids"] = wlasl_df["instances"].apply(getVideoIDs)

In [48]:
wlasl_df

Unnamed: 0,gloss,instances,video_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"
...,...,...,...
1995,washington,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62393, 62394, 62395, 62396, 62398]"
1996,waterfall,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62488, 62489, 62490, 62492, 62493]"
1997,weigh,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62782, 62783, 62785]"
1998,wheelchair,"[{'bbox': [415, 86, 1811, 1080], 'fps': 25, 'f...","[63044, 63046, 63047, 63050]"


In [49]:
actions= np.array(wlasl_df['gloss'][:10])

In [50]:
DATA_PATH = os.path.join('MP_Data')

In [51]:
for action, videos in zip(actions, wlasl_df['video_ids']):
    for sequence, video_id in enumerate(videos):
        directory_path = os.path.join(DATA_PATH, action, str(sequence))
        os.makedirs(directory_path, exist_ok=True)


In [59]:
with mpHolistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for action, videos in zip(actions, wlasl_df['video_ids']):
        for sequence, video_id in enumerate(videos):
            video_path = f"A:\\archive (1)\\videos\\{video_id}.mp4"
            cap = cv.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Error opening video file: {video_path}")
                continue
            
            total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
            print(f"Video ID: {video_id}, Total Frames: {total_frames}")

            i = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("End of video.")
                    break

                # Process each frame using MediaPipe Holistic
                image, results = mediapipeDetection(frame, holistic)
                drawStyledLandmarks(image, results)
                keypoints = extract_keypoints(results)
                npyPath = os.path.join(DATA_PATH, action, str(sequence), str(i))
                np.save(npyPath, keypoints)
                i += 1

                cv.imshow("OpenCV Feed", image)
                if cv.waitKey(10) & 0xFF == ord('q'):
                    break

                # Check for "Quit" button press (Press 'q' to quit)
                if cv.waitKey(10) & 0xFF == ord('Q'):
                    cap.release()
                    cv.destroyAllWindows()
                    exit()

            cap.release()

cv.destroyAllWindows()

Video ID: 69241, Total Frames: 75
Video ID: 07069, Total Frames: 30
End of video.
Video ID: 07068, Total Frames: 68
End of video.
Video ID: 07070, Total Frames: 86
End of video.
Video ID: 07099, Total Frames: 87
End of video.
Video ID: 07074, Total Frames: 41
End of video.
Video ID: 69302, Total Frames: 77
End of video.
Video ID: 65539, Total Frames: 44
End of video.
Video ID: 17710, Total Frames: 70
End of video.
Video ID: 17733, Total Frames: 93
End of video.
Video ID: 65540, Total Frames: 46
End of video.
Video ID: 17734, Total Frames: 89
End of video.
Video ID: 17711, Total Frames: 81
End of video.
Video ID: 17712, Total Frames: 43
End of video.
Video ID: 17713, Total Frames: 91
End of video.
Video ID: 17709, Total Frames: 105
End of video.
Video ID: 17720, Total Frames: 41
End of video.
Video ID: 17721, Total Frames: 49
End of video.
Video ID: 17722, Total Frames: 49
End of video.
Video ID: 17723, Total Frames: 60
End of video.
Video ID: 17724, Total Frames: 30
End of video.
Video

In [None]:
from sklearn.model_selection import train_test_split
from