In [1]:
%pip install tensorflow opencv-python mediapipe matplotlib scikit-learn pandas





In [2]:
import cv2 as cv
import os
import time
import numpy as np
import mediapipe as mp
from matplotlib import pyplot as plt
import tensorflow as tf 

In [3]:
mpHolistic = mp.solutions.holistic
mpDrawing = mp.solutions.drawing_utils

In [4]:
def mediapipeDetection(image, model):
  image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
  image.flags.writeable = False
  results = model.process(image)
  image = cv.cvtColor(image, cv.COLOR_RGB2BGR)
  image.flags.writeable = True
  return image, results

In [5]:
def drawLandmarks(image, results):
    FACE_CONNECTIONS = [
    (10, 338), (338, 297), (297, 332), (332, 284), (284, 251), (251, 389), (389, 356), (356, 454), 
    (454, 323), (323, 361), (361, 288), (288, 397), (397, 365), (365, 379), (379, 378), (378, 400), 
    (400, 377), (377, 152), (152, 148), (148, 176), (176, 149), (149, 150), (150, 136), (136, 172), 
    (172, 58), (58, 132), (132, 93), (93, 234), (234, 127), (127, 162), (162, 21), (21, 54), (54, 103), 
    (103, 67), (67, 109), (109, 10)
    ]
    mpDrawing.draw_landmarks(image, results.face_landmarks,FACE_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.pose_landmarks, mpHolistic.POSE_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS)
    mpDrawing.draw_landmarks(image, results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS)

In [6]:
def drawStyledLandmarks(image, results):
    FACE_CONNECTIONS = [
    (10, 338), (338, 297), (297, 332), (332, 284), (284, 251), (251, 389), (389, 356), (356, 454), 
    (454, 323), (323, 361), (361, 288), (288, 397), (397, 365), (365, 379), (379, 378), (378, 400), 
    (400, 377), (377, 152), (152, 148), (148, 176), (176, 149), (149, 150), (150, 136), (136, 172), 
    (172, 58), (58, 132), (132, 93), (93, 234), (234, 127), (127, 162), (162, 21), (21, 54), (54, 103), 
    (103, 67), (67, 109), (109, 10)
    ]
    mpDrawing.draw_landmarks(image, results.face_landmarks,FACE_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             mpDrawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    mpDrawing.draw_landmarks(image, results.pose_landmarks, mpHolistic.POSE_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
    mpDrawing.draw_landmarks(image, results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
    mpDrawing.draw_landmarks(image, results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
                             mpDrawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                             mpDrawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

In [7]:
cap = cv.VideoCapture(0)
with mpHolistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
  while cap.isOpened():
    ret, frame = cap.read()
    image, results = mediapipeDetection(frame, holistic)
    drawStyledLandmarks(image, results)
    cv.imshow("OpenCV Feed", image)
    if cv.waitKey(10) & 0xFF == ord('q'):
      break
  cap.release()
  cv.destroyAllWindows()



In [8]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(132)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
    return np.concatenate([pose,face,lh,rh])

In [68]:
extract_keypoints(results)

array([ 0.        ,  0.        ,  0.        , ...,  0.41166395,
        0.27048272, -0.00679371])

In [10]:
DATA_PATH = os.path.join('MP_Data')


In [40]:
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, activation="relu", input_shape = ()),
    tf.keras.layers.LSTM(128, return_sequences=True, activation="relu"),
    tf.keras.layers.LSTM(64, return_sequences=False, activation="relu"),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(actions.shape[0], activation="softmax")
])



  super().__init__(**kwargs)


ValueError: Input 0 of layer "lstm" is incompatible with the layer: expected ndim=3, found ndim=1. Full shape received: (None,)

In [11]:
import pandas as pd
import json 
import re

In [74]:
main_path = "A:\\archive (1)\\"
wlasl_df = pd.read_json(main_path + "WLASL_v0.3.json")

In [75]:
wlasl_df.shape

(2000, 2)

In [76]:
def getVideoIDs(jsonList):
    videoIDs = []
    for i in jsonList:
        videoID = i['video_id']
        if os.path.exists(f'{main_path}videos/{videoID}.mp4'):
            videoIDs.append(videoID)
    return videoIDs

In [77]:
with open(main_path+'WLASL_v0.3.json', 'r') as dataFile:
    jsonData = dataFile.read()

instance_json = json.loads(jsonData)

In [78]:
getVideoIDs(instance_json[0]['instances']) 

['69241', '07069', '07068', '07070', '07099', '07074']

In [79]:
wlasl_df["video_ids"] = wlasl_df["instances"].apply(getVideoIDs)

In [80]:
wlasl_df

Unnamed: 0,gloss,instances,video_ids
0,book,"[{'bbox': [385, 37, 885, 720], 'fps': 25, 'fra...","[69241, 07069, 07068, 07070, 07099, 07074]"
1,drink,"[{'bbox': [551, 68, 1350, 1080], 'fps': 25, 'f...","[69302, 65539, 17710, 17733, 65540, 17734, 177..."
2,computer,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[12328, 12312, 12311, 12338, 12313, 12314, 123..."
3,before,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[05728, 05749, 05750, 05729, 05730, 65167, 057..."
4,chair,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[09848, 09869, 09849, 09850, 09851, 65328, 09854]"
...,...,...,...
1995,washington,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62393, 62394, 62395, 62396, 62398]"
1996,waterfall,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62488, 62489, 62490, 62492, 62493]"
1997,weigh,"[{'bbox': [0, 0, 360, 240], 'fps': 25, 'frame_...","[62782, 62783, 62785]"
1998,wheelchair,"[{'bbox': [415, 86, 1811, 1080], 'fps': 25, 'f...","[63044, 63046, 63047, 63050]"


In [81]:
actions= np.array(wlasl_df['gloss'][:4])

In [82]:
actions

array(['book', 'drink', 'computer', 'before'], dtype=object)

In [83]:
DATA_PATH = os.path.join('MP_Data')

In [84]:
for action, videos in zip(actions, wlasl_df['video_ids']):
    for sequence, video_id in enumerate(videos):
        directory_path = os.path.join(DATA_PATH, action, str(sequence))
        os.makedirs(directory_path, exist_ok=True)


In [1]:
# Initialize MediaPipe Holistic
mp_holistic = mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Define actions, wlasl_df, DATA_PATH, labelMap variables
# Assuming these variables are defined elsewhere in your code

# Loop through actions and videos
for action, videos in zip(actions, wlasl_df['video_ids']):
    for sequence, video_id in enumerate(videos):
        video_path = f"A:\\archive (1)\\videos\\{video_id}.mp4"
        cap = cv.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            continue
        
        total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        print(f"Video ID: {video_id}, Total Frames: {total_frames}, Action: {action}, Sequence: {sequence}")

        # Create action and sequence folders if they don't exist
        action_path = os.path.join(DATA_PATH, action)
        sequence_path = os.path.join(action_path, str(sequence))
        os.makedirs(sequence_path, exist_ok=True)

        # Process each frame in the video
        for frame_number in range(total_frames):
            ret, frame = cap.read()
            if not ret:
                print("End of video.")
                break
            
            # Process frame using MediaPipe Holistic
            image, results = mediapipeDetection(frame, mp_holistic)
            keypoints = extract_keypoints(results)
            print(keypoints)
            # Save keypoints to NumPy array
            npy_path = os.path.join(sequence_path, f"{frame_number}.npy")
            np.save(npy_path, keypoints)

            cv.imshow("OpenCV Feed", image)
            if cv.waitKey(10) & 0xFF == ord('q'):
                break

            if cv.waitKey(10) & 0xFF == ord('t'):
                cap.release()
                cv.destroyAllWindows()
                exit()

        cap.release()

cv.destroyAllWindows()

NameError: name 'mp' is not defined

In [69]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [70]:
labelMap = {label:num for num,label in enumerate(actions)}

In [71]:
labelMap

{'book': 0, 'drink': 1, 'computer': 2, 'before': 3}

In [73]:
sequences = []
labels = []

for action, videos in zip(actions, wlasl_df['video_ids']):
    for sequence, video_id in enumerate(videos):
        video_path = f"A:\\archive (1)\\videos\\{video_id}.mp4"
        
        # Create a VideoCapture object to open the video file
        cap = cv.VideoCapture(video_path)
        
        # Check if the VideoCapture object is opened successfully
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            continue
        
        # Get the total number of frames in the video
        total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        print(f"Video ID: {video_id}, Total Frames: {total_frames}")
        
        window = []  # Initialize window list for each video sequence
        
        # Process each frame in the video
        for frame_number in range(total_frames):
            # Load the previously saved NumPy file
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), f"{frame_number}.npy"))
            window.append(res)
        
        # Append the window (list of frame paths) to sequences
        sequences.append(window)
        
        # Append the action label to labels using the labelMap
        labels.append(labelMap[action])

        # Release the VideoCapture object
        cap.release()

# Print the length of sequences and labels to check if they are populated
print("Sequences Length:", len(sequences))
print("Labels Length:", len(labels))

Video ID: 69241, Total Frames: 75
Video ID: 07069, Total Frames: 30
Video ID: 07068, Total Frames: 68
Video ID: 07070, Total Frames: 86
Video ID: 07099, Total Frames: 87
Video ID: 07074, Total Frames: 41


FileNotFoundError: [Errno 2] No such file or directory: 'MP_Data\\book\\5\\38.npy'

In [35]:
sequences

[['MP_Data\\book\\0\\0.npy',
  'MP_Data\\book\\0\\1.npy',
  'MP_Data\\book\\0\\2.npy',
  'MP_Data\\book\\0\\3.npy',
  'MP_Data\\book\\0\\4.npy',
  'MP_Data\\book\\0\\5.npy',
  'MP_Data\\book\\0\\6.npy',
  'MP_Data\\book\\0\\7.npy',
  'MP_Data\\book\\0\\8.npy',
  'MP_Data\\book\\0\\9.npy',
  'MP_Data\\book\\0\\10.npy',
  'MP_Data\\book\\0\\11.npy',
  'MP_Data\\book\\0\\12.npy',
  'MP_Data\\book\\0\\13.npy',
  'MP_Data\\book\\0\\14.npy',
  'MP_Data\\book\\0\\15.npy',
  'MP_Data\\book\\0\\16.npy',
  'MP_Data\\book\\0\\17.npy',
  'MP_Data\\book\\0\\18.npy',
  'MP_Data\\book\\0\\19.npy',
  'MP_Data\\book\\0\\20.npy',
  'MP_Data\\book\\0\\21.npy',
  'MP_Data\\book\\0\\22.npy',
  'MP_Data\\book\\0\\23.npy',
  'MP_Data\\book\\0\\24.npy',
  'MP_Data\\book\\0\\25.npy',
  'MP_Data\\book\\0\\26.npy',
  'MP_Data\\book\\0\\27.npy',
  'MP_Data\\book\\0\\28.npy',
  'MP_Data\\book\\0\\29.npy',
  'MP_Data\\book\\0\\30.npy',
  'MP_Data\\book\\0\\31.npy',
  'MP_Data\\book\\0\\32.npy',
  'MP_Data\\book\\0\