In [None]:
# Mount Google Drive at /content/drive so files stored there are visible to this Colab runtime.
# NOTE(review): the dataset paths used further down are relative ("DTW Dataset/...");
# presumably the working directory must point inside the mounted drive for them to
# resolve — confirm before running.
from google.colab import drive
drive.mount('/content/drive')

# MediaPipe Landmark Extraction

In [None]:
!pip install numpy
!pip install mediapipe==0.10.9

In [None]:
import mediapipe as mp
import cv2
import time
import numpy as np
import os

In [None]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back the frame plus the detections.

    Args:
        image: Frame in OpenCV's BGR channel order.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(bgr_image, results)`` tuple, where ``bgr_image`` is the frame after
        the BGR -> RGB -> BGR round trip and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [None]:
def draw_landmarks(image, results):
    """Draw the detected left- and right-hand skeletons onto ``image``.

    Args:
        image: Frame to annotate; it is modified in place.
        results: Detection result exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    mp_holistic = mp.solutions.holistic  # Holistic model
    mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

    # Left hand first, then right hand, both with the same styling.
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        # draw_landmarks() paints onto `image` in place and returns None, so its
        # return value must NOT be assigned back to `image` (doing so replaced the
        # frame with None and made this function return None).
        mp_drawing.draw_landmarks(
            image,
            landmark_list=hand_landmarks,
            connections=mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(
                color=(232, 254, 255), thickness=1, circle_radius=4
            ),
            connection_drawing_spec=mp_drawing.DrawingSpec(
                color=(255, 249, 161), thickness=2, circle_radius=2
            ),
        )
    return image

Split the dataset into train and test sets

In [None]:
import os
import shutil
import random

video_dir = "DTW Dataset/MSL_Basic" # path to the MSL_Basic dataset (one sub-folder per class)
output_dir = "DTW Dataset/MSL_Basic/" # folder in which the train/ and test/ folders are created

train_dir = os.path.join(output_dir, 'train')
test_dir = os.path.join(output_dir, 'test')

train_ratio = 0.8
test_ratio = 0.2  # implied by train_ratio (everything not in train goes to test)
split_seed = 42   # fixed seed so that re-running the cell reproduces the same split


def split_dataset(video_dir, train_dir, test_dir, train_ratio=0.8, seed=42,
                  extensions=('.mp4', '.mov', '.mkv')):
    """Copy each class folder's videos into per-class train/ and test/ folders.

    Args:
        video_dir: Folder containing one sub-folder of videos per class.
        train_dir: Destination root for the training copies.
        test_dir: Destination root for the test copies.
        train_ratio: Fraction of each class's videos copied to ``train_dir``;
            the remainder goes to ``test_dir``.
        seed: Seed for the shuffle. Together with sorting the file names it
            makes the split identical on every run over the same files, so a
            re-run cannot put a video into both train and test.
        extensions: File suffixes (compared case-insensitively) treated as videos.

    Returns:
        Dict mapping class name to ``(n_train, n_test)``.
    """
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    rng = random.Random(seed)
    # The output folders may live inside video_dir; they are not classes.
    reserved = {os.path.abspath(train_dir), os.path.abspath(test_dir)}
    suffixes = tuple(ext.lower() for ext in extensions)
    counts = {}

    for class_name in sorted(os.listdir(video_dir)):
        class_path = os.path.join(video_dir, class_name)
        if not os.path.isdir(class_path):
            continue
        if os.path.abspath(class_path) in reserved:
            continue

        # Sort before shuffling: os.listdir order is arbitrary, so the seed alone
        # would not make the split reproducible.
        files = sorted(f for f in os.listdir(class_path) if f.lower().endswith(suffixes))
        rng.shuffle(files)

        split_index = int(len(files) * train_ratio)
        train_files = files[:split_index]
        test_files = files[split_index:]

        # Create subfolders for this class in train and test
        train_class_dir = os.path.join(train_dir, class_name)
        test_class_dir = os.path.join(test_dir, class_name)
        os.makedirs(train_class_dir, exist_ok=True)
        os.makedirs(test_class_dir, exist_ok=True)

        # Copy (the originals are left untouched) into the appropriate folders
        for f in train_files:
            shutil.copy(os.path.join(class_path, f), os.path.join(train_class_dir, f))
        for f in test_files:
            shutil.copy(os.path.join(class_path, f), os.path.join(test_class_dir, f))

        counts[class_name] = (len(train_files), len(test_files))
        print(f"Class '{class_name}': {len(train_files)} train, {len(test_files)} test files.")

    return counts


split_counts = split_dataset(video_dir, train_dir, test_dir,
                             train_ratio=train_ratio, seed=split_seed)

Extract landmarks for training

In [None]:
import cv2
import os
import numpy as np
import mediapipe as mp

# MediaPipe Holistic handles
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Paths
dataset_path = "DTW Dataset/MSL_Basic/train/" # path to the MSL_Basic train dataset
output_base = "DTW Dataset/MSL_Basic_Processed/train/" # path to the MSL_Basic_Processed train dataset

os.makedirs(output_base, exist_ok=True)

# Every frame is resized to this size before detection and before being written out.
frame_width = 800
frame_height = 600
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    # Skip stray files BEFORE creating an output folder, so that non-class
    # entries do not end up as empty class folders in the processed dataset.
    if not os.path.isdir(class_path):
        continue

    output_class_path = os.path.join(output_base, class_name)
    os.makedirs(output_class_path, exist_ok=True)

    print(f"Processing class folder: {class_name}")

    for video_file in sorted(os.listdir(class_path)):
        if not video_file.lower().endswith((".mp4", ".mov", ".mkv")):
            continue

        video_path = os.path.join(class_path, video_file)
        output_video_path = os.path.join(output_class_path, f"processed_{video_file}")
        npy_output_path = os.path.join(output_class_path, f"{os.path.splitext(video_file)[0]}.npy")

        print(f"Processing video: {video_path}")
        capture = cv2.VideoCapture(video_path)
        if not capture.isOpened():
            print(f"Failed to open video {video_path}")
            capture.release()
            continue

        # Output video writer setup. Some containers report 0 (or NaN) FPS;
        # fall back to 30 so the annotated preview can still be written.
        fps = capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

        # Store landmarks
        left_hand_sequence = []
        right_hand_sequence = []

        try:
            # A fresh model per video: with static_image_mode=False the model tracks
            # landmarks from frame to frame, so reusing one instance would carry
            # tracking state from the previous (unrelated) video into this one.
            with mp_holistic.Holistic(static_image_mode=False) as holistic_model:
                while capture.isOpened():
                    ret, frame = capture.read()
                    if not ret:
                        break

                    frame = cv2.resize(frame, (frame_width, frame_height))
                    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image.flags.writeable = False
                    results = holistic_model.process(image)
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                    # Face landmarks are only drawn on the preview, not stored.
                    if results.face_landmarks:
                        mp_drawing.draw_landmarks(
                            image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(255, 0, 255), thickness=1, circle_radius=1),
                            mp_drawing.DrawingSpec(color=(0, 255, 255), thickness=1, circle_radius=1)
                        )

                    # A hand that is not detected in a frame is stored as all zeros.
                    if results.right_hand_landmarks:
                        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                        right = [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
                    else:
                        right = [[0, 0, 0]] * 21  # 21 keypoints

                    if results.left_hand_landmarks:
                        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                        left = [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
                    else:
                        left = [[0, 0, 0]] * 21

                    left_hand_sequence.append(left)
                    right_hand_sequence.append(right)

                    out.write(image)
        finally:
            # Release the reader and writer even if a frame fails to process.
            capture.release()
            out.release()

        if not left_hand_sequence:
            # An empty sequence cannot be aligned by DTW later on; do not save it.
            print(f"No frames decoded from {video_path}; skipping")
            continue

        # Save landmarks as .npy (a pickled dict; load with allow_pickle=True and .item())
        landmarks_data = {
            "left": np.array(left_hand_sequence),     # shape: (frames, 21, 3)
            "right": np.array(right_hand_sequence),   # shape: (frames, 21, 3)
            "label": class_name
        }
        np.save(npy_output_path, landmarks_data)
        print(f"Saved: {output_video_path} and {npy_output_path}")


Extract landmarks for testing

In [None]:
import cv2
import os
import numpy as np
import mediapipe as mp

# MediaPipe Holistic handles
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Paths
dataset_path = "DTW Dataset/MSL_Basic/test/" # path to the MSL_Basic test dataset
output_base = "DTW Dataset/MSL_Basic_Processed/test/" # path to the MSL_Basic_Processed test dataset

os.makedirs(output_base, exist_ok=True)

# Every frame is resized to this size before detection and before being written out.
frame_width = 800
frame_height = 600
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

for class_name in sorted(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    # Skip stray files BEFORE creating an output folder, so that non-class
    # entries do not end up as empty class folders in the processed dataset.
    if not os.path.isdir(class_path):
        continue

    output_class_path = os.path.join(output_base, class_name)
    os.makedirs(output_class_path, exist_ok=True)

    print(f"Processing class folder: {class_name}")

    for video_file in sorted(os.listdir(class_path)):
        if not video_file.lower().endswith((".mp4", ".mov", ".mkv")):
            continue

        video_path = os.path.join(class_path, video_file)
        output_video_path = os.path.join(output_class_path, f"processed_{video_file}")
        npy_output_path = os.path.join(output_class_path, f"{os.path.splitext(video_file)[0]}.npy")

        print(f"Processing video: {video_path}")
        capture = cv2.VideoCapture(video_path)
        if not capture.isOpened():
            print(f"Failed to open video {video_path}")
            capture.release()
            continue

        # Output video writer setup. Some containers report 0 (or NaN) FPS;
        # fall back to 30 so the annotated preview can still be written.
        fps = capture.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

        # Store landmarks
        left_hand_sequence = []
        right_hand_sequence = []

        try:
            # A fresh model per video: with static_image_mode=False the model tracks
            # landmarks from frame to frame, so reusing one instance would carry
            # tracking state from the previous (unrelated) video into this one.
            with mp_holistic.Holistic(static_image_mode=False) as holistic_model:
                while capture.isOpened():
                    ret, frame = capture.read()
                    if not ret:
                        break

                    frame = cv2.resize(frame, (frame_width, frame_height))
                    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    image.flags.writeable = False
                    results = holistic_model.process(image)
                    image.flags.writeable = True
                    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                    # Face landmarks are only drawn on the preview, not stored.
                    if results.face_landmarks:
                        mp_drawing.draw_landmarks(
                            image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(255, 0, 255), thickness=1, circle_radius=1),
                            mp_drawing.DrawingSpec(color=(0, 255, 255), thickness=1, circle_radius=1)
                        )

                    # A hand that is not detected in a frame is stored as all zeros.
                    if results.right_hand_landmarks:
                        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                        right = [[lm.x, lm.y, lm.z] for lm in results.right_hand_landmarks.landmark]
                    else:
                        right = [[0, 0, 0]] * 21  # 21 keypoints

                    if results.left_hand_landmarks:
                        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                        left = [[lm.x, lm.y, lm.z] for lm in results.left_hand_landmarks.landmark]
                    else:
                        left = [[0, 0, 0]] * 21

                    left_hand_sequence.append(left)
                    right_hand_sequence.append(right)

                    out.write(image)
        finally:
            # Release the reader and writer even if a frame fails to process.
            capture.release()
            out.release()

        if not left_hand_sequence:
            # An empty sequence cannot be aligned by DTW later on; do not save it.
            print(f"No frames decoded from {video_path}; skipping")
            continue

        # Save landmarks as .npy (a pickled dict; load with allow_pickle=True and .item())
        landmarks_data = {
            "left": np.array(left_hand_sequence),     # shape: (frames, 21, 3)
            "right": np.array(right_hand_sequence),   # shape: (frames, 21, 3)
            "label": class_name
        }
        np.save(npy_output_path, landmarks_data)
        print(f"Saved: {output_video_path} and {npy_output_path}")


# DTW

In [None]:
!pip install fastdtw

In [None]:
import numpy as np
import os

train_dir = "DTW Dataset/MSL_Basic_Processed/train/" # path to the MSL_Basic_Processed train dataset
train_samples = []

# Load each .npy file from class subfolders.
# Entries are visited in sorted order so the sample list (and therefore which
# sample wins a distance tie during prediction) is the same on every run.
for class_name in sorted(os.listdir(train_dir)):
    class_path = os.path.join(train_dir, class_name)
    # A stray file next to the class folders would make os.listdir() below raise.
    if not os.path.isdir(class_path):
        continue

    for file_name in sorted(os.listdir(class_path)):
        if not file_name.endswith(".npy"):
            continue

        # Each file holds a pickled dict, hence allow_pickle=True and .item().
        # NOTE: unpickling can execute arbitrary code; only load files produced
        # by this notebook or another trusted source.
        data = np.load(os.path.join(class_path, file_name), allow_pickle=True).item()
        left = data["left"].reshape(-1, 63)   # (T, 63)
        right = data["right"].reshape(-1, 63) # (T, 63)
        label = data["label"]

        if len(left) == 0 or len(right) == 0:
            # A zero-frame recording has nothing to align against.
            print(f"Skipping empty sample: {os.path.join(class_path, file_name)}")
            continue

        train_samples.append({"left": left, "right": right, "label": label})


In [None]:
# Summarise the loaded data instead of echoing every landmark array into the cell output.
print(f"Loaded {len(train_samples)} training samples")
[(sample["label"], sample["left"].shape, sample["right"].shape) for sample in train_samples[:5]]

In [None]:
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

def compute_total_dtw(test_left, test_right, train_left, train_right):
    """Return the combined DTW cost between a test sample and a training sample.

    The left-hand sequences are aligned with each other, then the right-hand
    sequences, each using ``fastdtw`` with a Euclidean point distance; the two
    alignment costs are added together.
    """
    hand_pairs = (
        (test_left, train_left),    # left hand vs. left hand
        (test_right, train_right),  # right hand vs. right hand
    )
    # fastdtw returns (distance, path); only the distance is needed.
    return sum(fastdtw(query, reference, dist=euclidean)[0] for query, reference in hand_pairs)

In [None]:
def predict_sign(test_data, train_samples):
    """Label a test recording with the label of its nearest training sample.

    Args:
        test_data: Dict with ``"left"`` and ``"right"`` arrays; each is
            flattened to rows of 63 values before comparison.
        train_samples: Iterable of dicts with ``"left"``, ``"right"`` and
            ``"label"`` entries.

    Returns:
        The label of the training sample with the smallest total DTW distance
        (the earliest one on ties), or ``None`` if no sample has a distance
        below infinity.
    """
    query_left, query_right = (
        test_data[hand].reshape(-1, 63) for hand in ("left", "right")
    )

    best_label, best_cost = None, float("inf")
    for candidate in train_samples:
        cost = compute_total_dtw(
            query_left, query_right, candidate["left"], candidate["right"]
        )
        if cost < best_cost:
            best_label, best_cost = candidate["label"], cost

    return best_label

In [None]:
# Smoke test: classify a single processed recording against the loaded training samples.
test_path = "DTW Dataset/MSL_Basic_Processed/test/Sorry/Sorry_01.npy" # path to a testing file
# The .npy file wraps a pickled dict, hence allow_pickle=True and .item().
test_data = np.load(test_path, allow_pickle=True).item()

# predict_sign and train_samples come from the cells above.
predicted = predict_sign(test_data, train_samples)
print("Predicted Sign:", predicted)

In [None]:
import os
import numpy as np

test_root = "DTW Dataset/MSL_Basic_Processed/test/" # path to a test folder
y_true, y_pred = [], []

# Walk every class folder and classify each saved landmark file with DTW.
for entry in os.listdir(test_root):
    folder = os.path.join(test_root, entry)
    if os.path.isdir(folder):
        npy_names = (name for name in os.listdir(folder) if name.endswith(".npy"))
        for npy_name in npy_names:
            sample = np.load(os.path.join(folder, npy_name), allow_pickle=True).item()

            # Predict first, then record the (actual, predicted) pair together.
            guess = predict_sign(sample, train_samples)
            y_true.append(sample['label'])
            y_pred.append(guess)


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Overall accuracy first, then the per-class report and the confusion matrix.
accuracy = accuracy_score(y_true, y_pred)
print("DTW Classification Accuracy:", accuracy)

for heading, metric in (
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_true, y_pred))

Convert npy files to JSON

In [None]:
import numpy as np
import json
import os

base = "DTW Dataset/MSL_Basic_Processed/train/" # path to a train folder

# Write a .json twin next to every .npy landmark file in each class folder.
for cls in sorted(os.listdir(base)):
    class_dir = os.path.join(base, cls)
    if not os.path.isdir(class_dir):
        continue

    for npy_file in sorted(os.listdir(class_dir)):
        if not npy_file.endswith(".npy"):
            continue

        data_path = os.path.join(class_dir, npy_file)
        # The file wraps a pickled dict, hence allow_pickle=True and .item().
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        data = np.load(data_path, allow_pickle=True).item()

        json_payload = {
            "left": data["left"].tolist(),   # Convert ndarray to list
            "right": data["right"].tolist(), # Convert ndarray to list
            "label": data["label"]
        }

        # Swap only the final extension; str.replace() would also rewrite any
        # ".npy" occurring earlier in the file name.
        json_path = os.path.join(class_dir, os.path.splitext(npy_file)[0] + ".json")
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(json_payload, json_file)

        print(f"Saved JSON: {json_path}")
