In [34]:
!pip install mediapipe opencv-python numpy tensorflow




In [35]:
import os
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical


In [36]:
DATASET_PATH = "/content/drive/MyDrive/ISL"  # Folder containing videos
SAVE_PATH = "/content/drive/MyDrive/Processed_ISL"
SEQUENCE_LENGTH = 30  # Fixed number of frames per video

if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)


In [37]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)


In [38]:
def extract_landmarks(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints_sequence = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                keypoints = np.array([[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]).flatten()
                if keypoints.shape[0] == 63:  # Ensure 21 keypoints * 3 (x, y, z)
                    keypoints_sequence.append(keypoints)

    cap.release()

    # Pad or truncate to fixed length
    if len(keypoints_sequence) > SEQUENCE_LENGTH:
        keypoints_sequence = keypoints_sequence[:SEQUENCE_LENGTH]
    else:
        padding = np.zeros((SEQUENCE_LENGTH - len(keypoints_sequence), 63))
        keypoints_sequence = np.vstack((keypoints_sequence, padding))

    return np.array(keypoints_sequence)


In [39]:
labels = []
data = []

for label in os.listdir(DATASET_PATH):
    label_path = os.path.join(DATASET_PATH, label)

    if os.path.isdir(label_path):
        for video_file in os.listdir(label_path):
            video_path = os.path.join(label_path, video_file)
            keypoints_sequence = extract_landmarks(video_path)

            if keypoints_sequence.shape == (SEQUENCE_LENGTH, 63):  # Ensure correct shape
                data.append(keypoints_sequence)
                labels.append(label)

# Convert to NumPy arrays and save
data = np.array(data)
labels = np.array(labels)

np.save(os.path.join(SAVE_PATH, "X.npy"), data)
np.save(os.path.join(SAVE_PATH, "y.npy"), labels, allow_pickle=True)


In [40]:
X = np.load(os.path.join(SAVE_PATH, "X.npy"))
y = np.load(os.path.join(SAVE_PATH, "y.npy"), allow_pickle=True)

print(f"X shape: {X.shape}, y shape: {y.shape}")


X shape: (15, 30, 63), y shape: (15,)


In [42]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Convert labels to integers
y_categorical = to_categorical(y_encoded)   # One-hot encode labels


In [46]:
from tensorflow.keras.layers import Dropout, BatchNormalization

model = Sequential([
    LSTM(128, return_sequences=True, activation='relu', input_shape=(SEQUENCE_LENGTH, 63)),
    Dropout(0.2),  # Prevent overfitting
    LSTM(256, return_sequences=True, activation='relu'),
    Dropout(0.2),
    LSTM(128, return_sequences=False, activation='relu'),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(len(np.unique(y)), activation='softmax')
])

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),  # Lower LR for stable training
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [47]:
model.fit(X, y_categorical, epochs=50, batch_size=8, validation_split=0.2)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - accuracy: 0.0000e+00 - loss: 2.7016 - val_accuracy: 0.0000e+00 - val_loss: 2.7067
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 211ms/step - accuracy: 0.0972 - loss: 2.7369 - val_accuracy: 0.0000e+00 - val_loss: 2.7148
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.0000e+00 - loss: 2.7067 - val_accuracy: 0.0000e+00 - val_loss: 2.7230
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - accuracy: 0.1528 - loss: 2.6132 - val_accuracy: 0.0000e+00 - val_loss: 2.7284
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step - accuracy: 0.1111 - loss: 2.6569 - val_accuracy: 0.0000e+00 - val_loss: 2.7342
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step - accuracy: 0.2917 - loss: 2.5379 - val_accuracy: 0.0000e+00 - val_loss: 2.7378
Epoch 7/50

<keras.src.callbacks.history.History at 0x7955663a8810>