In [5]:
import cv2
import numpy as np
import mediapipe as mp
import os
from tqdm import tqdm
mp_holistic = mp.solutions.holistic

# Extract keypoints from frame
def extract_landmarks(results):
    """Flatten one frame's pose and hand landmarks into a single 1-D vector.

    Reads ``pose_landmarks``, ``left_hand_landmarks`` and
    ``right_hand_landmarks`` from ``results``. Every group that is present
    contributes the ``x, y, z`` values of each entry in its ``landmark``
    list; a group that is falsy (e.g. ``None``) contributes zeros instead
    (33 points for the pose, 21 points per hand).

    Returns:
        The concatenation pose -> left hand -> right hand. This is 225
        values long whenever each present group carries the point count
        assumed above.
    """
    def _xyz_or_zeros(group, n_points):
        # Missing detection: zero-fill so the vector length stays fixed.
        if not group:
            return np.zeros(n_points * 3)
        coords = [[pt.x, pt.y, pt.z] for pt in group.landmark]
        return np.array(coords).flatten()

    parts = [
        _xyz_or_zeros(results.pose_landmarks, 33),
        _xyz_or_zeros(results.left_hand_landmarks, 21),
        _xyz_or_zeros(results.right_hand_landmarks, 21),
    ]
    return np.concatenate(parts)

In [8]:
def extract_sequence_from_video(video_path, max_frames=117):
    """Run MediaPipe Holistic over a video and collect per-frame keypoints.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Upper bound on the number of frames that are read and
            processed; reading stops early when the video runs out.

    Returns:
        A 2-D array with one row of ``extract_landmarks`` output per frame
        read, at most ``max_frames`` rows. When no frame could be read
        (``cap.read()`` fails on the first call) the result is an empty
        array of shape ``(0, 225)`` so callers can still pad it with
        ``np.vstack``.
    """
    # Width of one extract_landmarks vector: 33 pose + 21 + 21 hand points, 3 coords each.
    n_features = 33 * 3 + 21 * 3 + 21 * 3

    sequence = []
    cap = cv2.VideoCapture(video_path)
    try:
        with mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            # Checking the bound before reading avoids decoding a frame
            # that would be discarded (and honours max_frames <= 0).
            while len(sequence) < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV delivers BGR frames; convert to RGB before processing.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # Read-only flag as in the MediaPipe sample code. The image is
                # not used after process(), so it is never made writeable again.
                image.flags.writeable = False
                results = holistic.process(image)

                sequence.append(extract_landmarks(results))
    finally:
        # Release the capture handle even if processing raised.
        cap.release()

    if not sequence:
        # np.array([]) would be 1-D; keep the (frames, features) layout.
        return np.empty((0, n_features))
    return np.array(sequence)


In [10]:

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

MAX_FRAMES = 117  # maximum frame length to extract
N_FEATURES = 225  # per-frame width: (33 pose + 21 + 21 hand points) * 3 coords

#DATASET_PATH = 'D:/code/Mini/Final data'
OUTPUT_PATH = 'D:/code/Mini/pro v6/extracted_sequences'


def _pad_or_truncate(sequence, max_frames=MAX_FRAMES, n_features=N_FEATURES):
    """Return ``sequence`` with exactly ``max_frames`` rows, zero-padding at the end if short."""
    if sequence.shape[0] >= max_frames:
        return sequence[:max_frames]
    padding = np.zeros((max_frames - sequence.shape[0], n_features))
    return np.vstack((sequence, padding))


X, y = [], []
labels = []  # not used in this cell; kept in case another cell references it

# os.listdir returns entries in arbitrary order, so sort both levels to keep
# the sample order (and therefore any seeded split of X) reproducible.
for word in sorted(os.listdir(OUTPUT_PATH)):
    word_path = os.path.join(OUTPUT_PATH, word)
    if not os.path.isdir(word_path):
        continue  # stray file next to the class folders

    for file in sorted(os.listdir(word_path)):
        if not file.lower().endswith('.npy'):
            continue  # only saved NumPy arrays are sequences

        file_path = os.path.join(word_path, file)
        sequence = np.load(file_path)

        if sequence.size == 0:
            # Nothing to learn from a sequence with no frames.
            print("Skipping empty sequence:", file_path)
            continue
        if sequence.ndim != 2 or sequence.shape[1] != N_FEATURES:
            raise ValueError(
                f"{file_path}: expected shape (frames, {N_FEATURES}), got {sequence.shape}"
            )

        X.append(_pad_or_truncate(sequence))
        y.append(word)  # the folder name is the class label

X = np.array(X)
y = np.array(y)
print("X shape:", X.shape)  # should be (num_samples, 117, 225)
print("y shape:", y.shape)


X shape: (3557, 117, 225)
y shape: (3557,)


In [11]:
# Map the string labels in y to integer ids, then to one-hot rows.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)
y_categorical = to_categorical(y_encoded, num_classes=len(label_encoder.classes_))

print("Classes:", label_encoder.classes_)  # the distinct label strings found in y


Classes: ['1. loud' '10. Mean' '11. rich' '12. poor' '13. thick' '17. flat'
 '18. City' '18. curved' '19. House' '19. male' '2. quiet'
 '20. Street or Road' '20. female' '21. Train Station' '22. Restaurant'
 '23. Court' '23. high' '24. School' '24. low' '25. Office' '25. soft'
 '26. University' '26. hard' '27. Park' '27. deep' '28. shallow'
 '29. clean' '3. happy' '30. dirty' '31. strong' '32. weak' '33. dead'
 '34. alive' '35. heavy' '36. light' '37. Hat' '38. Dress' '39. Key'
 '39. Suit' '39. famous' '4. sad' '40. I' '40. Paint' '40. Skirt'
 '41. Letter' '41. Shirt' '41. you' '42. Paper' '42. T-Shirt' '42. he'
 '43. Lock' '43. Pant' '43. she' '44. Shoes' '44. Telephone' '44. it'
 '45. Bag' '45. Pocket' '45. we' '46. Box' '46. Clothing'
 '46. you (plural)' '47. Gift' '47. they' '48. Card' '48. Hello'
 '49. How are you' '49. Ring' '5. Beautiful' '50. Alright' '50. Tool'
 '51. Good Morning' '52. Good afternoon' '58. Son' '59. Daughter'
 '6. Ugly' '60. Mother' '61. Father' '62. Parent' '

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, Dense, Dropout, Input, Masking
from tensorflow.keras.optimizers import Adam

# Take (timesteps, features) from the loaded data instead of repeating the
# literals, so the model always matches what X actually contains.
sequence_shape = X.shape[1:]
num_classes = len(label_encoder.classes_)

# An explicit Input layer replaces `input_shape=` on the first layer, which
# Keras warns about for Sequential models.
model = Sequential([
    Input(shape=sequence_shape),
    # Skips timesteps whose features are all 0.0. That covers the zero
    # padding, but also any real frame where extract_landmarks found no
    # pose and no hands (it zero-fills those too).
    Masking(mask_value=0.0),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    # One softmax output per label known to the encoder.
    Dense(num_classes, activation='softmax'),
])

model.compile(optimizer=Adam(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(**kwargs)


In [13]:
from sklearn.model_selection import train_test_split

# 80/20 split, stratified on the string labels and seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# The held-out 20% is also what Keras reports the val_* metrics on.
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=60,  # feel free to tune
    batch_size=32,
)
history = model.fit(X_train, y_train, **fit_options)


Epoch 1/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 92ms/step - accuracy: 0.0078 - loss: 4.6397 - val_accuracy: 0.0239 - val_loss: 4.5047
Epoch 2/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 83ms/step - accuracy: 0.0252 - loss: 4.4579 - val_accuracy: 0.0435 - val_loss: 4.1909
Epoch 3/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 85ms/step - accuracy: 0.0310 - loss: 4.2093 - val_accuracy: 0.0618 - val_loss: 3.9980
Epoch 4/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 84ms/step - accuracy: 0.0473 - loss: 4.0388 - val_accuracy: 0.0674 - val_loss: 3.8392
Epoch 5/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - accuracy: 0.0614 - loss: 3.8660 - val_accuracy: 0.0787 - val_loss: 3.6719
Epoch 6/60
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 87ms/step - accuracy: 0.0816 - loss: 3.6993 - val_accuracy: 0.0843 - val_loss: 3.5833
Epoch 7/60
[1m89/89[0m [32m━━━

In [None]:
# Persist the trained network next to the notebook.
MODEL_PATH = "isl_bilstm_model_v3.h5"
model.save(MODEL_PATH)


In [None]:
import pickle

# Store the fitted encoder so predicted class ids can be mapped back to
# their label strings outside this notebook.
ENCODER_PATH = "label_encoder_v3.pkl"
with open(ENCODER_PATH, mode="wb") as encoder_file:
    pickle.dump(label_encoder, encoder_file)
