In [8]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import mediapipe as mp

# Module-level MediaPipe Holistic detector; extract_landmarks() below reads
# this `holistic` global for every frame it processes.
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

def extract_landmarks(frame):
    """Run the module-level Holistic detector on one frame and flatten the result.

    Args:
        frame: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB`` before
            detection (i.e. treated as BGR).

    Returns:
        1-D array made of, in order: left hand, right hand, pose, face.
        Hands and face contribute (x, y, z) per landmark; pose contributes
        (x, y, z, visibility). A group the detector did not report is replaced
        by zeros of length 21*3, 21*3, 33*4 and 468*3 respectively.
    """
    detection = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    def _flat(group, n_points, fields):
        # Missing group -> zero placeholder so the vector layout stays fixed.
        if not group:
            return np.zeros(n_points * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    left_hand = _flat(detection.left_hand_landmarks, 21, xyz)
    right_hand = _flat(detection.right_hand_landmarks, 21, xyz)
    body_pose = _flat(detection.pose_landmarks, 33, xyz + ('visibility',))
    face_mesh = _flat(detection.face_landmarks, 468, xyz)

    return np.concatenate([left_hand, right_hand, body_pose, face_mesh])

def process_video(video_path, max_frames=30):
    """Convert a video file into a fixed-length sequence of landmark vectors.

    Reads up to ``max_frames`` frames from ``video_path``, runs
    ``extract_landmarks`` on each, and zero-pads at the end when the video is
    shorter than ``max_frames``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of rows in the returned sequence.

    Returns:
        Array with ``max_frames`` rows, one landmark vector per row.

    Raises:
        ValueError: If ``max_frames`` is positive but no frame could be read
            (missing file, directory, or a non-video file). Previously this
            surfaced as an opaque ``IndexError`` from the padding step.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while len(frames) < max_frames and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(extract_landmarks(frame))
    finally:
        # Release the capture even if landmark extraction raises.
        cap.release()

    if max_frames > 0 and not frames:
        raise ValueError(f"No frames could be read from video: {video_path!r}")

    # The loop never collects more than max_frames, so only padding is needed.
    missing = max_frames - len(frames)
    if missing > 0:
        frames.extend(np.zeros_like(frames[0]) for _ in range(missing))

    return np.array(frames)

def load_dataset(data_path, max_frames=30):
    """Load every gesture video under ``data_path`` into feature/label arrays.

    ``data_path`` is expected to contain one sub-directory per gesture class.
    Class ids are assigned by enumerating the sorted sub-directory names.

    Args:
        data_path: Root directory of the dataset.
        max_frames: Sequence length forwarded to ``process_video``.

    Returns:
        Tuple ``(X, y)``: ``X`` stacks the per-video arrays returned by
        ``process_video``; ``y`` holds the integer class id of each video.
        Both are empty arrays when no video file is found.
    """
    X, y = [], []

    # Only directories are gesture classes; stray files at the top level
    # (e.g. .DS_Store) would otherwise make os.listdir() below raise.
    gesture_folders = sorted(
        name for name in os.listdir(data_path)
        if os.path.isdir(os.path.join(data_path, name))
    )

    for gesture_id, gesture_folder in enumerate(gesture_folders):
        gesture_path = os.path.join(data_path, gesture_folder)
        # Sorted so sample order (and any seeded split downstream) is reproducible.
        for video_file in sorted(os.listdir(gesture_path)):
            video_path = os.path.join(gesture_path, video_file)
            # Skip hidden files and nested folders; neither is a video.
            if video_file.startswith('.') or not os.path.isfile(video_path):
                continue
            X.append(process_video(video_path, max_frames))
            y.append(gesture_id)

    return np.array(X), np.array(y)


def build_model(input_shape, num_classes):
    """Build and compile the stacked-LSTM sequence classifier.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first LSTM layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Three recurrent layers; only the last one collapses the time axis.
    layer_stack = [
        LSTM(64, return_sequences=True, activation='relu', input_shape=input_shape),
        LSTM(128, return_sequences=True, activation='relu'),
        LSTM(64, return_sequences=False, activation='relu'),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ]
    classifier = Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier


# --- Load data, train the classifier, evaluate, and save ---
data_path = 'adjectives/'
X, y = load_dataset(data_path)
if len(X) == 0:
    raise ValueError(f"No videos found under {data_path!r}")

# Derive the class count from the labels instead of hard-coding it, so the
# softmax width always matches the dataset on disk.
num_classes = int(y.max()) + 1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pin the one-hot width: without num_classes, to_categorical infers it from the
# largest label present in each split, which can be narrower than the model's
# output when the highest class id is missing from that split.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

input_shape = (X.shape[1], X.shape[2])  # (frames, features)
model = build_model(input_shape, num_classes)
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2)
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {test_acc}")
# Save the model
model.save('sign_language_model.h5')

I0000 00:00:1726301992.593202   31225 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1726301992.699845  126651 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726301992.717296  126651 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726301992.721341  126652 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726301992.721620  126650 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726301992.721939  126655 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support 

Epoch 1/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 57ms/step - accuracy: 0.0256 - loss: 9.9663 - val_accuracy: 0.0159 - val_loss: 49.4983
Epoch 2/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.0143 - loss: 29.4716 - val_accuracy: 0.0238 - val_loss: 39.8544
Epoch 3/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.0041 - loss: 24.9945 - val_accuracy: 0.0317 - val_loss: 5.1553
Epoch 4/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - accuracy: 0.0316 - loss: 4.6766 - val_accuracy: 0.0317 - val_loss: 4.1728
Epoch 5/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.0241 - loss: 5.8839 - val_accuracy: 0.0159 - val_loss: 4.0718
Epoch 6/250
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.0090 - loss: 4.0809 - val_accuracy: 0.0238 - val_loss: 4.1307
Epoch 7/250
[1m16/16[0



Test accuracy: 0.018987340852618217


In [5]:
import os
import shutil
# Delete every sub-folder named "extra" (any letter case) found directly
# inside each gesture folder of the dataset. This is destructive.
adjectives_folder = "adjectives"
for folder_name in os.listdir(adjectives_folder):
    folder_path = os.path.join(adjectives_folder, folder_name)
    if not os.path.isdir(folder_path):
        continue
    for subfolder_name in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder_name)
        if not os.path.isdir(subfolder_path):
            continue
        if subfolder_name.lower() != 'extra':
            continue
        print(f"Removing extra folder: {subfolder_path}")
        shutil.rmtree(subfolder_path)

Removing extra folder: adjectives/5. Beautiful/Extra
