In [1]:
pip list

Package                   Version
------------------------- ----------------
anyio                     4.6.0
appdirs                   1.4.4
apturl                    0.5.2
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 2.4.1
async-lru                 2.0.4
attrs                     24.2.0
babel                     2.16.0
backcall                  0.2.0
beautifulsoup4            4.12.3
beniget                   0.4.1
bleach                    6.1.0
blinker                   1.4
Brlapi                    0.8.3
Brotli                    1.0.9
certifi                   2020.6.20
cffi                      1.17.1
chardet                   4.0.0
charset-normalizer        3.3.2
click                     8.0.3
colorama                  0.4.4
comm                      0.2.2
command-not-found         0.3
cryptography              3.4.8
cupshelpers               1.0
cycler                    0.11.0
dbus-python           

In [3]:
import os
import numpy as np
import tensorflow as tf
import cv2
import mediapipe as mp

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

2024-09-28 15:39:46.808072: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-28 15:39:46.811654: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-28 15:39:46.822077: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-28 15:39:46.839872: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-28 15:39:46.844706: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-28 15:39:46.858049: I tensorflow/core/platform/cpu_feature_gu

In [4]:
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

I0000 00:00:1727518269.298294   16486 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1727518269.300872   17180 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: AMD Radeon RX 6600 (navi23, LLVM 15.0.7, DRM 3.57, 6.8.0-45-generic)
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1727518269.390348   17170 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1727518269.434953   17164 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1727518269.437175   17163 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1727518269.438550   17166 inference_feedback_manager.cc:114] Feedback manager

In [5]:
def extract_landmarks(frame):
    results = holistic.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    
    return np.concatenate([lh, rh, pose, face])

In [6]:
def process_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        landmarks = extract_landmarks(frame)
        frames.append(landmarks)
    cap.release()
    return np.array(frames)

In [11]:
def build_model(input_shape, num_classes):
    model = Sequential([
        # Masking(mask_value=0., input_shape=input_shape),
        LSTM(64, return_sequences=True, activation='relu'),
        LSTM(128, return_sequences=True, activation='relu'),
        LSTM(64, return_sequences=False, activation='relu'),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [12]:
def load_dataset(data_path):
    X, y, gesture_labels = [], [], []
    for gesture_id, gesture_folder in enumerate(sorted(os.listdir(data_path))):
        gesture_path = os.path.join(data_path, gesture_folder)
        gesture_labels.append(gesture_folder)
        for video_file in os.listdir(gesture_path):
            video_path = os.path.join(gesture_path, video_file)
            print(video_path)
            processed_video = process_video(video_path)
            X.append(processed_video)
            y.append(gesture_id)
    return X, np.array(y), gesture_labels

In [13]:
def pad_sequences_3d(sequences, max_length=None, padding='post'):
    if max_length is None:
        max_length = max(len(seq) for seq in sequences)
    
    feature_dim = sequences[0].shape[-1]
    padded_sequences = np.zeros((len(sequences), max_length, feature_dim))
    
    for i, seq in enumerate(sequences):
        if len(seq) > max_length:
            padded_sequences[i] = seq[-max_length:]
        else:
            if padding == 'post':
                padded_sequences[i, :len(seq)] = seq
            else:
                padded_sequences[i, -len(seq):] = seq
    
    return padded_sequences

In [14]:
def main():
    data_path = 'Adjectives/'
    X, y, gesture_labels = load_dataset(data_path)
    
    # Calculate the 95th percentile of sequence lengths
    sequence_lengths = [len(seq) for seq in X]
    max_length = int(np.percentile(sequence_lengths, 95))
    np.save('max_length.npy', max_length)
    np.save('gesture_labels.npy', gesture_labels)
    
    # Pad sequences
    X_padded = pad_sequences_3d(X, max_length=max_length)
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)
    
    # Convert labels to categorical
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    
    # Build and train the model
    input_shape = (X_padded.shape[1], X_padded.shape[2])
    num_classes = len(gesture_labels)
    model = build_model(input_shape, num_classes)
    
    history = model.fit(X_train, y_train, epochs=250, batch_size=32, validation_split=0.2)
    
    # Evaluate the model
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_acc}")
    
    # Save the model, max_length, and gesture labels
    model.save('sign_language_model.h5')

In [15]:
if __name__== "__main__":
    main()

Adjectives/Beautiful/MVI_9569.MOV
Adjectives/Beautiful/MVI_9570.MOV
Adjectives/Blind/MVI_9585.MOV
Adjectives/Blind/MVI_9584.MOV
Adjectives/Deaf/MVI_9850.MOV
Adjectives/Deaf/MVI_9851.MOV
Adjectives/Happy/MVI_5183.MOV
Adjectives/Happy/MVI_5184.MOV
Adjectives/Loud/MVI_5178.MOV
Adjectives/Loud/MVI_5177.MOV
Epoch 1/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.1667 - loss: 1.6122 - val_accuracy: 0.0000e+00 - val_loss: 119.4473
Epoch 2/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.1667 - loss: 81.3443 - val_accuracy: 0.0000e+00 - val_loss: 1467.9631
Epoch 3/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.0000e+00 - loss: 983.4334 - val_accuracy: 0.0000e+00 - val_loss: 524.9114
Epoch 4/250
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.3333 - loss: 110.5691 - val_accuracy: 0.5000 - val_loss: 1064.6504
Epoch 5/250
[1m1/1[0m 



Test accuracy: 0.5


In [1]:
import os
gesture_labels=sorted(os.listdir("Adjectives/"))

In [3]:
gesture_labels

['Beautiful', 'Blind', 'Deaf', 'Happy', 'Loud']