# Sign Language Recognition with RNN

This notebook trains an RNN model to recognize sign language from landmark data extracted using MediaPipe.

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(gpu)
    except RuntimeError as e:
        print('error'+ e)


PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
from numba import cuda

cuda.select_device(0)
cuda.close()
cuda.select_device(0)


<weakproxy at 0x0000021BCE329990 to Device at 0x0000021BCE30C6D0>

## Load and Preprocess Data

In [4]:
# Load the CSV file
train_df = pd.read_csv('train.csv')

In [5]:
# selected_words = ["TV", "after", "airplane", "all", "alligator", "animal", "another", "any", "apple", "arm"]
selected_words = ["TV", "after", "airplane", "all", "alligator"]

# Filter the dataframe to include only the selected words
filtered_df = train_df[train_df['sign'].isin(selected_words)]

# Group by 'sign' and select 10 sequences for each word
subset_df = filtered_df.groupby('sign').head(200)

In [6]:
subset_df

Unnamed: 0,path,participant_id,sequence_id,sign
50,train_landmark_files/61333/1002052130.parquet,61333,1002052130,TV
73,train_landmark_files/62590/1002885072.parquet,62590,1002885072,alligator
84,train_landmark_files/22343/1003347075.parquet,22343,1003347075,TV
187,train_landmark_files/32319/1007376023.parquet,32319,1007376023,all
197,train_landmark_files/61333/1007883975.parquet,61333,1007883975,alligator
...,...,...,...,...
53564,train_landmark_files/55372/3192381381.parquet,55372,3192381381,after
53686,train_landmark_files/49445/319779922.parquet,49445,319779922,after
54446,train_landmark_files/53618/3226528685.parquet,53618,3226528685,after
54901,train_landmark_files/32319/324454876.parquet,32319,324454876,after


In [7]:
subset_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 50 to 55213
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   path            1000 non-null   object
 1   participant_id  1000 non-null   int64 
 2   sequence_id     1000 non-null   int64 
 3   sign            1000 non-null   object
dtypes: int64(2), object(2)
memory usage: 39.1+ KB


In [8]:
# Function to load parquet files
def load_parquet_file(filepath):
    df = pd.read_parquet(filepath)
    return df

In [9]:
def preprocess_data(subset_df):
    sequences = []
    labels = []
    
    for index, row in subset_df.iterrows():
        path = row['path']
        label = row['sign']
        
        # Load the landmark file
        landmark_df = load_parquet_file(path)
        
        # Extract x, y coordinates
        face = landmark_df[landmark_df['type'] == 'face'][['x', 'y']].values
        left_hand = landmark_df[landmark_df['type'] == 'left_hand'][['x', 'y']].values
        pose = landmark_df[landmark_df['type'] == 'pose'][['x', 'y']].values
        right_hand = landmark_df[landmark_df['type'] == 'right_hand'][['x', 'y']].values
        
        # Ensure the landmarks have consistent lengths
        # max_landmarks = max(len(face), len(left_hand), len(pose), len(right_hand))
        face = np.pad(face, ((0, 468 - len(face)), (0, 0)), mode='constant')
        left_hand = np.pad(left_hand, ((0, 21 - len(left_hand)), (0, 0)), mode='constant')
        pose = np.pad(pose, ((0, 33 - len(pose)), (0, 0)), mode='constant')
        right_hand = np.pad(right_hand, ((0, 21 - len(right_hand)), (0, 0)), mode='constant')
        
        # Concatenate the landmarks
        sequence = np.concatenate([face, left_hand, pose, right_hand], axis=0)
        
        sequences.append(sequence)
        labels.append(label)
    
    # Calculate the maximum sequence length
    
    # Pad the sequences to ensure they all have the same length
    max_sequence_length = 500  # Set a maximum sequence length
    sequences_padded = pad_sequences(sequences, maxlen=max_sequence_length, padding='post', dtype='float32')
    
    # Label encoding
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)
    joblib.dump(label_encoder, 'label_encoder.pkl')
    
    return sequences_padded, labels_encoded, max_sequence_length, label_encoder


In [10]:
sequences_padded, labels_encoded, max_seq_len, label_encoder = preprocess_data(subset_df)


ValueError: index can't contain negative values

## Build the RNN Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


In [None]:
model = Sequential()

# Masking layer for handling variable sequence lengths
model.add(Masking(mask_value=0., input_shape=(max_seq_len, sequences_padded.shape[2])))

# LSTM layers with dropout for regularization
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(0.3))  # Dropout layer to prevent overfitting
model.add(LSTM(256))
model.add(Dropout(0.3))

# Dense output layer with softmax activation for multi-class classification
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model with Adam optimizer and categorical cross-entropy loss
optimizer = Adam(learning_rate=0.001)

# Print model summary


In [None]:


# model = Sequential()
# model.add(Masking(mask_value=0., input_shape=(max_seq_len, sequences_padded.shape[2])))  # Adjust input shape based on sequence length and number of features
# model.add(LSTM(128, return_sequences=True))
# model.add(LSTM(128))
# model.add(Dense(len(label_encoder.classes_), activation='softmax'))


In [None]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking (Masking)           (None, 500, 2)            0         
                                                                 
 lstm (LSTM)                 (None, 500, 256)          265216    
                                                                 
 dropout (Dropout)           (None, 500, 256)          0         
                                                                 
 lstm_1 (LSTM)               (None, 256)               525312    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense (Dense)               (None, 5)                 1285      
                                                                 
Total params: 791,813
Trainable params: 791,813
Non-trai

## Train the Model

In [None]:
model.fit(sequences_padded, labels_encoded, epochs=20, batch_size=4, validation_split=0.1)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1465f974040>

## Evaluate the Model

In [None]:
loss, accuracy = model.evaluate(sequences_padded, labels_encoded)
print(f'Loss: {loss}, Accuracy: {accuracy}')

Loss: 1.6101627349853516, Accuracy: 0.20000000298023224


In [None]:
model.save('my_model.keras')


In [None]:
model.save('my_model.h5')

In [None]:
type(sequences_padded)


numpy.ndarray

: 

## Convert to TensorFlow Lite

In [None]:
# # Convert the model to TensorFlow Lite format
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# tflite_model = converter.convert()

# # Save the model
# with open('model.tflite', 'wb') as f:
#     f.write(tflite_model)