# Sign Language Recognition with RNN

This notebook trains an RNN model to recognize sign language from landmark data extracted using MediaPipe.

In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [2]:
# Ask TensorFlow to grow GPU memory on demand for every visible GPU
# (see the tf.config.experimental.set_memory_growth documentation).
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(gpu)
    except RuntimeError as e:
        # Convert the exception to text before concatenating: 'error' + e
        # raises TypeError and would hide the original RuntimeError.
        print('error' + str(e))


PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
from numba import cuda

# Select GPU 0, close the CUDA context(s) numba holds, then select GPU 0 again.
# NOTE(review): presumably this is meant to release GPU memory left over from a
# previous run before training starts — confirm it is still needed, and that it
# does not interfere with the TensorFlow GPU setup done in the previous cell.
cuda.select_device(0)
cuda.close()
cuda.select_device(0)


<weakproxy at 0x0000023F79945B70 to Device at 0x0000023F79934460>

## Load Preprocessed Data

In [4]:

# Load the combined CSV file into a DataFrame
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is pointed at a local copy of the CSV.
# Alternative input kept for reference (currently disabled):
# csv_file_path = 'D:/Projects/AIML/HandSign/Dataset_CSVs/transformed_data.csv'
csv_file_path = 'D:/Projects/AIML/HandSign/Dataset_CSVs/keypoints_data.csv'

# One row per frame; the printed head shows landmark coordinate columns plus a 'label' column.
df = pd.read_csv(csv_file_path)




In [5]:
print(df.head())

   frame  face_0_x  face_0_y  face_1_x  face_1_y  face_2_x  face_2_y  \
0     18  0.446865  0.556099  0.435207  0.519262  0.438583  0.531356   
1     19  0.446033  0.553307  0.436424  0.516476  0.439572  0.528051   
2     20  0.444841  0.553031  0.435007  0.517342  0.438359  0.528179   
3     21  0.438405  0.555483  0.432000  0.517451  0.435937  0.529129   
4     22  0.436116  0.555547  0.432138  0.518918  0.435548  0.530537   

   face_3_x  face_3_y  face_4_x  ...  pose_28_y  pose_29_x  pose_29_y  \
0  0.423945  0.494480  0.433858  ...   2.011694   0.634389   2.089952   
1  0.424151  0.491375  0.434981  ...   1.971428   0.645629   2.029157   
2  0.422074  0.491810  0.433355  ...   1.979363   0.668940   2.030304   
3  0.419451  0.492080  0.430299  ...   2.013071   0.677119   2.062510   
4  0.419620  0.492879  0.430561  ...   2.030979   0.676004   2.079525   

   pose_30_x  pose_30_y  pose_31_x  pose_31_y  pose_32_x  pose_32_y  label  
0   0.285221   2.081855   0.559667   2.169359   0.3

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1634 entries, 0 to 1633
Columns: 1088 entries, frame to label
dtypes: float64(1086), int64(1), object(1)
memory usage: 13.6+ MB


In [7]:
df.shape

(1634, 1088)

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pickle



In [9]:
# Group the feature columns by landmark source using their name prefix.
# Only the hand and pose groups are selected here; the face_* columns that
# appear in the DataFrame are not used.
left_hand_columns = [name for name in df.columns if name.startswith('left_hand')]
right_hand_columns = [name for name in df.columns if name.startswith('right_hand')]
pose_columns = [name for name in df.columns if name.startswith('pose')]

def reshape_data(df, columns, num_frames):
    """Reshape the selected columns into (num_samples, num_frames, num_features).

    Consecutive rows of ``df`` are grouped into sequences of ``num_frames`` rows.
    Trailing rows that do not fill a complete sequence are dropped, which matches
    how the labels are truncated when they are extracted later in the notebook.

    Args:
        df: DataFrame holding one row per frame.
        columns: List of column names to use as per-frame features.
        num_frames: Number of consecutive rows that make up one sample.

    Returns:
        Array of shape ``(len(df) // num_frames, num_frames, len(columns))``.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be a positive integer, got {num_frames}")

    num_samples = len(df) // num_frames
    # Keep only the rows that form complete sequences; without this the reshape
    # fails whenever len(df) is not an exact multiple of num_frames.
    data = df[columns].values[:num_samples * num_frames]
    return data.reshape(num_samples, num_frames, len(columns))


# Assuming num_frames is known
# NOTE(review): with num_frames = 1 every CSV row becomes its own one-step
# sequence (the shapes printed below are (1634, 1, n_features)), so the LSTM
# branches see no temporal context. Confirm this is intended for this dataset.
num_frames = 1 # This should be the length of the time series

# Build one (samples, frames, features) array per landmark group.
left_hand_data = reshape_data(df, left_hand_columns, num_frames)
right_hand_data = reshape_data(df, right_hand_columns, num_frames)
pose_data = reshape_data(df, pose_columns, num_frames)

In [10]:
# Report the (samples, frames, features) shape of each landmark group.
for group_name, group_array in (
    ("Left hand", left_hand_data),
    ("Right hand", right_hand_data),
    ("Pose", pose_data),
):
    print(f"{group_name} data shape:", group_array.shape)

Left hand data shape: (1634, 1, 42)
Right hand data shape: (1634, 1, 42)
Pose data shape: (1634, 1, 66)


In [11]:
num_frames

1

In [12]:
# Take one label per sample: drop trailing rows that do not fill a complete
# sequence, group the remaining labels into rows of num_frames, and keep the
# first label of each group (assumes every frame in a sequence has the same label).
labels = (
    df['label']
    .values[:len(df) // num_frames * num_frames]
    .reshape(len(df) // num_frames, num_frames)[:, 0]
)


In [13]:
labels.shape

(1634,)

In [14]:
# Map the labels to integer ids, then expand the ids to one-hot vectors
# (the model is compiled with the categorical_crossentropy loss).
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
onehot_encoder = OneHotEncoder(sparse_output=False)
labels_onehot = onehot_encoder.fit_transform(labels_encoded.reshape(-1, 1))

# open(..., 'wb') does not create missing parent directories, so make sure the
# output folder exists before writing (otherwise a fresh checkout fails here).
os.makedirs('Models', exist_ok=True)

# Alternative output name kept for reference (currently disabled):
# with open('Models/label_encoder_word.pkl', 'wb') as file:
#     pickle.dump(label_encoder, file)

# Persist the fitted LabelEncoder so class ids can be mapped back to labels later.
# NOTE(review): this file is named "..._letter" while the model is saved as
# "sign_language_word_model_2.h5" — confirm which naming is intended.
with open('Models/label_encoder_letter.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

In [15]:


# Define input shapes for each branch
# Both hand inputs of the model are created from the single hand_input_shape,
# so the left and right hand arrays must have the same number of feature columns.
assert len(left_hand_columns) == len(right_hand_columns), (
    "left and right hand feature counts differ; the hand branches share one input shape"
)
hand_input_shape = (num_frames, len(right_hand_columns))
pose_input_shape = (num_frames, len(pose_columns))





In [16]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Concatenate, Multiply
from tensorflow.keras.optimizers import Adam

def create_multi_branch_model(hand_input_shape, pose_input_shape, number_of_classes, hand_units=50, pose_units=25):
    """Build and compile the three-input (left hand, right hand, pose) LSTM classifier.

    Each input goes through its own LSTM. The LSTM output is then multiplied by a
    single value produced by a bias-free, linear Dense(1) layer applied to that
    same output. The three weighted outputs are concatenated and fed to a softmax
    Dense layer with ``number_of_classes`` units.

    Args:
        hand_input_shape: Input shape used for both the left and right hand inputs.
        pose_input_shape: Input shape used for the pose input.
        number_of_classes: Number of units in the softmax output layer.
        hand_units: LSTM units for each hand branch.
        pose_units: LSTM units for the pose branch.

    Returns:
        A Keras ``Model`` taking ``[left_hand, right_hand, pose]`` inputs, compiled
        with Adam, categorical cross-entropy loss and the accuracy metric.
    """
    # (input shape, LSTM units) per branch, in model-input order: left, right, pose.
    branch_specs = (
        (hand_input_shape, hand_units),
        (hand_input_shape, hand_units),
        (pose_input_shape, pose_units),
    )

    # Create all inputs first, then all LSTMs, then the weighting layers.
    branch_inputs = [Input(shape=shape) for shape, _ in branch_specs]
    branch_features = [
        LSTM(units=units)(tensor)
        for tensor, (_, units) in zip(branch_inputs, branch_specs)
    ]

    # Scale every branch by a learned value computed from its own LSTM output.
    weighted_branches = [
        Multiply()([features, Dense(1, activation='linear', use_bias=False)(features)])
        for features in branch_features
    ]

    merged = Concatenate()(weighted_branches)
    class_probabilities = Dense(units=number_of_classes, activation='softmax')(merged)

    model = Model(inputs=branch_inputs, outputs=class_probabilities)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [17]:
# The number of output classes is the number of categories the one-hot encoder was fitted on.
number_of_classes = len(onehot_encoder.categories_[0])
# Build the compiled model, keeping the default hand_units / pose_units.
model = create_multi_branch_model(hand_input_shape, pose_input_shape,  number_of_classes)



In [18]:
# Hold out 20% of the samples for validation. All four arrays are passed to a
# single train_test_split call, with a fixed random_state for a repeatable split.
(
    X_left_train, X_left_val,
    X_right_train, X_right_val,
    X_pose_train, X_pose_val,
    y_train, y_val,
) = train_test_split(
    left_hand_data,
    right_hand_data,
    pose_data,
    labels_onehot,
    test_size=0.2,
    random_state=42,
)


In [19]:
# Train on the three landmark inputs, tracking the held-out split after every epoch.
history = model.fit(
    [X_left_train, X_right_train, X_pose_train],
    y_train,
    epochs=200,
    batch_size=32,
    validation_data=([X_left_val, X_right_val, X_pose_val], y_val),
)

# Evaluate the model
# Note: this reuses the validation split that was monitored during training;
# there is no separate test set in this notebook.
loss, accuracy = model.evaluate([X_left_val, X_right_val, X_pose_val], y_val)
print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [20]:

# Save the model
# NOTE(review): the file name says "word" while the label encoder pickled earlier
# is written to 'Models/label_encoder_letter.pkl' — confirm the two artifacts
# belong together before loading them as a pair.
model.save('Models/sign_language_word_model_2.h5')
