System Configuration

In [1]:
import tensorflow as tf
from numba import cuda

In [2]:
# Enable memory growth on every visible GPU and print each device that was configured.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print(gpu)
    except RuntimeError as e:
        # Format the exception instead of concatenating it: 'error' + e is
        # str + RuntimeError, which raises TypeError and hides the real error.
        print(f'error: {e}')


PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
# Select GPU 0, close the CUDA context via numba, then select GPU 0 again.
# NOTE(review): presumably this resets the device to release memory held by an
# earlier run — confirm it does not interfere with TensorFlow's own GPU context.
# The final select_device call is the cell's last expression, so its return
# value is what the notebook displays.
cuda.select_device(0)
cuda.close()
cuda.select_device(0)


<weakproxy at 0x00000152A2E6E9D0 to Device at 0x00000152A2E564D0>

# Sign Language Recognition with RNN

This notebook trains an RNN model to recognize sign language from landmark data extracted using MediaPipe.

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.sequence import pad_sequences


## Load Preprocessed Data

In [5]:

# Load the combined CSV file into a DataFrame
# NOTE(review): both paths are absolute and machine-specific; adjust them before
# running this notebook elsewhere. The commented-out path is an alternative input file.
# csv_file_path = 'D:/Projects/AIML/HandSign/Dataset_CSVs/transformed_data.csv'
csv_file_path = 'D:/Projects/AIML/HandSign/Dataset_CSVs/keypoints_data.csv'

df = pd.read_csv(csv_file_path)


In [6]:
print(df.head())

   face_0_x  face_0_y  face_1_x  face_1_y  face_2_x  face_2_y  face_3_x  \
0  0.480987  0.600960  0.486844  0.549152  0.483475  0.566798  0.479284   
1  0.515435  0.612552  0.521321  0.560277  0.517003  0.578228  0.511478   
2  0.499053  0.592653  0.503803  0.538680  0.500637  0.558055  0.494142   
3  0.437985  0.589782  0.439237  0.538740  0.438284  0.556461  0.429700   
4  0.420540  0.584587  0.419919  0.535113  0.419445  0.551029  0.408256   

   face_3_y  face_4_x  face_4_y  ...  pose_28_y  pose_29_x  pose_29_y  \
0  0.495447  0.488247  0.532376  ...   3.211675   0.699006   3.337574   
1  0.506666  0.522563  0.543482  ...   3.176484   0.687595   3.314011   
2  0.487191  0.504793  0.522018  ...   3.123261   0.685968   3.249211   
3  0.488853  0.439693  0.522769  ...   3.069936   0.615897   3.201243   
4  0.484223  0.419772  0.519081  ...   3.183950   0.663719   3.308161   

   pose_30_x  pose_30_y  pose_31_x  pose_31_y  pose_32_x  pose_32_y  label  
0   0.386315   3.310355   0.65276

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Columns: 1087 entries, face_0_x to label
dtypes: float64(1086), object(1)
memory usage: 1.5+ MB


In [8]:
df.shape

(175, 1087)

In [9]:
import os
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential



In [10]:
# Split the DataFrame columns into three feature groups by name prefix,
# preserving the original column order within each group.
left_hand_columns, right_hand_columns, pose_columns = (
    [name for name in df.columns if name.startswith(prefix)]
    for prefix in ('left_hand', 'right_hand', 'pose')
)

# Ensure the data is in the correct shape (number_of_samples, number_of_frames, number_of_features_per_frame)
def reshape_data(df, columns, num_frames):
    """Stack the selected columns into a 3-D array of fixed-length sequences.

    Rows are grouped in order: every consecutive run of ``num_frames`` rows
    becomes one sample. Trailing rows that do not fill a complete sample are
    dropped, matching how the labels are truncated elsewhere in this notebook,
    so features and labels stay aligned instead of the reshape raising
    ``ValueError`` on a row count that is not a multiple of ``num_frames``.

    Args:
        df: DataFrame holding one row per frame.
        columns: Column names to extract as per-frame features.
        num_frames: Number of consecutive rows that make up one sample.

    Returns:
        Array of shape ``(len(df) // num_frames, num_frames, len(columns))``.
    """
    num_samples = len(df) // num_frames
    # Keep only the rows that belong to complete samples.
    usable_rows = num_samples * num_frames
    data = df[columns].values[:usable_rows]
    return data.reshape(num_samples, num_frames, len(columns))


# Number of consecutive rows (frames) that are grouped into one sample.
# NOTE(review): with num_frames = 1 every sample is a single-step sequence —
# confirm this is intended for the LSTM branches used later.
num_frames = 1 # This should be the length of the time series

# Build one (samples, frames, features) array per feature group.
left_hand_data = reshape_data(df, left_hand_columns, num_frames)
right_hand_data = reshape_data(df, right_hand_columns, num_frames)
pose_data = reshape_data(df, pose_columns, num_frames)

In [11]:
# Report the (samples, frames, features) shape of each feature group.
for group_name, group_array in (
    ("Left hand", left_hand_data),
    ("Right hand", right_hand_data),
    ("Pose", pose_data),
):
    print(f"{group_name} data shape:", group_array.shape)

Left hand data shape: (175, 1, 42)
Right hand data shape: (175, 1, 42)
Pose data shape: (175, 1, 66)


In [12]:
num_frames

1

In [13]:
# Extract one label per sample. First keep only the rows that fill complete
# samples (a multiple of num_frames), then group the labels by sample and take
# the label of each sample's first frame.
labels = df['label'].values[:len(df) // num_frames * num_frames]
labels = labels.reshape(len(labels) // num_frames, num_frames)[:, 0]  # Assuming one label per sample


In [14]:
labels.shape

(175,)

In [15]:
# Map the string labels to integer class ids, then one-hot encode those ids
# so they can be used with the categorical cross-entropy loss.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
onehot_encoder = OneHotEncoder(sparse_output=False)
labels_onehot = onehot_encoder.fit_transform(labels_encoded.reshape(-1, 1))

# Create the output directory first: open(..., 'wb') does not create missing
# parent directories and would fail with FileNotFoundError on a fresh checkout.
os.makedirs('Models', exist_ok=True)

# with open('Models/label_encoder_word.pkl', 'wb') as file:
#     pickle.dump(label_encoder, file)

# Persist the fitted label encoder so class ids can be mapped back to labels later.
with open('Models/label_encoder_letter.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

In [16]:


# Define input shapes for each branch as (frames per sample, features per frame).
# Both hand branches share a single shape, derived from the right-hand columns.
# NOTE(review): this assumes the left- and right-hand groups have the same
# number of columns (the printed shapes above show 42 for each) — confirm if the schema changes.
hand_input_shape = (num_frames, len(right_hand_columns))
pose_input_shape = (num_frames, len(pose_columns))





In [17]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Concatenate, Multiply
from tensorflow.keras.optimizers import Adam

def create_multi_branch_model(hand_input_shape, pose_input_shape, number_of_classes, hand_units=50, pose_units=25):
    """Build and compile a three-branch LSTM classifier.

    The model has one branch each for the left hand, the right hand and the
    pose. Every branch passes its input through an LSTM and then multiplies the
    LSTM output by a single value produced from that same output by a bias-free
    linear ``Dense(1)`` layer. The three scaled outputs are concatenated and
    fed to a softmax layer.

    Args:
        hand_input_shape: Input shape (without the batch dimension) used for
            both hand branches.
        pose_input_shape: Input shape (without the batch dimension) used for
            the pose branch.
        number_of_classes: Number of units in the softmax output layer.
        hand_units: LSTM units in each hand branch.
        pose_units: LSTM units in the pose branch.

    Returns:
        A Keras ``Model`` taking ``[left_hand, right_hand, pose]`` inputs,
        compiled with the Adam optimizer, categorical cross-entropy loss and
        an accuracy metric.
    """
    # (input shape, LSTM units) per branch, in model-input order.
    branch_specs = (
        (hand_input_shape, hand_units),  # left hand
        (hand_input_shape, hand_units),  # right hand
        (pose_input_shape, pose_units),  # pose
    )

    # Input layers for each set of features.
    branch_inputs = [Input(shape=shape) for shape, _ in branch_specs]

    # One independent LSTM encoder per branch.
    encoded = [
        LSTM(units=units)(tensor)
        for (_, units), tensor in zip(branch_specs, branch_inputs)
    ]

    # Weighted combination of branches: scale each encoding by a value
    # computed from the encoding itself.
    weighted = []
    for features in encoded:
        scale = Dense(1, activation='linear', use_bias=False)(features)
        weighted.append(Multiply()([features, scale]))

    # Merge the branches and classify.
    combined_output = Concatenate()(weighted)
    final_output = Dense(units=number_of_classes, activation='softmax')(combined_output)

    model = Model(inputs=branch_inputs, outputs=final_output)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    return model


In [18]:
# The number of output classes is the number of categories the one-hot encoder was fitted on.
number_of_classes = len(onehot_encoder.categories_[0])
# Build the compiled multi-branch model with the default LSTM sizes.
model = create_multi_branch_model(hand_input_shape, pose_input_shape,  number_of_classes)



In [19]:
# Split data into training and validation sets
# All four arrays are split together so their rows stay aligned; 20% of the
# samples go to validation and random_state fixes the shuffle.
# NOTE(review): the split is not stratified — with few samples per class some
# classes may be absent from the validation set; consider passing stratify=.
X_left_train, X_left_val, X_right_train, X_right_val, X_pose_train, X_pose_val, y_train, y_val = train_test_split(
    left_hand_data, right_hand_data, pose_data, labels_onehot, test_size=0.2, random_state=42)


In [20]:
# Bundle the per-branch arrays once, in the order the model's inputs expect.
train_inputs = [X_left_train, X_right_train, X_pose_train]
val_inputs = [X_left_val, X_right_val, X_pose_val]

# Train the model, tracking validation metrics after every epoch.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=1000,
    batch_size=32,
)

# Evaluate the model on the validation set.
loss, accuracy = model.evaluate(val_inputs, y_val)
print(f"Validation Loss: {loss}, Validation Accuracy: {accuracy}")

Epoch 1/1000
1/5 [=====>........................] - ETA: 33s - loss: 3.2589 - accuracy: 0.1562

In [None]:

# Save the trained model to an HDF5 (.h5) file under Models/.
# The commented-out line is the alternative output path for the word-level model.
model.save('Models/sign_language_letter_model_2.h5')
# model.save('Models/sign_language_word_model_2.h5')