# Model Building

20 Words to train (daily used words): <br>
"Family", "Friends", "Work", "School", "Home", "Car", "Happy", "Sad", "Play", "Help", "Eat", "Drink", "Sleep", "Sorry", "Computer", "Money", "Phone", "Cloth", "Me", "Stop"

## Import Library

In [1]:
import cv2
import numpy as np
import os
import mediapipe as mp

## Data Preparation and Create Labels and Features

In [2]:
# Desired path for data
desired_path = "C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP"

# Creating a subfolder for MP_Data within the desired path
DATA_PATH = os.path.join(desired_path, 'ASL_Dataset') 

# Actions that we try to detect
actions = np.array(["Family", "Friends", "Work", "School", "Home", "Car", "Happy", "Sad", "Play", 
                    "Help", "Eat", "Drink", "Sleep", "Sorry", "Computer", "Money", "Phone", "Cloth", "Me", "Stop"])

# Videos are going to be 30 frames in length
sequence_length = 30

In [3]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [4]:
label_map = {label:num for num, label in enumerate(actions)}

In [5]:
label_map

{'Family': 0,
 'Friends': 1,
 'Work': 2,
 'School': 3,
 'Home': 4,
 'Car': 5,
 'Happy': 6,
 'Sad': 7,
 'Play': 8,
 'Help': 9,
 'Eat': 10,
 'Drink': 11,
 'Sleep': 12,
 'Sorry': 13,
 'Computer': 14,
 'Money': 15,
 'Phone': 16,
 'Cloth': 17,
 'Me': 18,
 'Stop': 19}

In [6]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [7]:
np.array(sequences).shape

(800, 30, 1662)

In [8]:
np.array(labels).shape

(800,)

In [9]:
X = np.array(sequences)

In [10]:
X.shape

(800, 30, 1662)

In [11]:
y = to_categorical(labels).astype(int)

In [12]:
y.shape

(800, 20)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Build and Train LSTM Neutral Network 

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.optimizers import Adam


In [15]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [16]:
LSTM_model = Sequential()
LSTM_model.add(LSTM(64, return_sequences=True, input_shape=(30,1662)))
LSTM_model.add(LSTM(128, return_sequences=True))
LSTM_model.add(LSTM(64))
LSTM_model.add(Dense(64, activation='relu'))
LSTM_model.add(Dense(32, activation='relu'))
LSTM_model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [17]:
LSTM_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [18]:
LSTM_model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - categorical_accuracy: 0.0442 - loss: 2.9992
Epoch 2/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - categorical_accuracy: 0.0555 - loss: 2.9726
Epoch 3/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - categorical_accuracy: 0.1144 - loss: 2.7489
Epoch 4/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - categorical_accuracy: 0.0811 - loss: 2.9924
Epoch 5/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - categorical_accuracy: 0.0462 - loss: 3.0021
Epoch 6/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - categorical_accuracy: 0.0405 - loss: 2.9975
Epoch 7/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - categorical_accuracy: 0.0455 - loss: 2.9961
Epoch 8/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x146936bdb90>

In [26]:
test_loss, test_accuracy = LSTM_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - categorical_accuracy: 0.9535 - loss: 0.1770 
Test Loss: 0.22588662803173065
Test Accuracy: 0.9437500238418579


In [19]:
LSTM_model.summary()

## Evaluation using Confusin Matrix and Accuracy - LSTM

In [20]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report, confusion_matrix

In [21]:
yhat = LSTM_model.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [22]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [23]:
confusion_matrix(ytrue, yhat)

array([[ 9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  9,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  8,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  1,  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  6,  0,  0,  0,  0,  0,  0,  1,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0

In [24]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[151,   0],
        [  0,   9]],

       [[149,   1],
        [  1,   9]],

       [[150,   1],
        [  1,   8]],

       [[153,   3],
        [  1,   3]],

       [[155,   0],
        [  0,   5]],

       [[151,   0],
        [  0,   9]],

       [[153,   0],
        [  1,   6]],

       [[152,   0],
        [  0,   8]],

       [[152,   0],
        [  0,   8]],

       [[149,   0],
        [  1,  10]],

       [[154,   0],
        [  0,   6]],

       [[156,   1],
        [  0,   3]],

       [[151,   0],
        [  0,   9]],

       [[149,   2],
        [  0,   9]],

       [[152,   0],
        [  0,   8]],

       [[151,   1],
        [  1,   7]],

       [[149,   0],
        [  1,  10]],

       [[154,   0],
        [  1,   5]],

       [[150,   0],
        [  1,   9]],

       [[150,   0],
        [  0,  10]]], dtype=int64)

In [25]:
print(accuracy_score(ytrue, yhat))

0.94375


In [27]:
print(classification_report(ytrue, yhat, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.90      0.90      0.90        10
           2       0.89      0.89      0.89         9
           3       0.50      0.75      0.60         4
           4       1.00      1.00      1.00         5
           5       1.00      1.00      1.00         9
           6       1.00      0.86      0.92         7
           7       1.00      1.00      1.00         8
           8       1.00      1.00      1.00         8
           9       1.00      0.91      0.95        11
          10       1.00      1.00      1.00         6
          11       0.75      1.00      0.86         3
          12       1.00      1.00      1.00         9
          13       0.82      1.00      0.90         9
          14       1.00      1.00      1.00         8
          15       0.88      0.88      0.88         8
          16       1.00      0.91      0.95        11
          17       1.00    

## Save Model

In [28]:
# Define the base directory where you want to save the model
base_directory = r'C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP\\Models'

LSTM_model.save(f'{base_directory}\\LSTM_Model.h5')




## Build and Train CNN

In [29]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.callbacks import TensorBoard

In [30]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [31]:
CNN_model = Sequential()
CNN_model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(30,1662)))
CNN_model.add(MaxPooling1D(pool_size=2))
CNN_model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
CNN_model.add(MaxPooling1D(pool_size=2))
CNN_model.add(Flatten())
CNN_model.add(Dense(64, activation='relu'))
CNN_model.add(Dense(32, activation='relu'))
CNN_model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(


In [32]:
CNN_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [33]:
CNN_model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - categorical_accuracy: 0.0496 - loss: 3.0993
Epoch 2/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - categorical_accuracy: 0.0592 - loss: 3.0099
Epoch 3/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.0936 - loss: 2.8892
Epoch 4/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1080 - loss: 2.5903
Epoch 5/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1457 - loss: 2.3388
Epoch 6/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - categorical_accuracy: 0.1915 - loss: 2.2312
Epoch 7/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - categorical_accuracy: 0.2146 - loss: 2.1373
Epoch 8/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

<keras.src.callbacks.history.History at 0x147027c2b90>

In [34]:
test_loss, test_accuracy = CNN_model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - categorical_accuracy: 0.9126 - loss: 0.6980  
Test Loss: 0.690071702003479
Test Accuracy: 0.918749988079071


In [35]:
CNN_model.summary()

## Evaluation using Confusion Matrix and Accuracy - CNN

In [36]:
yhat = CNN_model.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [37]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [38]:
confusion_matrix(ytrue, yhat)

array([[ 9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,
         0,  0,  0,  0],
       [ 0,  0,  8,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  1,  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  1,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,
         0,  1,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0

In [39]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[151,   0],
        [  0,   9]],

       [[148,   2],
        [  1,   9]],

       [[150,   1],
        [  1,   8]],

       [[153,   3],
        [  1,   3]],

       [[155,   0],
        [  0,   5]],

       [[151,   0],
        [  1,   8]],

       [[151,   2],
        [  0,   7]],

       [[151,   1],
        [  1,   7]],

       [[152,   0],
        [  1,   7]],

       [[149,   0],
        [  1,  10]],

       [[153,   1],
        [  0,   6]],

       [[157,   0],
        [  0,   3]],

       [[150,   1],
        [  0,   9]],

       [[151,   0],
        [  0,   9]],

       [[152,   0],
        [  1,   7]],

       [[151,   1],
        [  2,   6]],

       [[149,   0],
        [  2,   9]],

       [[153,   1],
        [  0,   6]],

       [[150,   0],
        [  1,   9]],

       [[150,   0],
        [  0,  10]]], dtype=int64)

In [40]:
print(accuracy_score(ytrue, yhat))

0.91875


In [41]:
print(classification_report(ytrue, yhat, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.82      0.90      0.86        10
           2       0.89      0.89      0.89         9
           3       0.50      0.75      0.60         4
           4       1.00      1.00      1.00         5
           5       1.00      0.89      0.94         9
           6       0.78      1.00      0.88         7
           7       0.88      0.88      0.88         8
           8       1.00      0.88      0.93         8
           9       1.00      0.91      0.95        11
          10       0.86      1.00      0.92         6
          11       1.00      1.00      1.00         3
          12       0.90      1.00      0.95         9
          13       1.00      1.00      1.00         9
          14       1.00      0.88      0.93         8
          15       0.86      0.75      0.80         8
          16       1.00      0.82      0.90        11
          17       0.86    

## Save Model - CNN

In [42]:
base_directory = r'C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP\\Models'
CNN_model.save(f'{base_directory}\\CNN_Model.h5')


