# Model Building

20 Words to train (daily used words): <br>
"Family", "Friends", "Work", "School", "Home", "Car", "Happy", "Sad", "Play", "Help", "Eat", "Drink", "Sleep", "Sorry", "Computer", "Money", "Phone", "Cloth", "Me", "Stop"

## Import Library

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

## Keypoint using Mediapipe Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
# Color conversion 
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR to RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB to BGR
    return image, results

In [4]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=1)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=1)
                             ) 

## Extract Keypoint Values

In [5]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

## Data Preparation and Create Labels and Features

In [6]:
# Desired path for data
desired_path = "C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP"

# Creating a subfolder for MP_Data within the desired path
DATA_PATH = os.path.join(desired_path, 'ASL_Dataset') 

# Actions that we try to detect
actions = np.array(["Family", "Friends", "Work", "School", "Home", "Car", "Happy", "Sad", "Play", 
                    "Help", "Eat", "Drink", "Sleep", "Sorry", "Computer", "Money", "Phone", "Cloth", "Me", "Stop"])

# Videos are going to be 30 frames in length
sequence_length = 30

In [7]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [8]:
label_map = {label:num for num, label in enumerate(actions)}

In [9]:
label_map

{'Family': 0,
 'Friends': 1,
 'Work': 2,
 'School': 3,
 'Home': 4,
 'Car': 5,
 'Happy': 6,
 'Sad': 7,
 'Play': 8,
 'Help': 9,
 'Eat': 10,
 'Drink': 11,
 'Sleep': 12,
 'Sorry': 13,
 'Computer': 14,
 'Money': 15,
 'Phone': 16,
 'Cloth': 17,
 'Me': 18,
 'Stop': 19}

In [10]:
sequences, labels = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [11]:
np.array(sequences).shape

(800, 30, 1662)

In [12]:
np.array(labels).shape

(800,)

In [13]:
X = np.array(sequences)

In [14]:
X.shape

(800, 30, 1662)

In [35]:
y = to_categorical(labels).astype(int)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Build and Train LSTM Neutral Network 

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [18]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [19]:
LSTM_model = Sequential()
LSTM_model.add(LSTM(64, return_sequences=True, input_shape=(30,1662)))
LSTM_model.add(LSTM(128, return_sequences=True))
LSTM_model.add(LSTM(64))
LSTM_model.add(Dense(64, activation='relu'))
LSTM_model.add(Dense(32, activation='relu'))
LSTM_model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [20]:
LSTM_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [21]:
LSTM_model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 30ms/step - categorical_accuracy: 0.0518 - loss: 3.0033
Epoch 2/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - categorical_accuracy: 0.0512 - loss: 2.9975
Epoch 3/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - categorical_accuracy: 0.0671 - loss: 2.9997
Epoch 4/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - categorical_accuracy: 0.0765 - loss: 2.9947
Epoch 5/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - categorical_accuracy: 0.0648 - loss: 2.9901
Epoch 6/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 30ms/step - categorical_accuracy: 0.1105 - loss: 2.9188
Epoch 7/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 29ms/step - categorical_accuracy: 0.0593 - loss: 3.0654
Epoch 8/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x1c12f9d3890>

In [22]:
test_loss, test_accuracy = LSTM_model.evaluate(X_test, y_test)

# Print the test loss and test accuracy
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - categorical_accuracy: 0.8775 - loss: 0.4350
Test Loss: 0.473667711019516
Test Accuracy: 0.8812500238418579


In [23]:
LSTM_model.summary()

## Evaluation using Confusin Matrix and Accuracy - LSTM

In [24]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report, confusion_matrix

In [25]:
yhat = LSTM_model.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [26]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [27]:
confusion_matrix(ytrue, yhat)

array([[ 9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,
         0,  0,  0,  0],
       [ 0,  0,  8,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  2,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  2,  0,  0,  6,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0, 10,  0,  0,  0,  0,  0,  1,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0

In [28]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[151,   0],
        [  0,   9]],

       [[150,   0],
        [  1,   9]],

       [[149,   2],
        [  1,   8]],

       [[154,   2],
        [  2,   2]],

       [[153,   2],
        [  0,   5]],

       [[147,   4],
        [  0,   9]],

       [[153,   0],
        [  0,   7]],

       [[152,   0],
        [  0,   8]],

       [[152,   0],
        [  2,   6]],

       [[149,   0],
        [  1,  10]],

       [[153,   1],
        [  0,   6]],

       [[156,   1],
        [  0,   3]],

       [[151,   0],
        [  0,   9]],

       [[149,   2],
        [  0,   9]],

       [[151,   1],
        [  0,   8]],

       [[149,   3],
        [  1,   7]],

       [[149,   0],
        [  4,   7]],

       [[154,   0],
        [  3,   3]],

       [[150,   0],
        [  3,   7]],

       [[149,   1],
        [  1,   9]]], dtype=int64)

In [29]:
print(accuracy_score(ytrue, yhat))

0.88125


In [34]:
print(classification_report(ytrue, yhat, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       1.00      0.90      0.95        10
           2       0.80      0.89      0.84         9
           3       0.50      0.50      0.50         4
           4       0.71      1.00      0.83         5
           5       0.69      1.00      0.82         9
           6       1.00      1.00      1.00         7
           7       1.00      1.00      1.00         8
           8       1.00      0.75      0.86         8
           9       1.00      0.91      0.95        11
          10       0.86      1.00      0.92         6
          11       0.75      1.00      0.86         3
          12       1.00      1.00      1.00         9
          13       0.82      1.00      0.90         9
          14       0.89      1.00      0.94         8
          15       0.70      0.88      0.78         8
          16       1.00      0.64      0.78        11
          17       1.00    

## Save Model

In [2]:
# Define the base directory where you want to save the model
base_directory = r'C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP\\Models'

LSTM_model.save(f'{base_directory}\\LSTM_Model.h5')
LSTM_model.save(f'{base_directory}\\LSTM_Model.keras')



## Build and Train CNN

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.callbacks import TensorBoard

In [31]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [32]:
CNN_model = Sequential()
CNN_model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(30,1662)))
CNN_model.add(MaxPooling1D(pool_size=2))
CNN_model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
CNN_model.add(MaxPooling1D(pool_size=2))
CNN_model.add(Flatten())
CNN_model.add(Dense(64, activation='relu'))
CNN_model.add(Dense(32, activation='relu'))
CNN_model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(


In [33]:
CNN_model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [34]:
CNN_model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - categorical_accuracy: 0.0562 - loss: 3.0511
Epoch 2/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - categorical_accuracy: 0.0423 - loss: 2.9535 
Epoch 3/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.0612 - loss: 2.8198
Epoch 4/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.0758 - loss: 2.7592 
Epoch 5/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - categorical_accuracy: 0.1038 - loss: 2.5847
Epoch 6/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - categorical_accuracy: 0.1197 - loss: 2.4624
Epoch 7/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1530 - loss: 2.3703
Epoch 8/2000
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[

<keras.src.callbacks.history.History at 0x1a977a1fd10>

In [35]:
CNN_model.summary()

## Evaluation using Confusion Matrix and Accuracy - CNN

In [36]:
yhat = CNN_model.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [37]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [38]:
confusion_matrix(ytrue, yhat)

array([[ 9,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  9,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  8,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  7,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  1,  6,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  2,  0,  0,  0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0

In [39]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[151,   0],
        [  0,   9]],

       [[148,   2],
        [  1,   9]],

       [[151,   0],
        [  1,   8]],

       [[153,   3],
        [  0,   4]],

       [[155,   0],
        [  0,   5]],

       [[150,   1],
        [  2,   7]],

       [[152,   1],
        [  0,   7]],

       [[149,   3],
        [  0,   8]],

       [[152,   0],
        [  2,   6]],

       [[149,   0],
        [  2,   9]],

       [[153,   1],
        [  0,   6]],

       [[157,   0],
        [  0,   3]],

       [[151,   0],
        [  0,   9]],

       [[151,   0],
        [  0,   9]],

       [[152,   0],
        [  0,   8]],

       [[152,   0],
        [  0,   8]],

       [[149,   0],
        [  2,   9]],

       [[154,   0],
        [  1,   5]],

       [[150,   0],
        [  1,   9]],

       [[149,   1],
        [  0,  10]]], dtype=int64)

In [40]:
print(accuracy_score(ytrue, yhat))

0.925


In [41]:
print(classification_report(ytrue, yhat, zero_division=0))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.82      0.90      0.86        10
           2       1.00      0.89      0.94         9
           3       0.57      1.00      0.73         4
           4       1.00      1.00      1.00         5
           5       0.88      0.78      0.82         9
           6       0.88      1.00      0.93         7
           7       0.73      1.00      0.84         8
           8       1.00      0.75      0.86         8
           9       1.00      0.82      0.90        11
          10       0.86      1.00      0.92         6
          11       1.00      1.00      1.00         3
          12       1.00      1.00      1.00         9
          13       1.00      1.00      1.00         9
          14       1.00      1.00      1.00         8
          15       1.00      1.00      1.00         8
          16       1.00      0.82      0.90        11
          17       1.00    

## Save Model - CNN

In [4]:
# Define the base directory where you want to save the model
base_directory = r'C:\\Users\\erwin\\Desktop\\ASL_Translation_FYP\\Models'

CNN_model.save(f'{base_directory}\\CNN_Model.h5')
CNN_model.save(f'{base_directory}\\CNN_Model.keras')

