In [None]:
# Mount Google Drive at /content/drive so files stored there can be read and written.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Install Dependencies

In [None]:
!pip install mediapipe==0.10.9
!pip install tdqm

Collecting tdqm
  Downloading tdqm-0.0.1.tar.gz (1.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tdqm
  Building wheel for tdqm (setup.py) ... [?25l[?25hdone
  Created wheel for tdqm: filename=tdqm-0.0.1-py3-none-any.whl size=1322 sha256=019a0d6e685d7a8e0850e989740de9c471e45e41b68d6ee9abc50f1df039bdf9
  Stored in directory: /root/.cache/pip/wheels/c8/c7/30/e5935be2cfa6883be72462333edc414d8928f3c78eaabec38a
Successfully built tdqm
Installing collected packages: tdqm
Successfully installed tdqm-0.0.1


# MediaPipe Landmark Extraction

In [None]:
import os

import cv2
import mediapipe as mp
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tqdm.auto import tqdm

In [None]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back a BGR frame plus the results.

    Args:
        image: Frame in BGR channel order.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(bgr_image, results)`` where ``bgr_image`` is the frame converted
        to RGB and back, and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def draw_landmarks(image, results):
    """Draw the detected left- and right-hand skeletons onto ``image``.

    ``mp.solutions.drawing_utils.draw_landmarks`` annotates the image in place
    and returns ``None``, so its return value must not be assigned back to
    ``image``.

    Args:
        image: Frame to annotate; it is modified in place.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks`` (either may be ``None`` when that hand
            was not detected).

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    mp_holistic = mp.solutions.holistic  # Holistic model
    mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

    # Both hands share the same look, so build the specs once.
    landmark_spec = mp_drawing.DrawingSpec(
        color=(232, 254, 255), thickness=1, circle_radius=4
    )
    connection_spec = mp_drawing.DrawingSpec(
        color=(255, 249, 161), thickness=2, circle_radius=2
    )

    # Left hand first, then right hand (same order as before).
    for hand_landmarks in (results.left_hand_landmarks, results.right_hand_landmarks):
        mp_drawing.draw_landmarks(
            image,
            landmark_list=hand_landmarks,
            connections=mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=landmark_spec,
            connection_drawing_spec=connection_spec,
        )
    return image

In [None]:
# --- HandModel class ---
class HandModel:
    """Pose descriptor for a single hand.

    The descriptor (``feature_vector``) is the flat list of angles, in radians,
    between every ordered pair of hand-connection vectors.
    """

    def __init__(self, landmarks):
        """Build the feature vector for one hand.

        Args:
            landmarks: 21 landmarks with (x, y, z) each; anything ``np.array``
                can reshape to ``(21, 3)``.

        Raises:
            ValueError: If ``landmarks`` cannot be reshaped to ``(21, 3)``.
        """
        self.connections = mp.solutions.holistic.HAND_CONNECTIONS
        landmarks = np.array(landmarks).reshape((21, 3))
        self.feature_vector = self._get_feature_vector(landmarks)

    def _get_connections_from_landmarks(self, landmarks):
        """Return one vector per connection ``(i, j)``: ``landmarks[j] - landmarks[i]``."""
        # NOTE(review): the vector order (and therefore the feature order) follows the
        # iteration order of ``self.connections`` -- confirm it is stable across the
        # environments that produce and consume these features.
        return [landmarks[j] - landmarks[i] for (i, j) in self.connections]

    def _get_feature_vector(self, landmarks):
        """Return the angles between all ordered pairs of connection vectors.

        The result is flattened row by row: entry ``a * n + b`` is the angle
        between connection ``a`` and connection ``b``. Pairs that involve a
        zero-length vector, or NaN coordinates, yield ``0``.

        Args:
            landmarks: Indexable collection of landmark coordinate arrays.

        Returns:
            A list of ``n * n`` floats, where ``n`` is the number of connections.
        """
        vectors = np.asarray(self._get_connections_from_landmarks(landmarks), dtype=float)
        if vectors.size == 0:
            return []

        # Norms and dot products for every pair at once instead of per pair.
        norms = np.linalg.norm(vectors, axis=1)
        dots = vectors @ vectors.T
        norm_products = np.outer(norms, norms)

        # Default cosine of 1 (angle 0) wherever a norm is zero, as before.
        cosines = np.ones_like(dots)
        np.divide(dots, norm_products, out=cosines, where=norm_products != 0)

        # Round-off can push a cosine marginally outside [-1, 1]; arccos would then
        # return NaN, which used to be recorded as angle 0 even for opposite vectors.
        with np.errstate(invalid="ignore"):
            angles = np.arccos(np.clip(cosines, -1.0, 1.0))

        # NaN inputs still map to 0.
        return np.nan_to_num(angles, nan=0.0).ravel().tolist()

# --- Process each video ---
def extract_hand_features_from_video(video_path, label):
    """Compute per-frame hand features for one video.

    Every frame is passed through MediaPipe Holistic. For each hand a
    ``HandModel`` feature vector is stored, or a zero vector when that hand
    was not detected in the frame.

    Args:
        video_path: Path of the video file to read.
        label: Value stored unchanged under the ``"label"`` key.

    Returns:
        dict with
            ``"left"``: array of shape ``(n_frames, 441)``,
            ``"right"``: array of shape ``(n_frames, 441)``,
            ``"label"``: the ``label`` argument.
        A video that yields no frames gives arrays of shape ``(0, 441)``.
    """
    feature_dim = 441  # length of HandModel.feature_vector

    def hand_features(hand_landmarks):
        """Feature vector for one hand, or zeros when the hand was not detected."""
        # Decide on the presence of landmarks, not on the coordinate sum: the
        # coordinates of a real detection can add up to exactly zero.
        if hand_landmarks is None:
            return [0] * feature_dim
        coords = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
        return HandModel(coords).feature_vector

    mp_holistic = mp.solutions.holistic
    cap = cv2.VideoCapture(video_path)

    left_hand_list = []
    right_hand_list = []

    try:
        with mp_holistic.Holistic(static_image_mode=False) as holistic:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # The frame is converted from BGR to RGB before being processed.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                left_hand_list.append(hand_features(results.left_hand_landmarks))
                right_hand_list.append(hand_features(results.right_hand_landmarks))
    finally:
        # Release the capture even if detection raised part-way through.
        cap.release()

    # reshape keeps the (n_frames, 441) layout even when no frame was read.
    return {
        "left": np.array(left_hand_list, dtype=float).reshape(-1, feature_dim),
        "right": np.array(right_hand_list, dtype=float).reshape(-1, feature_dim),
        "label": label,
    }

In [None]:
root_dir = "GRU Dataset/Videos" # path to access videos dataset
output_dir = "GRU Dataset/hand_landmarks" # path to save dataset
os.makedirs(output_dir, exist_ok=True)

# One sub-folder per sign; sorted() makes the processing order reproducible.
for sign in tqdm(sorted(os.listdir(root_dir))):
    sign_dir = os.path.join(root_dir, sign)
    if not os.path.isdir(sign_dir):
        # Stray files next to the sign folders would make os.listdir(sign_dir) fail.
        continue
    save_sign_dir = os.path.join(output_dir, sign)
    os.makedirs(save_sign_dir, exist_ok=True)

    for filename in sorted(os.listdir(sign_dir)):
        stem, extension = os.path.splitext(filename)
        if extension != ".mkv":
            continue
        video_path = os.path.join(sign_dir, filename)
        features = extract_hand_features_from_video(video_path, label=filename.split(".")[0])
        # Swap only the extension; str.replace would also rewrite ".mkv" inside the name.
        save_path = os.path.join(save_sign_dir, stem + ".npy")
        # The dict is stored as a pickled object array: load it back with
        # np.load(..., allow_pickle=True).item().
        np.save(save_path, features)

In [None]:
import numpy as np
import glob
import os
from sklearn.preprocessing import LabelBinarizer

# Helper function to process each .npy file
def process_npy(path, n_frames=30, feature_dim=441):
    """Load one saved feature file and turn it into a fixed-length sequence.

    Args:
        path: ``.npy`` file containing a pickled dict with the keys ``'left'``,
            ``'right'`` and ``'label'``.
        n_frames: Number of frames in the returned sequence. Shorter inputs are
            zero-padded at the end, longer inputs keep their first ``n_frames``.
        feature_dim: Width of the per-hand feature vector.

    Returns:
        Tuple ``(sequence, label)``. ``sequence`` has shape
        ``(n_frames, 2 * feature_dim)`` with the left-hand features followed by
        the right-hand features; ``label`` is the stored label up to its first
        ``'_'``.
    """
    # NOTE: allow_pickle=True unpickles arbitrary objects. Only load files that
    # were produced by this notebook.
    data = np.load(path, allow_pickle=True).item()

    def pad_sequence(seq):
        """Zero-pad or truncate ``seq`` to exactly ``n_frames`` rows."""
        seq = np.asarray(seq, dtype=float)
        if seq.size == 0:
            # No frames at all: start from an empty (0, feature_dim) block so the
            # concatenation below has matching widths.
            seq = np.zeros((0, feature_dim))
        elif seq.ndim == 1:
            seq = np.expand_dims(seq, axis=0)  # a single frame -> (1, feature_dim)

        if seq.shape[0] < n_frames:
            padding = np.zeros((n_frames - seq.shape[0], feature_dim))
            return np.concatenate([seq, padding])
        return seq[:n_frames]

    left = pad_sequence(data['left'])
    right = pad_sequence(data['right'])

    # Combine left and right hand embeddings along the feature axis.
    sequence = np.concatenate([left, right], axis=-1)
    label = data['label'].split('_')[0]  # e.g., 'Congratulations' from 'Congratulations_00...'
    return sequence, label

# Gather every saved feature file in a fixed (sorted) order
feature_files = sorted(
    glob.glob("GRU Dataset/hand_landmarks/**/*.npy", recursive=True)  # path to access hand_landmarks dataset
)

X, labels = [], []
for path in feature_files:
    sequence, label = process_npy(path)
    X.append(sequence)
    labels.append(label)

# Stack the per-video sequences into one array (first axis = video)
X = np.array(X)

# One-hot encode the text labels
encoder = LabelBinarizer()
y = encoder.fit_transform(labels)
# Column order of the one-hot encoding
print("Label order used:", encoder.classes_)

# Round-trip one sample through the encoder as a sanity check
idx = 0
print("X[idx] file comes from label:", labels[idx])  # original label
print("Encoded label one-hot:", y[idx])              # one-hot encoded
print("Decoded label:", encoder.inverse_transform(y[idx:idx + 1])[0])  # decoded back


print("✅ X shape:", X.shape)
print("✅ y shape:", y.shape)


Label order used: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']
X[idx] file comes from label: Congratulations
Encoded label one-hot: [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Decoded label: Congratulations
✅ X shape: (750, 30, 882)
✅ y shape: (750, 15)


In [None]:
# 80/20 split stratified on the one-hot labels; the text labels are split in the
# same call so all three stay aligned.
(
    X_train, X_test,
    y_train, y_test,
    labels_train, labels_test,
) = train_test_split(
    X,
    y,
    labels,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Spot-check one training sample: text label, one-hot row and decoded row must agree
idx = 1
print("Original label:", labels_train[idx])  # original label
print("Encoded label one-hot:", y_train[idx])  # one-hot encoded
print("Decoded label:", encoder.inverse_transform(y_train[idx:idx + 1])[0])  # decoded label back to text

Original label: Name
Encoded label one-hot: [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
Decoded label: Name


# Inspection on X and y

In [None]:
# Summary of the full feature array: its shape, its first sequence, the label of
# that sequence and the distinct labels present (e.g., 15 unique classes).
for caption, value in (
    ("Shape of X:", X.shape),
    ("First sequence (left hand and right hand combined) shape:", X[0].shape),
    ("First sequence (data):\n", X[0]),
    ("First label:", labels[0]),
    ("Unique labels:", np.unique(labels)),
):
    print(caption, value)

Shape of X: (750, 30, 882)
First sequence (left hand and right hand combined) shape: (30, 882)
First sequence (data):
 [[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.18863386 2.21773203 ... 0.10121714 0.04432434 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]
First label: Congratulations
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']


In [None]:
# Inspect the shape and content of the one-hot encoded labels in y
print("Shape of y:", y.shape)
print("First one-hot label shape:", y[0].shape)
print("First one-hot label (data):\n", y[0])

# Text label that the first one-hot row was encoded from
print("First label:", labels[0])

# Distinct classes present in the dataset (e.g., 15 unique classes)
print("Unique labels:", np.unique(labels))

Shape of y: (750, 15)
First sequence (left hand and right hand combined) shape: (15,)
First sequence (data):
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
First label: Congratulations
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']


In [None]:
import numpy as np

# Write every split to its own .npy file: features and one-hot labels first,
# then the text labels for training and testing (if needed)
for target_path, payload in (
    ('GRU Dataset/X_train.npy', X_train),            # path to save X_train file
    ('GRU Dataset/X_test.npy', X_test),              # path to save X_test file
    ('GRU Dataset/y_train.npy', y_train),            # path to save y_train file
    ('GRU Dataset/y_test.npy', y_test),              # path to save y_test file
    ('GRU Dataset/labels_train.npy', labels_train),  # path to save labels_train file
    ('GRU Dataset/labels_test.npy', labels_test),    # path to save labels_test file
):
    np.save(target_path, payload)

print("Data saved successfully!")

In [None]:
# Size of the training split and its first five text labels.
print("labels_train length:", len(labels_train))
print("labels_train sample:", labels_train[:5])

labels_train length: 600
labels_train sample: ['Good-bye', 'Name', 'Happy-birthday', 'Excuse', 'Sorry']


In [None]:
# Inspect the shape and content of the first sequence in the training split
print("Shape of X_train:", X_train.shape)
print("First sequence (left hand and right hand combined) shape:", X_train[0].shape)
print("First sequence (data):\n", X_train[0])

# Text label and one-hot label of that first training sequence
print("First label:", labels_train[0])
print("First label in y_train: ",y_train[0])

# Classes present in the training split itself (e.g., 15 unique classes);
# checking the full label list here would say nothing about the split.
print("Unique labels:", np.unique(labels_train))

Shape of X: (600, 30, 882)
First sequence (left hand and right hand combined) shape: (30, 882)
First sequence (data):
 [[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.99102011e-01
  1.80539208e-02 2.10734243e-08]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 1.99373491e-01
  1.38549548e-02 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 2.08303783e-01
  1.20770765e-02 0.00000000e+00]]
First label: Good-bye
First label in y_train:  [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-we

In [None]:
# Inspect the shape and content of the one-hot labels in the training split
print("Shape of y_train:", y_train.shape)
print("First one-hot label shape:", y_train[0].shape)
print("First one-hot label (data):\n", y_train[0])

# Text label that the first one-hot row belongs to
print("First label:", labels_train[0])

# Classes present in the training split itself (e.g., 15 unique classes)
print("Unique labels:", np.unique(labels_train))

Shape of y: (600, 15)
First sequence (left hand and right hand combined) shape: (15,)
First sequence (data):
 [0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
First label: Good-bye
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']


In [None]:
# Inspect the shape and content of the first sequence in the test split
print("Shape of X_test:", X_test.shape)
print("First sequence (left hand and right hand combined) shape:", X_test[0].shape)
print("First sequence (data):\n", X_test[0])

# Text label of that first test sequence
print("First label:", labels_test[0])

# Classes present in the test split itself (e.g., 15 unique classes)
print("Unique labels:", np.unique(labels_test))

Shape of X: (150, 30, 882)
First sequence (left hand and right hand combined) shape: (30, 882)
First sequence (data):
 [[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [2.10734243e-08 3.03998786e-01 1.38895722e+00 ... 1.75888413e+00
  4.33582060e-01 0.00000000e+00]
 [0.00000000e+00 3.94697377e-01 8.72727391e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 4.72851941e-01 7.87183823e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]
First label: Congratulations
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']


In [None]:
# Inspect the shape and content of the one-hot labels in the test split
print("Shape of y_test:", y_test.shape)
print("First one-hot label shape:", y_test[0].shape)
print("First one-hot label (data):\n", y_test[0])

# Text label that the first one-hot row belongs to
print("First label:", labels_test[0])

# Classes present in the test split itself (e.g., 15 unique classes)
print("Unique labels:", np.unique(labels_test))

Shape of y: (150, 15)
First sequence (left hand and right hand combined) shape: (15,)
First sequence (data):
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
First label: Congratulations
Unique labels: ['Congratulations' 'Excuse' 'Good-afternoon' 'Good-bye' 'Good-evening'
 'Good-morning' 'Good-night' 'Happy-birthday' 'Hi,-hello' 'How-are-you'
 'Name' 'No' 'Sorry' 'Thank-you' 'You-are-welcome']


# Build and Train GRU model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dense, Input, Dropout, BatchNormalization, LeakyReLU, GRU
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf

In [None]:
# Sequence classifier: masked GRU encoder followed by two dense blocks and a
# 15-way softmax. The input shape is declared with an explicit Input layer
# rather than `input_shape=` on the first layer, which Keras warns about.
model_GRU = Sequential([
    Input(shape=(30, 882)),
    # Time steps whose features are all 0.0 are masked out for the GRU.
    Masking(mask_value=0.0),
    GRU(64, return_sequences=False, activation='tanh',
        kernel_regularizer=tf.keras.regularizers.L2(l2=0.044),
        recurrent_regularizer=tf.keras.regularizers.L2(l2=0.014)),
    Dense(64, kernel_regularizer=tf.keras.regularizers.L2(l2=0.027)),
    BatchNormalization(),
    LeakyReLU(),
    Dense(32, kernel_regularizer=tf.keras.regularizers.L2(l2=0.027)),
    BatchNormalization(),
    LeakyReLU(),
    Dense(15, activation='softmax'),
])

  super().__init__(**kwargs)


In [None]:
# Essential.
# Learning-rate decay: the loss was increasing abruptly while fitting the model,
# which meant that the learning rate was not small enough.
# PS. The problem was alleviated further after implementing batch normalization.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.002,
    decay_steps=8,
    decay_rate=0.5,
    staircase=False,
)

In [None]:
# Adam follows the decaying learning-rate schedule; loss and metric are the
# categorical variants because the targets are one-hot encoded.
model_GRU.compile(
    optimizer=tf.optimizers.Adam(learning_rate=lr_schedule),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

In [None]:
# Print the layer-by-layer overview of the compiled model.
model_GRU.summary()

In [None]:
# Per-class loss weights, keyed by class index (position in the list below).
class_weights = dict(enumerate(
    [1.0, 2.0, 3.0, 1.5, 1.2, 1.1, 1.8, 1.5, 1.4, 2.0, 1.0, 1.7, 1.6, 1.2, 1.8]
))
# Part of the training data is held out for validation via validation_split.
history_GRU = model_GRU.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=64,
    validation_split=0.0667,
    class_weight=class_weights,
)

Epoch 1/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 291ms/step - categorical_accuracy: 0.1669 - loss: 20.5397 - val_categorical_accuracy: 0.1220 - val_loss: 15.7373
Epoch 2/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - categorical_accuracy: 0.3676 - loss: 15.5323 - val_categorical_accuracy: 0.1951 - val_loss: 13.0235
Epoch 3/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 83ms/step - categorical_accuracy: 0.4905 - loss: 12.8025 - val_categorical_accuracy: 0.1707 - val_loss: 11.3307
Epoch 4/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step - categorical_accuracy: 0.5381 - loss: 11.1192 - val_categorical_accuracy: 0.1707 - val_loss: 10.2027
Epoch 5/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 82ms/step - categorical_accuracy: 0.5638 - loss: 10.1065 - val_categorical_accuracy: 0.1707 - val_loss: 9.4181
Epoch 6/150
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, classification_report

In [None]:
# Model output for every test sequence (one row of softmax scores per sample).
yhat = model_GRU.predict(X_test)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 129ms/step


In [None]:
# Collapse one-hot rows / score rows to plain class indices.
# NOTE: yhat is overwritten here, so this cell can only be run once after the
# prediction cell.
ytrue = y_test.argmax(axis=1).tolist()
yhat = yhat.argmax(axis=1).tolist()

In [None]:
# One 2x2 one-vs-rest confusion matrix per class, laid out as [[TN, FP], [FN, TP]].
multilabel_confusion_matrix(ytrue, yhat)

array([[[139,   1],
        [  4,   6]],

       [[128,  12],
        [  0,  10]],

       [[135,   5],
        [  2,   8]],

       [[136,   4],
        [  4,   6]],

       [[137,   3],
        [  3,   7]],

       [[137,   3],
        [  4,   6]],

       [[135,   5],
        [  3,   7]],

       [[139,   1],
        [  6,   4]],

       [[138,   2],
        [  3,   7]],

       [[139,   1],
        [  2,   8]],

       [[140,   0],
        [  0,  10]],

       [[140,   0],
        [  1,   9]],

       [[140,   0],
        [  4,   6]],

       [[139,   1],
        [  1,   9]],

       [[140,   0],
        [  1,   9]]])

In [None]:
# Fraction of test samples whose predicted class equals the true class.
accuracy_score(ytrue, yhat)

0.7466666666666667

In [None]:
# Take the class names from the fitted encoder so the report rows always follow
# the one-hot column order, instead of repeating the list by hand.
class_names = encoder.classes_.tolist()
print(classification_report(
    ytrue,
    yhat,
    # Explicit indices keep names and rows aligned even if a class is missing
    # from both ytrue and yhat.
    labels=list(range(len(class_names))),
    target_names=class_names,
))

                 precision    recall  f1-score   support

Congratulations       0.86      0.60      0.71        10
         Excuse       0.45      1.00      0.62        10
 Good-afternoon       0.62      0.80      0.70        10
       Good-bye       0.60      0.60      0.60        10
   Good-evening       0.70      0.70      0.70        10
   Good-morning       0.67      0.60      0.63        10
     Good-night       0.58      0.70      0.64        10
 Happy-birthday       0.80      0.40      0.53        10
      Hi,-hello       0.78      0.70      0.74        10
    How-are-you       0.89      0.80      0.84        10
           Name       1.00      1.00      1.00        10
             No       1.00      0.90      0.95        10
          Sorry       1.00      0.60      0.75        10
      Thank-you       0.90      0.90      0.90        10
You-are-welcome       1.00      0.90      0.95        10

       accuracy                           0.75       150
      macro avg       0.79   

# Save GRU Model

In [None]:
# Save the trained model to Google Drive; the .h5 suffix selects the HDF5 format.
# NOTE(review): confirm that "/content/drive/My Drive" resolves on the current
# Colab runtime (the Drive root may be exposed as "MyDrive").
model_GRU.save("/content/drive/My Drive/GRUModel.h5") 

In [None]:
# Build the converter directly from the in-memory Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(model_GRU)

# Allow the built-in TFLite ops plus the Select TF Ops fallback
# (for ops without a built-in kernel, such as TensorListReserve).
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]

# Resource variables on, TensorList lowering off: both are set for the GRU layer.
converter.experimental_enable_resource_variables = True
converter._experimental_lower_tensor_list_ops = False

# Run the conversion; the result is the serialized model.
model_tflite = converter.convert()

# Write the serialized model next to the Keras model on Drive.
with open("/content/drive/My Drive/GRUModel.tflite", "wb") as tflite_file:
    tflite_file.write(model_tflite)

print("✅ TFLite model with Select TF Ops saved successfully.")