### Imports

In [41]:
import cv2
import os

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

In [42]:
gesture_labels = {
    1: "DOT",  # Doing other things
    2: "DF",   # Drumming Fingers
    3: "NG",   # No gesture
    4: "PHI",  # Pulling Hand In
    5: "PTFI", # Pulling Two Fingers In
    6: "PHA",  # Pushing Hand Away
    7: "PTFA", # Pushing Two Fingers Away
    8: "RHB",  # Rolling Hand Backward
    9: "RHF",  # Rolling Hand Forward
    10: "SH",   # Shaking Hand
    11: "STFD", # Sliding Two Fingers Down
    12: "STFL", # Sliding Two Fingers Left
    13: "STFR", # Sliding Two Fingers Right
    14: "STFU", # Sliding Two Fingers Up
    15: "SS",   # Stop Sign
    16: "SD",   # Swiping Down
    17: "SL",   # Swiping Left
    18: "SR",   # Swiping Right
    19: "SU",   # Swiping Up
    20: "TD",   # Thumb Down
    21: "TU",   # Thumb Up
    22: "THC",  # Turning Hand Clockwise
    23: "THCC", # Turning Hand Counterclockwise
    24: "ZIFH", # Zooming In With Full Hand
    25: "ZIF2F",# Zooming In With Two Fingers
    26: "ZOFH", # Zooming Out With Full Hand
    27: "ZO2F"  # Zooming Out With Two Fingers
}


In [43]:
def process_dataset(dataset_path):
    data = []
    labels = []

    for folder_number in range(1, 100000):  # Iterate over folder numbers from 1 to 99999
        gesture_name = gesture_labels.get(folder_number, None)  # Get gesture name from mapping
        if gesture_name is None:
            continue  # Skip folders without a gesture label

        gesture_path = os.path.join(dataset_path, str(folder_number))
        
        if os.path.isdir(gesture_path):  # Ensure it's a directory
            for img_file in os.listdir(gesture_path):
                img_path = os.path.join(gesture_path, img_file)
                img = cv2.imread(img_path)

                # Preprocess the image (resize, normalize)
                img_resized = cv2.resize(img, (64, 64))  # Adjust size as needed
                img_normalized = img_resized / 255.0
                data.append(img_normalized)
                labels.append(folder_number)

    return np.array(data), np.array(labels)

dataset_path = "../dataset/20bn-jester-v1"
X, y = process_dataset(dataset_path)

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [45]:
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(64,64,3)),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(128, (3,3), activation='relu'),  # Added another convolutional layer
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    Dense(256, activation='relu'),  # Increased hidden layer size
    Dropout(0.5),
    Dense(27, activation='softmax')
])

model.compile(
    optimizer=Adam(learning_rate=0.0001),  # Specify learning rate
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [46]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

history = model.fit(
    X_train, 
    y_train_encoded, 
    epochs=10, 
    validation_data=(X_test, y_test_encoded), 
    batch_size=32
)

Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 162ms/step - accuracy: 0.0550 - loss: 3.2686 - val_accuracy: 0.1392 - val_loss: 3.1665
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 154ms/step - accuracy: 0.2820 - loss: 3.0921 - val_accuracy: 0.5052 - val_loss: 2.8262
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 152ms/step - accuracy: 0.4068 - loss: 2.6415 - val_accuracy: 0.7680 - val_loss: 1.9095
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 157ms/step - accuracy: 0.6493 - loss: 1.7159 - val_accuracy: 0.9381 - val_loss: 0.7924
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 155ms/step - accuracy: 0.8129 - loss: 0.8539 - val_accuracy: 1.0000 - val_loss: 0.2565
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 159ms/step - accuracy: 0.9345 - loss: 0.3725 - val_accuracy: 1.0000 - val_loss: 0.1037
Epoch 7/10
[1m25/25[0m [3

In [49]:
# Ensure you're using the encoded test labels
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Optional: Confusion Matrix for deeper insights

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
# print("\nConfusion Matrix:")
# print(confusion_matrix(y_test_encoded, y_pred_classes))

# print("\nClassification Report:")
# print(classification_report(y_test_encoded, y_pred_classes, target_names=label_encoder.classes_))

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 1.0000 - loss: 0.0088
Test Accuracy: 1.00
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


In [51]:
model.save('gesture_recognition_model.keras')