In [1]:
import numpy as np
import cv2
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed,
                                     Dropout, BatchNormalization, Bidirectional)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [2]:
train_dir = r"C:\Users\HP-PC\Desktop\Python data analytics\Hand Gesture Data\Project_data\train"
val_dir = r"C:\Users\HP-PC\Desktop\Python data analytics\Hand Gesture Data\Project_data\val"
train_csv = r"C:\Users\HP-PC\Desktop\Python data analytics\Hand Gesture Data\Project_data\train.csv"
val_csv = r"C:\Users\HP-PC\Desktop\Python data analytics\Hand Gesture Data\Project_data\val.csv"

In [3]:
train_df = pd.read_csv(train_csv, sep=";", header=None, names=['filename', 'gesture', 'label'])
val_df = pd.read_csv(val_csv, sep=";", header=None, names=['filename', 'gesture', 'label'])


# Load and preprocess video frames

In [4]:
def load_video(video_path, frame_count=30, img_size=(64, 64)):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while len(frames) < frame_count:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, img_size)
        frames.append(frame / 255.0)  # Normalize
    cap.release()
    
    # Padding
    while len(frames) < frame_count:
        frames.append(np.zeros((img_size[0], img_size[1], 3)))
    
    return np.array(frames)

# Load Dataset

In [5]:
def load_dataset(csv_file, base_dir, frame_count=30, img_size=(64, 64), num_classes=5):
    df = pd.read_csv(csv_file, sep=";", header=None, names=['filename', 'gesture', 'label'])
    X, y = [], []
    
    for _, row in df.iterrows():
        video_path = os.path.join(base_dir, row['filename'])
        if os.path.exists(video_path):
            frames = load_video(video_path, frame_count, img_size)
            X.append(frames)
            y.append(row['label'])
        else:
            print(f"Warning: Video file {video_path} not found!")  
    
    return np.array(X), to_categorical(y, num_classes=num_classes)

In [6]:
X_train, y_train = load_dataset(train_csv, train_dir)
X_val, y_val = load_dataset(val_csv, val_dir)

In [7]:
import pandas as pd

df = pd.read_csv(train_csv, sep=";", header=None)  # Try different delimiters if needed
print(df.head())  # Check what the first few rows look like


                                          0               1  2
0  WIN_20180925_17_08_43_Pro_Left_Swipe_new  Left_Swipe_new  0
1  WIN_20180925_17_18_28_Pro_Left_Swipe_new  Left_Swipe_new  0
2  WIN_20180925_17_18_56_Pro_Left_Swipe_new  Left_Swipe_new  0
3  WIN_20180925_17_19_51_Pro_Left_Swipe_new  Left_Swipe_new  0
4  WIN_20180925_17_20_14_Pro_Left_Swipe_new  Left_Swipe_new  0


# define CNN + BiLSTM + GRU model

In [7]:
def build_model(input_shape=(30, 64, 64, 3), num_classes=5):
    model = Sequential([
        TimeDistributed(Conv2D(16, (3, 3), activation='relu', padding='same'), input_shape=input_shape),
        TimeDistributed(BatchNormalization()),
        TimeDistributed(MaxPooling2D(2, 2)),

        TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same')),
        TimeDistributed(BatchNormalization()),
        TimeDistributed(MaxPooling2D(2, 2)),

        TimeDistributed(Flatten()),  # Replaces GlobalAveragePooling2D

        Bidirectional(LSTM(128, return_sequences=False, dropout=0.3)),  # Removed GRU
        
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')  
    ])
    model.compile(optimizer=Adam(learning_rate=0.0005), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [10]:
callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6),
    EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True),
    ModelCheckpoint('gesture_recognition_model.keras', save_best_only=True, monitor='val_accuracy', mode='max')
]


In [11]:
model = build_model()
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=40, batch_size=16, callbacks=callbacks)

model.save("gesture_recognition_model.h5")

  super().__init__(**kwargs)


Epoch 1/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 3s/step - accuracy: 0.2348 - loss: 1.6095 - val_accuracy: 0.1800 - val_loss: 1.6094 - learning_rate: 5.0000e-04
Epoch 2/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 3s/step - accuracy: 0.1843 - loss: 1.6095 - val_accuracy: 0.1800 - val_loss: 1.6094 - learning_rate: 5.0000e-04
Epoch 3/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 3s/step - accuracy: 0.1577 - loss: 1.6095 - val_accuracy: 0.2100 - val_loss: 1.6091 - learning_rate: 5.0000e-04
Epoch 4/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 2s/step - accuracy: 0.1705 - loss: 1.6096 - val_accuracy: 0.2100 - val_loss: 1.6091 - learning_rate: 5.0000e-04
Epoch 5/40
[1m16/42[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m1:06[0m 3s/step - accuracy: 0.1539 - loss: 1.6100

KeyboardInterrupt: 

In [None]:
def plot_history(history):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Train Accuracy')
    plt.plot(history.history['val_accuracy'], label='Val Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Model Accuracy')
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Val Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Model Loss')
    
    plt.show()
plot_history(history)