In [1]:
import numpy as np
import cv2
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, LSTM, TimeDistributed,
                                     Dropout, BatchNormalization, Bidirectional, GRU,
                                     GlobalAveragePooling2D, SpatialDropout1D, LayerNormalization, ConvLSTM2D)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [2]:
def load_video(video_path, frame_count=50, img_size=(64, 64)):
    """Sample a fixed number of frames from a video into a normalised array.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_count: Number of frame slots in the returned array.
        img_size: ``(height, width)`` of every returned frame.

    Returns:
        A ``float32`` array of shape
        ``(frame_count, img_size[0], img_size[1], 3)`` holding the resized
        frames divided by 255. No colour conversion is applied. Slots for
        which no frame could be read (file that reports zero frames, clip
        shorter than ``frame_count``, failed read) are left as zeros.
    """
    height, width = img_size
    frames = np.zeros((frame_count, height, width, 3), dtype=np.float32)  # Preallocate memory

    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        if total_frames <= frame_count:
            # Short clip: take every available frame once, pad the rest with zeros.
            positions = range(max(total_frames, 0))
        else:
            # Spread the slots evenly over the whole clip. A fixed integer
            # stride (total // frame_count) would only cover the beginning of
            # clips whose length is not a multiple of frame_count.
            positions = [(slot * total_frames) // frame_count for slot in range(frame_count)]

        for slot, position in enumerate(positions):
            cap.set(cv2.CAP_PROP_POS_FRAMES, position)
            ret, frame = cap.read()
            if not ret:
                break
            # cv2.resize takes dsize as (width, height), the reverse of the
            # (rows, cols) layout used for the output buffer.
            resized = cv2.resize(frame, (width, height), interpolation=cv2.INTER_LINEAR)
            frames[slot] = resized / 255.0
    finally:
        # Release the capture handle even if decoding or resizing raises.
        cap.release()

    return frames

In [3]:
def load_dataset(csv_file, base_dir, frame_count=50, img_size=(64, 64), num_classes=5):
    """Load every listed video that exists on disk, together with one-hot labels.

    The CSV is read as header-less, semicolon-separated rows of
    ``filename;gesture;label``. Rows whose file is not found under
    ``base_dir`` are skipped without an error.

    Args:
        csv_file: Path of the label CSV.
        base_dir: Directory that the ``filename`` column is relative to.
        frame_count: Forwarded to ``load_video``.
        img_size: Forwarded to ``load_video``.
        num_classes: Width of the one-hot label vectors.

    Returns:
        ``(X, y)`` where ``X`` is a ``float32`` array stacking the result of
        ``load_video`` for each kept row and ``y`` is
        ``to_categorical(labels, num_classes=num_classes)``.
    """
    table = pd.read_csv(csv_file, sep=";", header=None, names=['filename', 'gesture', 'label'])

    clips, labels = [], []
    for filename, label in zip(table['filename'], table['label']):
        clip_path = os.path.join(base_dir, filename)
        if not os.path.exists(clip_path):
            continue  # listed in the CSV but missing on disk
        clips.append(load_video(clip_path, frame_count, img_size))
        labels.append(label)

    X = np.array(clips, dtype=np.float32)
    y = to_categorical(labels, num_classes=num_classes)
    return X, y

In [4]:
# Root folder of the gesture dataset. Set the GESTURE_DATA_DIR environment
# variable to run the notebook on another machine; the fallback is the
# original author's local path.
DATA_DIR = os.environ.get(
    "GESTURE_DATA_DIR",
    "C:/Users/HP-PC/Desktop/Python data analytics/Hand Gesture Data/Project_data",
)

# Video folders and label CSVs for each split, all derived from DATA_DIR.
train_dir = f"{DATA_DIR}/train"
val_dir = f"{DATA_DIR}/val"
train_csv = f"{DATA_DIR}/train.csv"
val_csv = f"{DATA_DIR}/val.csv"


In [5]:
X_train, y_train = load_dataset(train_csv, train_dir)
X_val, y_val = load_dataset(val_csv, val_dir)

# load_dataset skips rows whose file is missing, so a wrong directory or CSV
# produces empty arrays. Fail here with a clear message rather than later
# inside the input pipeline or model.fit.
for _split, _samples, _folder in (("train", X_train, train_dir), ("val", X_val, val_dir)):
    if len(_samples) == 0:
        raise ValueError(
            f"No {_split} videos were loaded from {_folder!r}; "
            "check the CSV contents and the directory path."
        )

In [6]:
def create_tf_dataset(X, y, batch_size=8, shuffle=True):
    """Wrap in-memory samples and labels in a batched, prefetching ``tf.data`` pipeline.

    Args:
        X: Samples; sliced along the first axis.
        y: Labels aligned with ``X`` along the first axis.
        batch_size: Number of samples per batch.
        shuffle: When true (the default, matching the previous behaviour) the
            samples are shuffled with a buffer covering the whole set. Pass
            ``False`` for evaluation data, where ordering does not matter.

    Returns:
        A ``tf.data.Dataset`` yielding ``(X_batch, y_batch)`` pairs.
    """
    dataset = tf.data.Dataset.from_tensor_slices((X, y))

    sample_count = len(X)
    # A shuffle buffer of size 0 is invalid, so skip shuffling for an empty set.
    if shuffle and sample_count > 0:
        dataset = dataset.shuffle(sample_count)

    # tf.data.AUTOTUNE is the stable name of tf.data.experimental.AUTOTUNE.
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

In [7]:
# Turn both splits into tf.data pipelines using the helper's default batch size.
train_dataset = create_tf_dataset(X=X_train, y=y_train)
val_dataset = create_tf_dataset(X=X_val, y=y_val)

In [12]:
def build_model(input_shape=(50, 64, 64, 3), num_classes=5, learning_rate=0.0005):
    """Build and compile the ConvLSTM gesture classifier.

    Args:
        input_shape: Shape of one sample, ``(frames, height, width, channels)``.
        num_classes: Number of output classes (size of the softmax layer).
        learning_rate: Initial learning rate for the Adam optimiser.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and reporting accuracy.
    """
    model = Sequential([
        # Declare the input with an explicit Input object; passing
        # input_shape= to the first layer is discouraged by recent Keras.
        tf.keras.Input(shape=input_shape),

        # First recurrent block keeps the time axis so a second ConvLSTM can follow.
        ConvLSTM2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same',
                   return_sequences=True),
        BatchNormalization(),
        # The output is still a sequence of frames, so pooling is applied per time step.
        TimeDistributed(MaxPooling2D(pool_size=(2, 2))),

        # Second recurrent block collapses the time axis (only the last state is returned).
        ConvLSTM2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same',
                   return_sequences=False),
        BatchNormalization(),
        # With the time axis gone, ordinary 2-D pooling applies directly.
        MaxPooling2D(pool_size=(2, 2)),

        GlobalAveragePooling2D(),

        # Classification head.
        Dense(128, activation='swish'),
        LayerNormalization(),
        Dropout(0.3),
        Dense(num_classes, activation='softmax'),
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [13]:
callbacks = [
    # Multiply the learning rate by 0.2 after 3 epochs without val_loss improvement (floor 1e-6).
    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6),
    # Stop after 5 epochs without val_loss improvement and restore the best weights seen.
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # Keep the best-val_accuracy model in its own file. It previously shared
    # the path 'gesture_recognition_model.keras' with the final model.save()
    # call, which overwrote the checkpoint after training.
    ModelCheckpoint('gesture_recognition_model_best.keras', save_best_only=True,
                    monitor='val_accuracy', mode='max')
]


In [14]:
model = build_model()
# train_dataset / val_dataset are already batched tf.data pipelines, so
# batch_size must not be passed to fit(): Keras documents it as unsupported for
# dataset inputs (older tf.keras versions raise on it).
history = model.fit(train_dataset, validation_data=val_dataset, epochs=30, callbacks=callbacks)

Epoch 1/30
[1m20/83[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m5:42[0m 5s/step - accuracy: 0.1798 - loss: 1.6189

KeyboardInterrupt: 

In [None]:
# Persist the model in its current state using the native Keras file format.
model.save(filepath="gesture_recognition_model.keras")

In [None]:
def plot_history(history):
    """Plot training and validation curves for accuracy and loss side by side.

    Args:
        history: Object exposing a ``history`` dict that maps metric names
            (``'accuracy'``, ``'val_accuracy'``, ``'loss'``, ``'val_loss'``)
            to per-epoch values, such as the return value of ``model.fit``.

    Series that are missing from the record (for example the ``val_*``
    entries when training ran without validation data) are skipped instead of
    raising ``KeyError``.
    """
    record = history.history
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (metric, title) in zip(axes, panels):
        drew_any = False
        for key, split in ((metric, 'Train'), (f'val_{metric}', 'Val')):
            if key in record:
                ax.plot(record[key], label=f'{split} {title}')
                drew_any = True
        ax.set_xlabel('Epochs')
        ax.set_ylabel(title)
        ax.set_title(f'Model {title}')
        if drew_any:
            # Only draw a legend when there is at least one labelled curve.
            ax.legend()

    fig.tight_layout()
    plt.show()

# Show the learning curves recorded by the training run.
plot_history(history=history)