***Develop a hand gesture recognition model that can accurately identify and classify different hand gestures from image or video data, enabling intuitive human-computer interaction and gesture-based control systems***

**Step 1: Setup**

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

**Step 2: Load Dataset**

In [6]:
import zipfile
import os

# Replace with your exact filename
zip_path = "/content/gesture_dataset.zip"
extract_to = "/content/gesture_dataset"

# Unzip
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Check if it's extracted properly
os.listdir("/content/gesture_dataset")


['train', 'test']

In [8]:
BATCH_SIZE = 32
IMG_SIZE = (128, 128)

train_dir = "/content/gesture_dataset/train/train"
test_dir = "/content/gesture_dataset/test/test"

train_ds = image_dataset_from_directory(
    train_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='int'
)

val_ds = image_dataset_from_directory(
    test_dir,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='int'
)

# Prefetch for speed
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)


Found 18000 files belonging to 20 classes.
Found 6000 files belonging to 20 classes.


***Step 3: Data Augmentation***

In [9]:
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])


***Step 4: Build the Model (Transfer Learning: MobileNetV2)***

In [16]:
# Image size and channels
IMG_SIZE = (128, 128)

# Load MobileNetV2 base model
base_model = MobileNetV2(input_shape=IMG_SIZE + (3,),
                         include_top=False,
                         weights='imagenet')
base_model.trainable = False  # Freeze pre-trained weights

# Create the model
model = Sequential([
    layers.Resizing(128, 128),  # Optional if image size varies
    layers.Rescaling(1./255),   # Normalize pixel values
    layers.Lambda(preprocess_input),  # ✅ Fix: wrap preprocess_input in Lambda
    data_augmentation,
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(20, activation='softmax')  # 20 gesture classes
])


***Step 5: Compile the Model***

In [12]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


***Step 6: Train the Model***

In [13]:
callbacks = [
    EarlyStopping(patience=3, monitor='val_loss', restore_best_weights=True),
    ModelCheckpoint("best_model.h5", save_best_only=True)
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=callbacks
)


Epoch 1/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 472ms/step - accuracy: 0.0764 - loss: 3.1063



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m358s[0m 622ms/step - accuracy: 0.0764 - loss: 3.1060 - val_accuracy: 0.2433 - val_loss: 2.5273
Epoch 2/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 458ms/step - accuracy: 0.1972 - loss: 2.5986



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m372s[0m 605ms/step - accuracy: 0.1973 - loss: 2.5985 - val_accuracy: 0.4158 - val_loss: 2.2152
Epoch 3/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466ms/step - accuracy: 0.2713 - loss: 2.3558



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 613ms/step - accuracy: 0.2713 - loss: 2.3557 - val_accuracy: 0.6440 - val_loss: 2.0067
Epoch 4/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 461ms/step - accuracy: 0.3089 - loss: 2.2138



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 608ms/step - accuracy: 0.3089 - loss: 2.2138 - val_accuracy: 0.4547 - val_loss: 1.8627
Epoch 5/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460ms/step - accuracy: 0.3418 - loss: 2.0981



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 607ms/step - accuracy: 0.3418 - loss: 2.0980 - val_accuracy: 0.7217 - val_loss: 1.7323
Epoch 6/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 460ms/step - accuracy: 0.3651 - loss: 2.0131



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m382s[0m 607ms/step - accuracy: 0.3652 - loss: 2.0131 - val_accuracy: 0.6035 - val_loss: 1.6392
Epoch 7/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498ms/step - accuracy: 0.3890 - loss: 1.9343



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m362s[0m 644ms/step - accuracy: 0.3891 - loss: 1.9342 - val_accuracy: 0.7585 - val_loss: 1.5558
Epoch 8/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 491ms/step - accuracy: 0.4070 - loss: 1.8663



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m379s[0m 637ms/step - accuracy: 0.4070 - loss: 1.8663 - val_accuracy: 0.6257 - val_loss: 1.5055
Epoch 9/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 461ms/step - accuracy: 0.4152 - loss: 1.8237



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m342s[0m 608ms/step - accuracy: 0.4152 - loss: 1.8237 - val_accuracy: 0.8055 - val_loss: 1.4211
Epoch 10/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459ms/step - accuracy: 0.4368 - loss: 1.7803



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 606ms/step - accuracy: 0.4368 - loss: 1.7802 - val_accuracy: 0.7610 - val_loss: 1.3640
Epoch 11/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468ms/step - accuracy: 0.4460 - loss: 1.7350



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m387s[0m 614ms/step - accuracy: 0.4460 - loss: 1.7349 - val_accuracy: 0.7783 - val_loss: 1.3291
Epoch 12/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 466ms/step - accuracy: 0.4553 - loss: 1.7037



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 612ms/step - accuracy: 0.4553 - loss: 1.7037 - val_accuracy: 0.8455 - val_loss: 1.3007
Epoch 13/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 464ms/step - accuracy: 0.4667 - loss: 1.6655



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 611ms/step - accuracy: 0.4668 - loss: 1.6655 - val_accuracy: 0.6155 - val_loss: 1.2753
Epoch 14/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - accuracy: 0.4783 - loss: 1.6335



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m384s[0m 615ms/step - accuracy: 0.4783 - loss: 1.6335 - val_accuracy: 0.6922 - val_loss: 1.2274
Epoch 15/15
[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 468ms/step - accuracy: 0.4797 - loss: 1.6119



[1m563/563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 614ms/step - accuracy: 0.4797 - loss: 1.6119 - val_accuracy: 0.8645 - val_loss: 1.1752


***Step 7: Evaluate the Model***

In [14]:
loss, accuracy = model.evaluate(val_ds)
print(f"Test Accuracy: {accuracy:.4f}")

[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 366ms/step - accuracy: 0.8661 - loss: 1.1757
Test Accuracy: 0.8645


***Step 8: Make Predictions***

In [15]:
for images, labels in val_ds.take(1):
    preds = model.predict(images)
    predicted_labels = tf.argmax(preds, axis=1)
    print("Predictions:", predicted_labels.numpy())
    print("True Labels:", labels.numpy())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predictions: [ 2 18 18 18 15  7 18  9  8 18 13 15  7  8  1  6  5 15  2  2 15  0 18 18
 11  5 13  5  7 17 12 18]
True Labels: [ 2 17 19 18 15  7 18  9 10 19 13 15  7  8  1  6  5 15  2  2 15  0 18 15
 11  5 13  5  7 17 16 19]
