In [7]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense,
    Dropout, Input, BatchNormalization, Add
)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from google.colab import files
import zipfile
import shutil

# Constants
# Size images are resized to when loaded from disk; also the spatial size of the model input.
IMG_SIZE = (120, 120)
# Number of images per batch produced by the directory iterators.
BATCH_SIZE = 32
# Upper bound on training epochs (the early-stopping callback may end training sooner).
CNN_EPOCHS = 50
# Class sub-directory names expected under data/train and data/test.
# The same list is passed as `classes=` to the loaders and used as the
# label names in the evaluation report, so its order matters.
GESTURE_CLASSES = [
    '01_palm', '02_fist', '03_thumbs-up', '04_thumbs-down',
    '05_index-right', '06_index-left', '07_no-gesture'
]


In [8]:
def setup_directories():
    """Reset the working tree used by the notebook.

    Any existing ``data``, ``outputs`` or ``models`` entry in the current
    working directory is removed recursively, then ``data/train``,
    ``data/test``, ``outputs`` and ``models`` are created empty.
    """
    stale_roots = ('data', 'outputs', 'models')
    fresh_dirs = ('data/train', 'data/test', 'outputs', 'models')

    # Drop whatever a previous run left behind.
    for root in filter(os.path.exists, stale_roots):
        shutil.rmtree(root)

    # Recreate the empty layout the later cells write into.
    for folder in fresh_dirs:
        os.makedirs(folder, exist_ok=True)

def load_and_extract_data():
    """Prompt for a ZIP upload in Colab, extract it into ``data/`` and verify the layout.

    After extraction, a directory ``data/<split>/<gesture_class>`` must exist
    for every split in ``('train', 'test')`` and every entry of
    ``GESTURE_CLASSES``. The number of image files found in each of those
    directories is printed.

    Raises:
        ValueError: if nothing was uploaded, or if an expected class
            directory is missing after extraction.
    """
    print("Please upload your training data ZIP file")
    uploaded = files.upload()

    if not uploaded:
        raise ValueError("No file was uploaded")

    # Only the first uploaded file is used; any further uploads are ignored.
    zip_filename = next(iter(uploaded.keys()))
    print(f"Extracting {zip_filename}...")

    with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
        zip_ref.extractall('data')

    # Verify the data structure
    image_suffixes = ('.jpg', '.png', '.jpeg')
    for split in ['train', 'test']:
        for gesture_class in GESTURE_CLASSES:
            path = f'data/{split}/{gesture_class}'
            # isdir rather than exists: a plain file carrying the class name
            # must fail verification here instead of crashing in os.listdir.
            if not os.path.isdir(path):
                raise ValueError(f"Expected directory not found: {path}")
            # Compare case-insensitively so files such as IMG_001.JPG are counted.
            files_count = sum(
                1 for f in os.listdir(path)
                if f.lower().endswith(image_suffixes)
            )
            print(f"Found {files_count} images in {path}")

# Reset the working directories, then prompt for the dataset ZIP and
# extract it into data/ (the upload step blocks until a file is chosen).
setup_directories()
load_and_extract_data()


Please upload your training data ZIP file


Saving data_to_upload.zip to data_to_upload (1).zip
Extracting data_to_upload (1).zip...
Found 900 images in data/train/01_palm
Found 900 images in data/train/02_fist
Found 900 images in data/train/03_thumbs-up
Found 900 images in data/train/04_thumbs-down
Found 900 images in data/train/05_index-right
Found 900 images in data/train/06_index-left
Found 65 images in data/train/07_no-gesture
Found 300 images in data/test/01_palm
Found 300 images in data/test/02_fist
Found 300 images in data/test/03_thumbs-up
Found 300 images in data/test/04_thumbs-down
Found 300 images in data/test/05_index-right
Found 300 images in data/test/06_index-left
Found 15 images in data/test/07_no-gesture


In [9]:
def create_data_generators():
    """Create the image generators used for training and for testing.

    Returns:
        tuple: ``(train_datagen, test_datagen)``. The training generator
        scales pixel values by 1/255 and applies random rotation, shifts,
        zoom and brightness changes. The test generator only applies the
        1/255 scaling, so evaluation sees unaugmented images.
    """
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        zoom_range=0.2,
        # Horizontal flips are disabled on purpose: the label set contains
        # direction-dependent gestures ('05_index-right' / '06_index-left'),
        # and mirroring an image turns one into the other while the label
        # stays the same, i.e. it injects wrongly-labelled samples.
        horizontal_flip=False,
        brightness_range=[0.8, 1.2],
        fill_mode='nearest'
    )
    test_datagen = ImageDataGenerator(rescale=1./255)
    return train_datagen, test_datagen

def load_datasets():
    """Build the directory iterators for the train and test splits.

    Images are read from ``data/train`` and ``data/test`` as grayscale,
    resized to ``IMG_SIZE`` and batched by ``BATCH_SIZE`` with one-hot
    (categorical) labels ordered as in ``GESTURE_CLASSES``.

    Returns:
        tuple: ``(training_set, test_set)``. The training iterator shuffles;
        the test iterator does not.
    """
    augmenting_gen, plain_gen = create_data_generators()

    # Options that are identical for both splits.
    shared_opts = dict(
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        color_mode='grayscale',
        classes=GESTURE_CLASSES,
    )

    training_set = augmenting_gen.flow_from_directory(
        'data/train', shuffle=True, **shared_opts
    )
    # The test iterator keeps a fixed order (shuffle=False).
    test_set = plain_gen.flow_from_directory(
        'data/test', shuffle=False, **shared_opts
    )

    return training_set, test_set

# Create the train/test iterators over the extracted data/ directory
training_set, test_set = load_datasets()


Found 5465 images belonging to 7 classes.
Found 1815 images belonging to 7 classes.


In [10]:
def build_model():
    """Assemble and compile the gesture CNN.

    Architecture: a 32-filter convolutional block, a 64-filter convolutional
    block whose output is summed with a strided 1x1 projection of its input
    (residual connection), then a 256-unit dense head ending in a softmax
    over ``GESTURE_CLASSES``. Input is a single-channel image of size
    ``IMG_SIZE``.

    Returns:
        A Keras ``Model`` compiled with Adam (learning rate 1e-4),
        categorical cross-entropy loss and the accuracy metric.
    """
    def conv3x3(n_filters):
        # 3x3 'same'-padded ReLU convolution used throughout both blocks.
        return Conv2D(n_filters, (3, 3), activation='relu', padding='same')

    image_in = Input(shape=(*IMG_SIZE, 1))

    # Block 1: two 32-filter convolutions, then downsample by 2
    block1 = conv3x3(32)(image_in)
    block1 = conv3x3(32)(block1)
    block1 = MaxPooling2D(pool_size=(2, 2))(block1)
    block1 = BatchNormalization()(block1)
    block1 = Dropout(rate=0.25)(block1)

    # Block 2: the shortcut is a strided 1x1 convolution so that it matches
    # the main path in both channel count (64) and halved spatial size.
    shortcut = Conv2D(filters=64, kernel_size=(1, 1), strides=(2, 2))(block1)
    block2 = conv3x3(64)(block1)
    block2 = conv3x3(64)(block2)
    block2 = MaxPooling2D(pool_size=(2, 2))(block2)
    block2 = Add()([block2, shortcut])
    block2 = BatchNormalization()(block2)
    block2 = Dropout(rate=0.25)(block2)

    # Classification head
    head = Flatten()(block2)
    head = Dense(units=256, activation='relu')(head)
    head = BatchNormalization()(head)
    head = Dropout(rate=0.5)(head)
    class_probs = Dense(units=len(GESTURE_CLASSES), activation='softmax')(head)

    network = Model(inputs=image_in, outputs=class_probs)
    network.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return network

# Build the compiled CNN and print its layer-by-layer summary
model = build_model()
model.summary()


In [11]:
def train_model(model, training_set, test_set):
    """Fit the model with early stopping and best-model checkpointing.

    Args:
        model: compiled Keras model to train (modified in place by ``fit``).
        training_set: iterator yielding the training batches.
        test_set: iterator passed to ``fit`` as ``validation_data``.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    checkpoint_path = 'models/best_model.keras'
    # Make the cell usable even if the directory-setup cell was skipped.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Both callbacks track val_loss so the checkpoint on disk and the weights
    # restored by early stopping describe the same "best" epoch. A checkpoint
    # keyed on val_accuracy stops updating once accuracy saturates, even
    # while val_loss is still improving.
    callbacks = [
        EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        ),
        ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            save_best_only=True
        )
    ]

    # NOTE(review): test_set doubles as the validation set, so early stopping
    # and checkpoint selection are tuned on the same data that is later used
    # for the final evaluation. Consider carving out a separate validation split.
    history = model.fit(
        training_set,
        validation_data=test_set,
        epochs=CNN_EPOCHS,
        callbacks=callbacks
    )

    return history

# Train the model; `history` holds the per-epoch metrics used by the plotting cell
history = train_model(model, training_set, test_set)


Epoch 1/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 118ms/step - accuracy: 0.6497 - loss: 1.1601 - val_accuracy: 0.1653 - val_loss: 2.6132
Epoch 2/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 85ms/step - accuracy: 0.9176 - loss: 0.2387 - val_accuracy: 0.3857 - val_loss: 1.1745
Epoch 3/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 89ms/step - accuracy: 0.9351 - loss: 0.1958 - val_accuracy: 0.6937 - val_loss: 0.4857
Epoch 4/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 84ms/step - accuracy: 0.9584 - loss: 0.1380 - val_accuracy: 1.0000 - val_loss: 0.0175
Epoch 5/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 76ms/step - accuracy: 0.9558 - loss: 0.1301 - val_accuracy: 1.0000 - val_loss: 0.0283
Epoch 6/50
[1m171/171[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 78ms/step - accuracy: 0.9687 - loss: 0.0977 - val_accuracy: 1.0000 - val_loss: 0.0017
Epoch 7/50
[1m

In [12]:
def plot_training_history(history):
    """Save a two-panel loss/accuracy figure to ``outputs/training_history.png``.

    Args:
        history: object whose ``history`` mapping holds the per-epoch series
            ``'loss'``, ``'val_loss'``, ``'accuracy'`` and ``'val_accuracy'``.

    The figure is written to disk and closed; nothing is displayed inline.
    Note that the 'ggplot' style is applied globally and stays active
    afterwards.
    """
    plt.style.use('ggplot')
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))
    series = history.history

    # (axes, train key, val key, train label, val label, title, y-label)
    panels = (
        (loss_ax, 'loss', 'val_loss',
         'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
        (acc_ax, 'accuracy', 'val_accuracy',
         'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    )
    for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
        ax.plot(series[train_key], label=train_label)
        ax.plot(series[val_key], label=val_label)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        ax.legend()

    fig.tight_layout()
    fig.savefig('outputs/training_history.png')
    plt.close(fig)

# Write the loss/accuracy curves to outputs/training_history.png (saved to disk, not shown inline)
plot_training_history(history)


In [13]:
def evaluate_model(model, test_set):
    """Evaluate the model on ``test_set`` and write the result artefacts.

    Side effects: saves the confusion-matrix heatmap to
    ``outputs/confusion_matrix.png`` and the text classification report to
    ``outputs/classification_report.txt``.

    Args:
        model: trained Keras model.
        test_set: directory iterator over the test images. Its ``classes``
            attribute is used as ground truth, which only lines up with the
            predictions when the iterator is not shuffled (it is created with
            ``shuffle=False`` in ``load_datasets``).

    Returns:
        dict: the classification report in ``output_dict`` form.
    """
    test_set.reset()
    y_pred = model.predict(test_set)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = test_set.classes

    # Pin the label set explicitly. Without it, a class that appears in
    # neither the truth nor the predictions is dropped, so the matrix no
    # longer matches the seven tick labels and classification_report raises
    # because target_names has the wrong length.
    label_ids = list(range(len(GESTURE_CLASSES)))

    # Generate confusion matrix
    cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)
    tick_labels = [c.replace('_', '\n') for c in GESTURE_CLASSES]
    plt.figure(figsize=(12, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels,
                yticklabels=tick_labels)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    plt.savefig('outputs/confusion_matrix.png')
    plt.close()

    # Generate the report twice: as a dict for the caller, as text for the file
    report = classification_report(y_true, y_pred_classes,
                                   labels=label_ids,
                                   target_names=GESTURE_CLASSES,
                                   output_dict=True)
    report_text = classification_report(y_true, y_pred_classes,
                                        labels=label_ids,
                                        target_names=GESTURE_CLASSES)

    with open('outputs/classification_report.txt', 'w') as f:
        f.write("Classification Report:\n")
        f.write(report_text)

    return report

# Evaluate on the test iterator; writes the confusion matrix and report under outputs/
report = evaluate_model(model, test_set)


[1m57/57[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step


In [14]:
# Persist the trained model to disk
final_model_path = 'models/final_gesture_model.h5'
model.save(final_model_path)

# Trigger browser downloads: the model first, then the generated reports
for artifact_path in (
    final_model_path,
    'outputs/training_history.png',
    'outputs/confusion_matrix.png',
    'outputs/classification_report.txt',
):
    files.download(artifact_path)




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>