<a href="https://colab.research.google.com/github/Himanshu53998/PRODIGY_ML_04/blob/main/ML_Task4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""Hand Gesture Recognition - Task 4 (CNN Model).ipynb

CNN model for better accuracy on LeapGestRecog dataset
"""

# ============================================================================
# CELL 1: SETUP AND LOAD DATASET
# ============================================================================
print("üöÄ Setting up CNN for Hand Gesture Recognition...")

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install required packages
!pip install -q tensorflow opencv-python scikit-learn matplotlib seaborn

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
import pickle
import json
import random
from google.colab import files

# TensorFlow/Keras for CNN
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Sklearn for preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import warnings
warnings.filterwarnings('ignore')

# Seed the three random number generators this notebook uses (NumPy,
# TensorFlow and Python's `random`) with the same value.
for _seed_fn in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_fn(42)
del _seed_fn  # keep the notebook namespace free of the loop helper

print("‚úÖ Libraries imported successfully!")

In [None]:
# ============================================================================
# CELL 2: LOAD YOUR DATASET FROM DRIVE
# ============================================================================
print("üìÇ Loading dataset from your Google Drive...")

# Path to your dataset
dataset_path = '/content/drive/MyDrive/leapGestRecog'

# Verify dataset exists
if not os.path.exists(dataset_path):
    print(f"‚ùå Dataset not found at: {dataset_path}")
    print("Please check the path and update if needed")
    # Try to find it
    !find /content/drive/MyDrive -name "*leap*" -type d 2>/dev/null
else:
    print(f"‚úÖ Dataset found at: {dataset_path}")

# List all subjects and gestures
subjects = sorted([s for s in os.listdir(dataset_path) if s.isdigit()])
first_subject = os.path.join(dataset_path, subjects[0])
gesture_classes = sorted(os.listdir(first_subject))

print(f"\nüìä Dataset Information:")
print(f"   Subjects: {len(subjects)} ({subjects[:3]}...)")
print(f"   Gesture Classes: {len(gesture_classes)}")
print(f"   Gestures: {gesture_classes}")

# Show sample counts
sample_path = os.path.join(dataset_path, subjects[0], gesture_classes[0])
sample_images = [f for f in os.listdir(sample_path) if f.endswith('.png')]
print(f"   Images per gesture per subject: ~{len(sample_images)}")
print(f"   Total estimated images: {len(subjects) * len(gesture_classes) * len(sample_images)}")

In [None]:
# ============================================================================
# CELL 3: ENHANCED DATA LOADER FOR CNN
# ============================================================================
print("\nüì• Loading and preprocessing images for CNN...")

def load_data_for_cnn(num_gestures=10, samples_per_gesture=400, img_size=64, augment=False):
    """
    Load grayscale gesture images as a CNN-ready array.

    Reads the notebook-level names ``gesture_classes``, ``subjects`` and
    ``dataset_path``. Only the first 8 entries of ``subjects`` are read; for
    each of the first ``num_gestures`` gesture classes up to
    ``samples_per_gesture`` PNG files are loaded, spread over those subjects.
    Files are taken in sorted name order so repeated runs select the same
    images (``os.listdir`` alone gives no ordering guarantee).

    Args:
        num_gestures: Number of gesture classes to load, taken from the start
            of ``gesture_classes``.
        samples_per_gesture: Upper bound on the images loaded per gesture.
        img_size: Images are resized to ``img_size`` x ``img_size`` pixels.
        augment: Only echoed in the printed summary; no augmentation is
            performed by this function.

    Returns:
        Tuple ``(X, y, selected_gestures)``: ``X`` is a float32 array of
        shape ``(n, img_size, img_size, 1)`` scaled to [0, 1], ``y`` is an
        array with the gesture folder name of every image, and
        ``selected_gestures`` is the list of gesture names that were requested.

    Raises:
        ValueError: If there are no subjects to load from.
    """
    # Use selected gestures
    selected_gestures = gesture_classes[:num_gestures]

    print(f"üîß Loading {num_gestures} gestures: {selected_gestures}")
    print(f"   Image size: {img_size}x{img_size}")
    print(f"   Samples per gesture: {samples_per_gesture}")
    print(f"   Data augmentation: {augment}")

    # Only the first 8 subjects are loaded here; later subjects are left
    # untouched so they can serve as unseen data elsewhere in the notebook.
    train_subjects = subjects[:8]
    if not train_subjects:
        raise ValueError("No subjects available to load images from")

    # Ceiling division: with floor division a request that is not a multiple
    # of the subject count would fall short of samples_per_gesture (or load
    # nothing at all when it is smaller than the subject count). The running
    # total below still enforces the exact cap.
    per_subject_quota = -(-samples_per_gesture // len(train_subjects))

    X = []
    y = []

    for gesture in selected_gestures:
        gesture_samples = 0

        for subject in train_subjects:
            if gesture_samples >= samples_per_gesture:
                break  # quota reached; no need to list further directories

            gesture_path = os.path.join(dataset_path, subject, gesture)
            if not os.path.isdir(gesture_path):
                continue

            # Sorted so the slice picks the same files on every run
            image_files = sorted(
                f for f in os.listdir(gesture_path) if f.endswith('.png')
            )[:per_subject_quota]

            for img_file in image_files:
                if gesture_samples >= samples_per_gesture:
                    break

                img = cv2.imread(os.path.join(gesture_path, img_file),
                                 cv2.IMREAD_GRAYSCALE)
                if img is None:
                    continue  # unreadable file: skip it

                # Resize, scale to [0, 1] and add the channel axis. float32
                # halves the memory footprint compared with NumPy's default
                # float64 result.
                img_resized = cv2.resize(img, (img_size, img_size))
                img_normalized = img_resized.astype(np.float32) / 255.0
                X.append(np.expand_dims(img_normalized, axis=-1))
                y.append(gesture)
                gesture_samples += 1

        print(f"   ‚úÖ {gesture}: {gesture_samples} samples")

    X = np.array(X)
    y = np.array(y)

    print(f"\n‚úÖ Data loaded successfully!")
    print(f"   X shape: {X.shape}")
    print(f"   y shape: {y.shape}")
    print(f"   Memory usage: {X.nbytes / (1024**3):.2f} GB")

    return X, y, selected_gestures

# Build the dataset: ten gesture classes, up to 400 images each, 64x64 pixels
X, y, gesture_names = load_data_for_cnn(num_gestures=10,
                                        samples_per_gesture=400,
                                        img_size=64)

# Preview: a 2x5 grid of randomly drawn images, each titled with its label
print("\nüëÄ Sample images:")
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
for ax in axes.flat:  # row-major order, one random image per panel
    idx = random.randint(0, len(X)-1)
    ax.imshow(X[idx].squeeze(), cmap='gray')
    ax.set_title(f"{y[idx]}")
    ax.axis('off')
plt.suptitle("Sample Training Images", fontsize=16)
plt.tight_layout()
plt.show()

In [None]:
# ============================================================================
# CELL 4: PREPARE DATA FOR TRAINING
# ============================================================================
print("‚öôÔ∏è Preparing data for CNN training...")

# Map the gesture-name strings in `y` to integer class ids
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Two-stage stratified split giving 70% train / 10% validation / 20% test.
# NOTE(review): both stages shuffle individual images, so images of the same
# subject and gesture can end up on both sides of a split; a subject-wise
# split would give a stricter accuracy estimate.

# Stage 1: hold out 20% of everything as the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)

# Stage 2: take 12.5% of the remaining 80% (= 10% of the total) for validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.125,
    stratify=y_train,
    random_state=42,
)

print("üìä Data Split Summary:")
print(f"   Training set:   {X_train.shape[0]} samples")
print(f"   Validation set: {X_val.shape[0]} samples")
print(f"   Test set:       {X_test.shape[0]} samples")
print(f"   Input shape:    {X_train.shape[1:]}")
print(f"   Number of classes: {len(gesture_names)}")
print(f"   Classes: {gesture_names}")

# Per-class sample counts of the training split
print("\nüìà Class Distribution:")
unique, counts = np.unique(y_train, return_counts=True)
for cls, count in zip(unique, counts):
    class_name = le.inverse_transform([cls])[0]
    print(f"   Class {cls} ({class_name}): {count} samples")

In [None]:
# ============================================================================
# CELL 5: BUILD CNN MODEL
# ============================================================================
print("üèóÔ∏è Building CNN Model Architecture...")

# Model parameters
input_shape = X_train.shape[1:]  # shape of a single training image, without the batch axis
num_classes = len(gesture_names)

# Clear any existing models
keras.backend.clear_session()

# Three convolutional blocks with 32, 64 and 128 filters. Every block is
# Conv -> BatchNorm -> Conv -> BatchNorm -> MaxPool -> Dropout(0.25); they are
# generated in a loop because only the filter count differs between them.
conv_layers = []
for n_filters in (32, 64, 128):
    conv_layers += [
        layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
    ]

# Build the CNN model. The input shape is declared with an explicit Input
# layer, which is the form Keras recommends for Sequential models, instead of
# passing `input_shape=` to the first Conv2D.
model = models.Sequential([
    layers.Input(shape=input_shape),
    *conv_layers,

    # Flatten and Dense layers
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),

    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # Output layer: one softmax probability per gesture class
    layers.Dense(num_classes, activation='softmax')
])

# Display model summary
model.summary()

# Visualize model architecture (rendered as the cell's output)
keras.utils.plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# ============================================================================
# CELL 6: COMPILE AND TRAIN CNN
# ============================================================================
print("üéØ Compiling and Training CNN Model...")

# Training hyper-parameters, defined once so the summary printed below can
# never disagree with what model.fit() actually receives.
EPOCHS = 30
BATCH_SIZE = 32

# Compile the model. The sparse variant of categorical cross-entropy is used
# because the labels passed to fit() are integer class ids, not one-hot rows.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks for better training
callbacks = [
    # Stop after 10 epochs without a better validation accuracy and roll the
    # weights back to the best epoch seen.
    EarlyStopping(
        monitor='val_accuracy',
        patience=10,
        restore_best_weights=True,
        verbose=1
    ),
    # Halve the learning rate after 5 epochs without a lower validation loss,
    # never going below 1e-5.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=0.00001,
        verbose=1
    )
]

print("‚ö° Starting training...")
print(f"   Epochs: {EPOCHS}")
print(f"   Batch size: {BATCH_SIZE}")
print(f"   Training samples: {X_train.shape[0]}")
print(f"   Validation samples: {X_val.shape[0]}")

# Train the model
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

print("‚úÖ Training completed!")

In [None]:
# ============================================================================
# CELL 7: EVALUATE MODEL
# ============================================================================
print("üìä Evaluating Model Performance...")

# Learning curves: one panel per metric, training vs. validation.
# Each spec is (train key, val key, train label, val label, y label, title).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
curve_specs = [
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss'),
]
for ax, (train_key, val_key, train_label, val_label, y_label, title) in zip(axes, curve_specs):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

# Loss and accuracy on the held-out test split
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nüéØ Test Set Performance:")
print(f"   Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"   Test Loss: {test_loss:.4f}")

# Predicted class = index of the highest softmax probability in each row
y_pred_probs = model.predict(X_test, verbose=0)
y_pred = y_pred_probs.argmax(axis=1)

# Precision / recall / F1 per gesture
print("\nüìã Classification Report:")
print(classification_report(y_test, y_pred, target_names=gesture_names))

# Confusion matrix heatmap (rows: true class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=gesture_names,
    yticklabels=gesture_names,
)
plt.title(f'Confusion Matrix\nTest Accuracy: {test_accuracy:.4f}')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# Per-class accuracy: correct predictions (diagonal) over the row totals
print("\nüìà Per-Class Accuracy:")
class_accuracies = cm.diagonal() / cm.sum(axis=1)
for cls_idx, gesture in enumerate(gesture_names):
    print(f"   {gesture}: {class_accuracies[cls_idx]:.3f}")

In [None]:
# ============================================================================
# CELL 8: TEST WITH RANDOM DATASET IMAGES
# ============================================================================
print("üß™ Testing Model with Random Dataset Images...")

def test_random_dataset_images(num_tests=6):
    """Spot-check the trained model on random images from ``subjects[8:]``.

    Those subjects are never read by ``load_data_for_cnn`` (it loads
    ``subjects[:8]``), so the images shown here were not part of the
    train/validation/test arrays. Reads the notebook-level names ``subjects``,
    ``gesture_names``, ``dataset_path`` and ``model``.

    For every attempt a random subject, gesture and PNG file are drawn, the
    image is preprocessed like the training data (grayscale, resized to the
    model's input size, scaled to [0, 1]) and classified. Attempts whose
    folder is missing or empty, or whose image cannot be read, are skipped, so
    fewer than ``num_tests`` results may be shown.

    Args:
        num_tests: Number of random draws to attempt.

    Returns:
        None. A figure and a textual summary are displayed.

    Raises:
        ValueError: If ``subjects`` has no entries beyond the first 8.
    """
    holdout_subjects = subjects[8:]  # subjects not used for training
    if not holdout_subjects:
        raise ValueError(
            "No subjects beyond the first 8 are available for testing "
            f"(found {len(subjects)} subjects)"
        )

    # Resize to the size the network was built for rather than assuming 64x64
    _, input_h, input_w, _ = model.input_shape

    results = []

    for _ in range(num_tests):
        # Random subject and gesture
        subject = random.choice(holdout_subjects)
        gesture = random.choice(gesture_names)

        gesture_path = os.path.join(dataset_path, subject, gesture)
        if not os.path.isdir(gesture_path):
            continue

        # Sorted so that, with a fixed seed, the same files are drawn each run
        image_files = sorted(f for f in os.listdir(gesture_path) if f.endswith('.png'))
        if not image_files:
            continue

        image_path = os.path.join(gesture_path, random.choice(image_files))

        # Load and process image
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue  # unreadable file: skip instead of crashing in resize

        img_resized = cv2.resize(img, (input_w, input_h)) / 255.0
        img_input = np.expand_dims(img_resized, axis=(0, -1))  # add batch and channel axes

        # Predict
        predictions = model.predict(img_input, verbose=0)[0]
        predicted_idx = np.argmax(predictions)
        predicted_gesture = gesture_names[predicted_idx]

        # Three most probable classes, best first
        top_3_indices = np.argsort(predictions)[-3:][::-1]
        top_3_predictions = [(gesture_names[idx], predictions[idx]) for idx in top_3_indices]

        results.append({
            'true_gesture': gesture,
            'predicted_gesture': predicted_gesture,
            'confidence': predictions[predicted_idx],
            'top_3': top_3_predictions,
            'image': img,
            'correct': gesture == predicted_gesture
        })

    num_results = len(results)
    if num_results == 0:
        # Nothing to plot; a 0-column grid would otherwise divide by zero
        print("No test images could be loaded.")
        return

    # Display results in a grid of at most 3 columns
    cols = min(3, num_results)
    rows = (num_results + cols - 1) // cols

    # squeeze=False always yields a 2-D array of axes, so a single result (or
    # a single row) is indexed the same way as a full grid.
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows), squeeze=False)
    flat_axes = axes.ravel()

    for ax, result in zip(flat_axes, results):
        # Display image
        ax.imshow(result['image'], cmap='gray')

        # Format title
        color = 'green' if result['correct'] else 'red'
        title = f"True: {result['true_gesture']}\n"
        title += f"Pred: {result['predicted_gesture']}\n"
        title += f"Conf: {result['confidence']:.1%}\n"
        title += f"{'‚úÖ' if result['correct'] else '‚ùå'}"

        ax.set_title(title, fontsize=10, color=color)
        ax.axis('off')

    # Hide empty subplots
    for ax in flat_axes[num_results:]:
        ax.axis('off')

    plt.suptitle(f"CNN Model Test Results (Accuracy on these: {sum(r['correct'] for r in results)}/{num_results})",
                 fontsize=14)
    plt.tight_layout()
    plt.show()

    # Print detailed results
    print("\nüìã Detailed Predictions:")
    for i, result in enumerate(results):
        print(f"\nTest {i+1}:")
        print(f"  True: {result['true_gesture']}")
        print(f"  Predicted: {result['predicted_gesture']} ({result['confidence']:.1%})")
        print(f"  Result: {'‚úÖ CORRECT' if result['correct'] else '‚ùå WRONG'}")
        print(f"  Top 3 predictions:")
        for gesture, prob in result['top_3']:
            print(f"    - {gesture}: {prob:.1%}")

# Run a six-image spot check; the function draws its images from subjects[8:]
test_random_dataset_images(6)

In [None]:
# ============================================================================
# CELL 9: UPLOAD AND PREDICT WITH CNN
# ============================================================================
print("üì§ READY FOR IMAGE UPLOAD AND PREDICTION WITH CNN!")

def _extract_hand_roi(img_gray, padding=20):
    """Crop the region of a grayscale image around its largest contour.

    Returns ``(hand_roi, img_thresh)`` where ``img_thresh`` is the inverted
    adaptive-threshold image the contours were searched in. When no contour is
    found, a centred square crop is used instead; when a crop turns out empty
    (very small images), the whole image is returned.
    """
    img_thresh = cv2.adaptiveThreshold(img_gray, 255,
                                       cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
    contours, _ = cv2.findContours(img_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    img_h, img_w = img_gray.shape

    if contours:
        # Treat the largest contour as the hand
        largest_contour = max(contours, key=cv2.contourArea)
        box_x, box_y, box_w, box_h = cv2.boundingRect(largest_contour)

        # Pad every side by `padding` and clamp each edge to the image on its
        # own, so clamping the left/top edge does not push the extra width or
        # height onto the opposite side.
        left = max(0, box_x - padding)
        top = max(0, box_y - padding)
        right = min(img_w, box_x + box_w + padding)
        bottom = min(img_h, box_y + box_h + padding)
        hand_roi = img_gray[top:bottom, left:right]
    else:
        # If no contours found, use center crop
        size = min(img_h, img_w) // 2
        center_x, center_y = img_w // 2, img_h // 2
        hand_roi = img_gray[center_y-size//2:center_y+size//2,
                           center_x-size//2:center_x+size//2]

    if hand_roi.size == 0:
        # cv2.resize cannot handle an empty array
        hand_roi = img_gray

    return hand_roi, img_thresh


def predict_with_cnn():
    """Upload a hand image in Colab and classify it with the trained CNN.

    Opens Colab's upload dialog and uses the first uploaded file. The image is
    converted to grayscale, cropped around its largest contour, resized to the
    model's input size, scaled to [0, 1] and classified. A six-panel figure
    (original, grayscale, threshold, crop, model input, top-5 bar chart) and a
    textual summary are displayed.

    Reads the notebook-level names ``model`` and ``gesture_names``; the test
    accuracy line is printed only when ``test_accuracy`` has been defined.

    NOTE(review): load_data_for_cnn feeds the network whole frames resized to
    the input size, whereas this function crops a region first; confirm that
    the crop helps on images resembling the training data.

    Returns:
        None.
    """
    print("\nüì§ Click 'Choose Files' to upload a hand gesture image...")
    uploaded = files.upload()

    if not uploaded:
        print("No file selected")
        return

    # Process uploaded image (only the first file if several were selected)
    filename = list(uploaded.keys())[0]
    file_bytes = uploaded[filename]

    # Convert to image
    nparr = np.frombuffer(file_bytes, np.uint8)
    img_color = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

    if img_color is None:
        print("‚ùå Could not read image")
        return

    # Convert to grayscale
    img_gray = cv2.cvtColor(img_color, cv2.COLOR_BGR2GRAY)

    # Locate and crop the hand region
    hand_roi, img_thresh = _extract_hand_roi(img_gray, padding=20)

    # Resize to the size the network was built for rather than assuming 64x64
    _, input_h, input_w, _ = model.input_shape
    img_resized = cv2.resize(hand_roi, (input_w, input_h))

    # Normalize
    img_normalized = img_resized / 255.0

    # Prepare for model (add batch and channel dimensions)
    img_input = np.expand_dims(img_normalized, axis=(0, -1))

    # Predict with CNN
    predictions = model.predict(img_input, verbose=0)[0]
    predicted_idx = np.argmax(predictions)
    predicted_gesture = gesture_names[predicted_idx]
    confidence = predictions[predicted_idx]

    # Get top 5 predictions, best first
    top_5_indices = np.argsort(predictions)[-5:][::-1]
    top_5_predictions = [(gesture_names[idx], predictions[idx]) for idx in top_5_indices]

    # Display results
    fig, axes = plt.subplots(2, 3, figsize=(12, 8))

    # Five image panels: (axes position, image, title, colormap)
    image_panels = [
        ((0, 0), cv2.cvtColor(img_color, cv2.COLOR_BGR2RGB), 'Original Image', None),
        ((0, 1), img_gray, 'Grayscale', 'gray'),
        ((0, 2), img_thresh, 'Threshold', 'gray'),
        ((1, 0), hand_roi, 'Hand Region', 'gray'),
        ((1, 1), img_resized, f'Processed {input_w}x{input_h}', 'gray'),
    ]
    for position, image, panel_title, cmap in image_panels:
        axes[position].imshow(image, cmap=cmap)
        axes[position].set_title(panel_title)
        axes[position].axis('off')

    # Prediction chart
    chart = axes[1, 2]
    chart.barh(range(len(top_5_predictions)),
               [p[1] for p in top_5_predictions])
    chart.set_yticks(range(len(top_5_predictions)))
    chart.set_yticklabels([p[0] for p in top_5_predictions])
    chart.set_xlabel('Confidence')
    chart.set_title('Top 5 Predictions')
    chart.set_xlim([0, 1])

    # Highlight the top prediction (first tick label)
    tick_labels = chart.get_yticklabels()
    if tick_labels:
        tick_labels[0].set_color('red')
        tick_labels[0].set_fontweight('bold')

    plt.suptitle(f"CNN PREDICTION: {predicted_gesture}\nConfidence: {confidence:.1%}",
                 fontsize=16, color='green' if confidence > 0.8 else 'orange')
    plt.tight_layout()
    plt.show()

    # Print details
    print(f"\nüìä PREDICTION RESULTS:")
    print(f"   File: {filename}")
    print(f"   Top Prediction: {predicted_gesture} ({confidence:.1%})")
    print(f"\nüîÆ Top 5 Predictions:")
    for i, (gesture, prob) in enumerate(top_5_predictions):
        prefix = "üéØ " if i == 0 else "   "
        print(f"{prefix}{i+1}. {gesture}: {prob:.1%}")

    # Gesture descriptions
    gesture_descriptions = {
        "01_palm": "Open palm facing forward",
        "02_l": "Index finger and thumb making L shape",
        "03_fist": "Closed fist",
        "04_fist_moved": "Fist moving sideways",
        "05_thumb": "Thumbs up gesture",
        "06_index": "Pointing index finger",
        "07_ok": "OK sign (thumb and index touching)",
        "08_palm_moved": "Palm moving/rotating",
        "09_c": "Hand making C shape",
        "10_down": "Hand pointing downward"
    }

    if predicted_gesture in gesture_descriptions:
        print(f"\nüìù Description: {gesture_descriptions[predicted_gesture]}")

    print(f"\nü§ñ Model Info: CNN trained on {len(gesture_names)} gestures")

    # test_accuracy only exists once the evaluation cell has been run; look it
    # up defensively so a prediction does not end in a NameError.
    known_accuracy = globals().get('test_accuracy')
    if known_accuracy is not None:
        print(f"   Test Accuracy: {known_accuracy:.1%}")

# Run the prediction. predict_with_cnn() calls files.upload(), so this line
# presumably waits for a manual file selection when the notebook is run
# top to bottom - TODO confirm.
predict_with_cnn()