 ============================================================================
 REAL-TIME HAND SIGN RECOGNITION WITH WEBCAM
 Jupyter Notebook - Live Prediction from Camera
 ============================================================================

 This notebook loads your trained model and performs real-time hand sign
 recognition using your laptop's webcam.

 Requirements:
  - Trained model file (best_model.pth or best_model.h5)
  - Webcam/camera access
  - Same preprocessing as training

 ============================================================================

In [1]:
# ============================================================================
# SECTION 1: IMPORTS AND SETUP
# ============================================================================

import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import time
import warnings
warnings.filterwarnings('ignore')

# Choose your framework: 'pytorch' or 'keras' (MUST match training)
FRAMEWORK = 'pytorch'  # Change to 'keras' if you trained with Keras

# The rest of the notebook branches on `FRAMEWORK == 'pytorch'` and treats
# every other value as Keras, so a typo would silently import TensorFlow.
# Reject unknown values here with a clear message instead.
if FRAMEWORK not in ('pytorch', 'keras'):
    raise ValueError(
        f"Unsupported FRAMEWORK {FRAMEWORK!r}: expected 'pytorch' or 'keras'"
    )

# Import only the selected deep-learning stack and report its version.
if FRAMEWORK == 'pytorch':
    import torch
    import torch.nn as nn
    import torchvision.transforms as transforms
    print(f"‚úÖ Using PyTorch {torch.__version__}")
    print(f"‚úÖ CUDA Available: {torch.cuda.is_available()}")
else:
    import tensorflow as tf
    from tensorflow import keras
    print(f"‚úÖ Using TensorFlow {tf.__version__}")

print("="*80)
print("üìπ REAL-TIME HAND SIGN RECOGNITION")
print("="*80)

‚úÖ Using PyTorch 2.7.1+cu118
‚úÖ CUDA Available: True
üìπ REAL-TIME HAND SIGN RECOGNITION


In [2]:
# ============================================================================
# SECTION 2: CONFIGURATION
# ============================================================================

class Config:
    """Settings shared by the model-loading, camera and display cells."""

    # --- Model (values MUST mirror the training configuration) -------------
    IMG_SIZE = 224
    NUM_CLASSES = 28  # adjust to the number of classes the model was trained on

    # Weights file to load ('best_model.h5' when using Keras)
    MODEL_PATH = 'best_model.pth'

    # --- Camera ------------------------------------------------------------
    CAMERA_ID = 0  # 0 is usually the built-in webcam; try 1 if it does not open
    CAMERA_WIDTH = 640
    CAMERA_HEIGHT = 480

    # --- Display -----------------------------------------------------------
    CONFIDENCE_THRESHOLD = 0.5  # minimum confidence to show a prediction

    # --- Compute device ----------------------------------------------------
    # 'cuda' only when running PyTorch and CUDA is available; otherwise 'cpu'.
    # The FRAMEWORK test comes first so `torch` is never touched under Keras.
    if FRAMEWORK == 'pytorch' and torch.cuda.is_available():
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'

    # --- Labels ------------------------------------------------------------
    # Numeric placeholder names: 'Class 0' .. 'Class <NUM_CLASSES - 1>'
    CLASS_NAMES = ['Class ' + str(i) for i in range(NUM_CLASSES)]
    

# Single shared settings object used by every later cell.
config = Config()

# Echo the active settings so a mismatch with training is easy to spot.
print(
    "\n‚öôÔ∏è  Configuration:",
    f"   Model Path: {config.MODEL_PATH}",
    f"   Image Size: {config.IMG_SIZE}x{config.IMG_SIZE}",
    f"   Number of Classes: {config.NUM_CLASSES}",
    f"   Device: {config.DEVICE}",
    f"   Camera ID: {config.CAMERA_ID}",
    f"   Confidence Threshold: {config.CONFIDENCE_THRESHOLD}",
    sep="\n",
)


‚öôÔ∏è  Configuration:
   Model Path: best_model.pth
   Image Size: 224x224
   Number of Classes: 28
   Device: cuda
   Camera ID: 0
   Confidence Threshold: 0.5


In [3]:
# ============================================================================
# SECTION 3: MODEL ARCHITECTURE (must match training)
# ============================================================================

if FRAMEWORK == 'pytorch':
    
    class HandSignCNN(nn.Module):
        """Four-stage convolutional classifier for hand-sign images.

        The layer layout, and therefore the ``state_dict`` key names
        (``conv1.0.weight`` ... ``fc.9.bias``), MUST match the network used
        during training, otherwise the saved weights cannot be loaded.

        Args:
            num_classes: Size of the final linear layer's output.
        """

        @staticmethod
        def _double_conv_stage(in_channels, out_channels):
            """Build one stage: two (3x3 conv, batch norm, ReLU) groups,
            followed by 2x2 max-pooling and 2D dropout with p=0.25."""
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout2d(0.25),
            )

        def __init__(self, num_classes=10):
            super().__init__()

            # Feature extractor: channels widen 3 -> 32 -> 64 -> 128 -> 256.
            # Stages are created in this order so parameter registration
            # (and initialisation order) stays the same as before.
            self.conv1 = self._double_conv_stage(3, 32)
            self.conv2 = self._double_conv_stage(32, 64)
            self.conv3 = self._double_conv_stage(64, 128)
            self.conv4 = self._double_conv_stage(128, 256)

            # Collapse each of the 256 feature maps to a single value
            self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

            # Classifier head: 256 -> 512 -> 256 -> num_classes
            self.fc = nn.Sequential(
                nn.Flatten(),
                nn.Linear(256, 512),
                nn.BatchNorm1d(512),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(512, 256),
                nn.BatchNorm1d(256),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(256, num_classes),
            )

        def forward(self, x):
            """Return the unnormalised class scores for a batch of images."""
            features = x
            for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
                features = stage(features)
            pooled = self.global_avg_pool(features)
            return self.fc(pooled)

In [4]:
# ============================================================================
# SECTION 4: LOAD TRAINED MODEL
# ============================================================================

print("\nüîÑ Loading trained model...")

# Fail fast, before any model is built or moved to the device, when the
# weights file is missing (both frameworks need the same file check).
if not os.path.exists(config.MODEL_PATH):
    raise FileNotFoundError(f"Model file not found: {config.MODEL_PATH}")

if FRAMEWORK == 'pytorch':
    # Create the network with the configured number of outputs
    model = HandSignCNN(num_classes=config.NUM_CLASSES).to(config.DEVICE)

    # torch.load unpickles the file. weights_only=True restricts that to
    # tensors and plain containers, so a tampered checkpoint cannot execute
    # arbitrary code. It is passed explicitly because older PyTorch releases
    # default to full unpickling.
    model.load_state_dict(
        torch.load(config.MODEL_PATH, map_location=config.DEVICE, weights_only=True)
    )
    # Inference mode: dropout disabled, batch norm uses its running statistics
    model.eval()

    # Define preprocessing transform (MUST match training)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    print("   ‚úÖ PyTorch model loaded successfully!")

else:  # Keras
    model = keras.models.load_model(config.MODEL_PATH)
    print("   ‚úÖ Keras model loaded successfully!")

print(f"   üìä Model ready for inference on {config.DEVICE}")



üîÑ Loading trained model...
   ‚úÖ PyTorch model loaded successfully!
   üìä Model ready for inference on cuda


In [5]:
# ============================================================================
# SECTION 5: PREDICTION FUNCTION
# ============================================================================

def predict_frame(frame, model, transform=None):
    """
    Predict hand sign from a camera frame

    Args:
        frame: OpenCV frame (BGR format)
        model: Trained model
        transform: Preprocessing transform; required when FRAMEWORK is
            'pytorch', unused for Keras

    Returns:
        predicted_class: Predicted class index (Python int)
        confidence: Probability of the predicted class (Python float)
        all_probs: All class probabilities

    Raises:
        ValueError: If `frame` is None or empty, or if `transform` is None
            while FRAMEWORK is 'pytorch'.
    """

    # Give a clear error instead of an opaque OpenCV assertion failure
    if frame is None or getattr(frame, "size", 0) == 0:
        raise ValueError("predict_frame received an empty frame")

    # Convert BGR to RGB
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if FRAMEWORK == 'pytorch':
        if transform is None:
            raise ValueError(
                "predict_frame requires a preprocessing transform "
                "when FRAMEWORK is 'pytorch'"
            )

        # Preprocess: transform, add a batch dimension, move to the model's device
        input_tensor = transform(rgb_frame).unsqueeze(0).to(config.DEVICE)

        # Predict without tracking gradients
        with torch.no_grad():
            output = model(input_tensor)
            probs = torch.softmax(output, dim=1)
            confidence, predicted = torch.max(probs, 1)

        predicted_class = predicted.item()
        confidence = confidence.item()
        all_probs = probs.cpu().numpy()[0]

    else:  # Keras
        # Preprocess: resize, scale pixel values to [0, 1], add a batch dimension
        img = cv2.resize(rgb_frame, (config.IMG_SIZE, config.IMG_SIZE))
        img = img / 255.0
        img = np.expand_dims(img, axis=0)

        # Predict
        probs = model.predict(img, verbose=0)[0]
        # Convert numpy scalars to plain Python values so both framework
        # branches return the same types
        predicted_class = int(np.argmax(probs))
        confidence = float(probs[predicted_class])
        all_probs = probs

    return predicted_class, confidence, all_probs


def draw_prediction_on_frame(frame, predicted_class, confidence, all_probs, top_k=3):
    """Draw prediction results on the frame

    Args:
        frame: Image to annotate (the colour tuples below are in BGR order)
        predicted_class: Index into ``config.CLASS_NAMES`` of the top prediction
        confidence: Probability of the top prediction; selects the text colour
        all_probs: Per-class probabilities, indexable by class index
        top_k: Number of highest-probability classes to list; clamped to the
            range 0 .. len(all_probs)

    Returns:
        A new annotated image; the array passed in is not modified.
    """

    h, w = frame.shape[:2]

    # Never list more entries than there are classes, and treat
    # non-positive values as "no list".
    k = max(0, min(top_k, len(all_probs)))

    # Vertical text layout, in pixels
    header_y = 130   # baseline of the "Top k" heading
    row_step = 30    # distance between list rows

    # Size the backdrop to the last list row. A fixed bottom edge of 200
    # left the third row of the default top-3 list (baseline y=220) outside
    # the panel. 200 is kept as the minimum height.
    panel_bottom = max(200, header_y + row_step * k + 15)

    # Create overlay for better readability
    overlay = frame.copy()

    # Draw semi-transparent background for text
    cv2.rectangle(overlay, (10, 10), (w - 10, panel_bottom), (0, 0, 0), -1)
    frame = cv2.addWeighted(overlay, 0.6, frame, 0.4, 0)

    # Main prediction
    class_name = config.CLASS_NAMES[predicted_class]

    # Color based on confidence
    if confidence >= 0.8:
        color = (0, 255, 0)  # Green
    elif confidence >= 0.5:
        color = (0, 255, 255)  # Yellow
    else:
        color = (0, 165, 255)  # Orange

    # Draw main prediction
    text = f"Prediction: {class_name}"
    cv2.putText(frame, text, (20, 50), cv2.FONT_HERSHEY_SIMPLEX,
                1.2, color, 3, cv2.LINE_AA)

    conf_text = f"Confidence: {confidence:.2%}"
    cv2.putText(frame, conf_text, (20, 90), cv2.FONT_HERSHEY_SIMPLEX,
                0.8, color, 2, cv2.LINE_AA)

    # Indices of the k most probable classes, best first. Reversing before
    # slicing keeps k == 0 empty (a `[-0:]` slice would select every class).
    top_k_indices = np.argsort(all_probs)[::-1][:k]

    y_offset = header_y
    cv2.putText(frame, f"Top {k}:", (20, y_offset),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2, cv2.LINE_AA)

    for i, idx in enumerate(top_k_indices):
        y_offset += row_step
        text = f"{i+1}. {config.CLASS_NAMES[idx]}: {all_probs[idx]:.2%}"
        cv2.putText(frame, text, (30, y_offset), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1, cv2.LINE_AA)

    # Draw instructions along the bottom edge
    instructions = "Press 'q' to quit | 's' to save snapshot"
    cv2.putText(frame, instructions, (20, h - 20), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (200, 200, 200), 1, cv2.LINE_AA)

    return frame

In [6]:
# ============================================================================
# SECTION 6: MAIN CAMERA LOOP
# ============================================================================

def run_camera_inference():
    """Main function to run real-time inference from camera

    Opens camera ``config.CAMERA_ID``, classifies every captured frame with
    the module-level ``model`` (plus ``transform`` when FRAMEWORK is
    'pytorch') and shows the annotated frame in an OpenCV window.

    Keyboard controls (read from the video window):
        q - quit
        s - save the currently displayed frame as ``snapshot_<n>.jpg``
        p - pause / resume capture

    Returns:
        None. Returns early, after printing a hint, if the camera cannot be
        opened. Otherwise the camera is released and all OpenCV windows are
        closed on exit, including after a KeyboardInterrupt.
    """

    print("\nüìπ Starting camera...")
    print("="*80)
    print("CONTROLS:")
    print("  Press 'q' to quit")
    print("  Press 's' to save snapshot")
    print("  Press 'p' to pause/resume")
    print("="*80)

    # Open camera
    cap = cv2.VideoCapture(config.CAMERA_ID)

    if not cap.isOpened():
        print("‚ùå Error: Could not open camera!")
        print("   Try changing CAMERA_ID in config (0, 1, 2, etc.)")
        return

    # Request the configured capture resolution
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, config.CAMERA_WIDTH)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, config.CAMERA_HEIGHT)

    print("‚úÖ Camera opened successfully!")
    # Inference starts immediately. The earlier "press any key to start"
    # wording was misleading because nothing ever waited for a key.
    print("\nüé¨ Live inference running - click the video window so it receives key presses")

    # For FPS calculation: frames counted during the current ~1 s window
    fps_time = time.time()
    fps_counter = 0
    fps_display = 0

    paused = False
    snapshot_counter = 0

    try:
        while True:
            if not paused:
                ret, frame = cap.read()

                if not ret:
                    print("‚ùå Error: Failed to grab frame")
                    break

                # Make prediction
                predicted_class, confidence, all_probs = predict_frame(
                    frame, model, transform if FRAMEWORK == 'pytorch' else None
                )

                # Draw results on frame
                frame = draw_prediction_on_frame(frame, predicted_class,
                                                confidence, all_probs)

                # Calculate and draw FPS
                fps_counter += 1
                if time.time() - fps_time > 1:
                    fps_display = fps_counter
                    fps_counter = 0
                    fps_time = time.time()

                cv2.putText(frame, f"FPS: {fps_display}", (frame.shape[1] - 120, 30),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2, cv2.LINE_AA)

            # Display frame (while paused this re-shows the last annotated frame)
            cv2.imshow('Hand Sign Recognition', frame)

            # Handle key presses; waitKey also lets the window process its events
            key = cv2.waitKey(1) & 0xFF

            if key == ord('q'):
                print("\nüëã Quitting...")
                break

            elif key == ord('s'):
                # Skip numbers already present on disk so snapshots from an
                # earlier run are not silently overwritten.
                candidate = snapshot_counter + 1
                while os.path.exists(f"snapshot_{candidate}.jpg"):
                    candidate += 1
                filename = f"snapshot_{candidate}.jpg"

                # imwrite signals a failed write through its return value,
                # so only report success when it returns True.
                if cv2.imwrite(filename, frame):
                    snapshot_counter = candidate
                    print(f"üì∏ Snapshot saved: {filename}")
                else:
                    print(f"Error: Could not save snapshot: {filename}")

            elif key == ord('p'):
                # Pause/resume
                paused = not paused
                status = "PAUSED" if paused else "RESUMED"
                print(f"‚è∏Ô∏è  {status}")

    except KeyboardInterrupt:
        print("\n‚ö†Ô∏è  Interrupted by user")

    finally:
        # Clean up: always free the camera and close the window
        cap.release()
        cv2.destroyAllWindows()
        print("\n‚úÖ Camera closed")
        print("="*80)


In [7]:
# ============================================================================
# SECTION 7: RUN THE APPLICATION
# ============================================================================

# Announce readiness; the text mirrors the call made right below.
print(
    "\nüöÄ Ready to start!",
    "\nRun the cell below to start the camera inference:",
    "   run_camera_inference()",
    sep="\n",
)

# %%
# Launch the live webcam loop; it runs until the loop inside exits
# (for example when 'q' is pressed in the video window).
run_camera_inference()


üöÄ Ready to start!

Run the cell below to start the camera inference:
   run_camera_inference()

üìπ Starting camera...
CONTROLS:
  Press 'q' to quit
  Press 's' to save snapshot
  Press 'p' to pause/resume
‚úÖ Camera opened successfully!

üé¨ Press any key in the video window to start...

üëã Quitting...

‚úÖ Camera closed


In [8]:
# ============================================================================
# OPTIONAL: SINGLE IMAGE TEST
# ============================================================================

def test_single_image(image_path):
    """Classify one image file, display the annotated result and print the scores.

    Args:
        image_path: Path of the image to load with OpenCV.

    Returns:
        None. Prints a message and returns early when the path does not
        exist or the file cannot be decoded.
    """

    # Guard: the path must exist ...
    if not os.path.exists(image_path):
        print(f"‚ùå Image not found: {image_path}")
        return

    # ... and OpenCV must be able to decode it (imread yields None otherwise)
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        print(f"‚ùå Failed to load image: {image_path}")
        return

    # Run the same prediction path that the live camera loop uses
    active_transform = transform if FRAMEWORK == 'pytorch' else None
    predicted_class, confidence, all_probs = predict_frame(
        bgr_image, model, active_transform
    )

    # Annotate a copy of the loaded image
    annotated = draw_prediction_on_frame(
        bgr_image.copy(), predicted_class, confidence, all_probs
    )

    predicted_label = config.CLASS_NAMES[predicted_class]

    # Show the annotated image; matplotlib expects RGB, OpenCV delivers BGR
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    ax.axis('off')
    ax.set_title(f'Prediction: {predicted_label} ({confidence:.2%})',
                 fontsize=16, fontweight='bold')
    fig.tight_layout()
    plt.show()

    # Text summary: winner first, then the five most probable classes
    print(f"\nüìä Prediction Results:")
    print(f"   Class: {predicted_label}")
    print(f"   Confidence: {confidence:.2%}")
    print(f"\n   Top 5 Predictions:")
    best_five = np.argsort(all_probs)[-5:][::-1]
    for rank, class_idx in enumerate(best_five, start=1):
        print(f"   {rank}. {config.CLASS_NAMES[class_idx]}: {all_probs[class_idx]:.2%}")

# Example usage:
# test_single_image('path/to/your/image.jpg')

# %%
# Closing banner with a short usage reminder, emitted in a single print call.
print(
    "\n" + "=" * 80,
    "‚úÖ SETUP COMPLETE!",
    "=" * 80,
    "\nüìå Quick Start:",
    "   1. Make sure your webcam is connected",
    "   2. Run: run_camera_inference()",
    "   3. Show hand signs to the camera",
    "   4. Press 'q' to quit, 's' to save snapshot",
    "\nüìå Test single image:",
    "   test_single_image('path/to/image.jpg')",
    "=" * 80,
    sep="\n",
)


‚úÖ SETUP COMPLETE!

üìå Quick Start:
   1. Make sure your webcam is connected
   2. Run: run_camera_inference()
   3. Show hand signs to the camera
   4. Press 'q' to quit, 's' to save snapshot

üìå Test single image:
   test_single_image('path/to/image.jpg')
