# Real-Time ASL Detection Using Webcam

This notebook demonstrates how to use a trained CNN model to detect static ASL signs in real time using a webcam. OpenCV is used for video capture and preprocessing.

## Import Required Libraries

This code cell imports all the necessary libraries for webcam-based ASL detection, including OpenCV for video processing and TensorFlow for model inference.

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import string

## Load the Trained Model

This function loads the trained CNN model from a specified path and prepares it for inference.

In [None]:
def load_trained_model(model_path):
    """
    Read a saved Keras model from disk and report its input/output shapes.

    Any failure while loading or while printing the shape summary is reported
    on stdout instead of being raised.

    Args:
        model_path (str): Location of the saved model file

    Returns:
        tensorflow.keras.Model: The loaded model, or None if anything failed
    """
    loaded = None
    try:
        candidate = tf.keras.models.load_model(model_path)
        print(f"Model loaded successfully from: {model_path}")
        print(f"Model input shape: {candidate.input_shape}")
        print(f"Model output shape: {candidate.output_shape}")
        # Only publish the model once the whole summary has been printed.
        loaded = candidate
    except Exception as err:
        print(f"Error loading model: {err}")
    return loaded

## Create Label Mapping

This function creates a mapping from numerical predictions to actual letters (A-Z) for displaying results.

In [None]:
def create_label_mapping():
    """
    Build the lookup table that turns a class index into its letter.

    Returns:
        dict: Index-to-letter dictionary covering A-Z (0->A, 1->B, ..., 25->Z)
    """
    # enumerate() yields (index, letter) pairs, which dict() consumes directly.
    return dict(enumerate(string.ascii_uppercase))

# Module-level index-to-letter table shared by the inference functions below
LABEL_MAP = create_label_mapping()
# Emit the heading and the mapping on separate lines in a single call
print("Label mapping created:", LABEL_MAP, sep="\n")

## Preprocess Image for Inference

This function preprocesses a single image frame for model inference by resizing it, scaling its pixel values to [0, 1], and adding a batch dimension.

In [None]:
def preprocess_frame(frame, target_size=(224, 224)):
    """
    Turn one video frame into a single-item batch for the model.

    Args:
        frame (np.array): Input frame from webcam
        target_size (tuple): Size passed to cv2.resize

    Returns:
        np.array: Resized frame scaled to [0, 1] with a leading batch axis
    """
    # Resize, then map the 0-255 pixel range onto [0, 1]
    scaled = cv2.resize(frame, target_size) / 255.0

    # Prepend the batch axis expected by model.predict
    return scaled[np.newaxis, ...]

## Test Model on Sample Image

This function tests the trained model on a sample image to verify its functionality before starting real-time detection.

In [None]:
def test_model_on_sample_image(model, sample_image_path, target_size=(224, 224)):
    """
    Run the model once on an image file and print what it predicts.

    Prints the best letter with its confidence, followed by the three
    highest-scoring letters. Errors are reported on stdout, not raised.

    Args:
        model: The trained CNN model
        sample_image_path (str): Path to the sample image
        target_size (tuple): Target size for resizing
    """
    try:
        image = cv2.imread(sample_image_path)
        # cv2.imread signals an unreadable file by returning None
        if image is None:
            print(f"Error: Could not load image from {sample_image_path}")
            return

        batch = preprocess_frame(image, target_size)

        # Single forward pass; the batch holds exactly one image
        predictions = model.predict(batch)
        best_index = np.argmax(predictions)
        best_letter = LABEL_MAP[best_index]
        best_score = predictions[0][best_index]

        print(f"Predicted letter: {best_letter}")
        print(f"Confidence: {best_score:.4f}")
        print("Top 3 predictions:")

        # Highest score first, keeping at most three entries
        scores = predictions[0]
        ranked = np.argsort(scores)[::-1][:3]
        for rank, class_index in enumerate(ranked, start=1):
            letter = LABEL_MAP[class_index]
            score = scores[class_index]
            print(f"  {rank}. {letter}: {score:.4f}")

    except Exception as err:
        print(f"Error during inference: {err}")

## Set Up Real-Time Webcam Detection

This function starts the webcam and performs real-time ASL sign detection, displaying the predicted letter on the video feed whenever its confidence exceeds the threshold. Press 'q' to quit or 's' to save the current frame.

In [None]:
def start_webcam_detection(model, target_size=(224, 224), confidence_threshold=0.5):
    """
    Start real-time ASL detection using webcam.

    Opens camera index 0, classifies every mirrored frame with ``model`` and
    overlays the result on a preview window. 'q' stops the loop and 's' saves
    the currently displayed frame (overlay included) to the working directory.
    The camera and the preview window are released even when the loop is
    ended by an exception or a kernel interrupt.

    Args:
        model: The trained CNN model
        target_size (tuple): Target size for resizing frames
        confidence_threshold (float): Minimum confidence to display prediction
    """
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Error: Could not open webcam")
        cap.release()
        return

    print("Starting webcam detection...")
    print("Press 'q' to quit")
    print("Press 's' to save current frame")

    # Number of frames saved so far; used to give each file a unique name
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame")
                break

            # Flip frame horizontally for mirror effect
            frame = cv2.flip(frame, 1)

            # Preprocess the frame
            input_frame = preprocess_frame(frame, target_size)

            # Run inference (verbose=0 keeps the per-frame progress bar quiet)
            predictions = model.predict(input_frame, verbose=0)
            predicted_index = np.argmax(predictions)
            predicted_letter = LABEL_MAP[predicted_index]
            confidence = predictions[0][predicted_index]

            # Display prediction only if confidence is above threshold
            if confidence > confidence_threshold:
                text = f"Letter: {predicted_letter} ({confidence:.2f})"
                color = (0, 255, 0)  # Green
            else:
                text = "No confident prediction"
                color = (0, 255, 255)  # Yellow

            # Add text overlay
            cv2.putText(frame, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)
            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

            # Display the frame
            cv2.imshow('ASL Real-Time Detection', frame)

            # Handle key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            elif key == ord('s'):
                # Save current frame; imwrite reports failure via its return
                # value, so only claim success (and advance the counter) when
                # the file was actually written.
                filename = f"captured_frame_{frame_count}.jpg"
                if cv2.imwrite(filename, frame):
                    print(f"Frame saved as {filename}")
                    frame_count += 1
                else:
                    print(f"Error: Could not save frame to {filename}")
    finally:
        # Cleanup runs on normal exit, on errors and on interrupts so the
        # camera is never left locked by a dead loop.
        cap.release()
        cv2.destroyAllWindows()
        print("Webcam detection stopped.")

## Complete Inference Pipeline Example

This cell ties the pieces together: it loads the trained model and, if loading succeeds, starts real-time ASL detection from the webcam.

In [None]:
# End-to-end example: load the model, then run live detection

# Settings
MODEL_PATH = "../models/asl_cnn_model.h5"  # Where the trained model is stored
CONFIDENCE_THRESHOLD = 0.7  # Predictions at or below this score are not shown

# 1) Load the model from disk
print("Loading trained model...")
model = load_trained_model(MODEL_PATH)

# 2) Bail out with a hint when loading failed; otherwise start the webcam loop
if model is None:
    print("Failed to load model. Please check the model path.")
else:
    print("\nStarting real-time webcam detection...")
    start_webcam_detection(model, confidence_threshold=CONFIDENCE_THRESHOLD)