# Production-Ready Hand Gesture Presentation Controller

Features:
- Real-time webcam capture with OpenCV
- Hand gesture detection using trained ML model
- Automatic presentation control via keyboard simulation
- Confidence-based filtering and cooldown mechanism
- Graceful error handling and resource cleanup

## Import Dependencies

In [38]:
import sys
import logging
from pathlib import Path
from typing import Tuple, Optional

import cv2
import numpy as np
import joblib
import pyautogui
from skimage.feature import hog

# Module-level logger used by every cell below.
logger = logging.getLogger(__name__)

# Root logging configuration: INFO and above, each record rendered as
# "timestamp - logger name - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

## GestureController Class Definition

In [39]:
class GestureController:
    """Control slide navigation from webcam hand gestures.

    Every captured frame is cropped to ``roi_bounds``, converted to model
    features (a HOG descriptor for a joblib ``.pkl`` bundle, normalised
    pixels for a Keras ``.h5`` model) and classified. When the top-class
    probability reaches ``confidence_threshold`` the label is mapped to a
    left/right arrow key press through ``pyautogui``. After a trigger, no
    further trigger fires until ``disappear_frames`` consecutive frames have
    scored below the threshold.

    Args:
        model_path: Path to a ``.pkl`` joblib bundle (keys ``model``,
            ``hog_params`` and optionally ``target_size``) or a Keras ``.h5``
            model. The suffix is matched case-insensitively.
        confidence_threshold: Minimum top-class probability that counts as a
            detected gesture.
        disappear_frames: Number of consecutive below-threshold frames needed
            after a trigger before the next trigger is allowed.
        roi_bounds: ``(x0, y0, x1, y1)`` pixel rectangle of the frame that is
            fed to the classifier; it is clamped to the frame size.
        camera_index: Device index handed to ``cv2.VideoCapture``.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        ValueError: If the model format is unsupported or the ``.pkl`` bundle
            is missing required entries.
        ImportError: If a ``.h5`` model is requested without TensorFlow.
        RuntimeError: If the webcam cannot be opened.
    """

    def __init__(
        self,
        model_path: Path,
        confidence_threshold: float = 0.65,
        disappear_frames: int = 5,
        roi_bounds: Tuple[int, int, int, int] = (0, 0, 640, 480),
        camera_index: int = 0
    ):
        self.model_path = Path(model_path)
        self.confidence_threshold = confidence_threshold
        self.disappear_frames = disappear_frames
        self.roi_bounds = roi_bounds
        self.camera_index = camera_index

        # Populated by _load_model().
        self.model = None
        self.hog_params = None
        self.target_size = None      # passed as ``dsize`` to cv2.resize
        self.class_names = None
        self.input_channels = None   # Keras models only: size of the last input axis

        # Runtime state.
        self.cap = None
        self.frame_count = 0
        self.last_triggered_label = None
        self.no_detection_count = 0
        self.can_trigger = True
        self.hand_bbox = None

        self._load_model()
        self._init_camera()

    @property
    def _model_kind(self) -> str:
        """Return the model file suffix in lower case (e.g. ``'.pkl'``)."""
        return self.model_path.suffix.lower()

    def _load_model(self) -> None:
        """Load the model named by ``model_path`` and its metadata.

        Raises:
            FileNotFoundError: If the model file is missing.
            ValueError: If the suffix is unsupported or the bundle is invalid.
            ImportError: If TensorFlow is needed but not installed.
        """
        try:
            if not self.model_path.exists():
                raise FileNotFoundError(f"Model file not found: {self.model_path}")

            kind = self._model_kind
            if kind == '.pkl':
                self._load_svm_bundle()
            elif kind == '.h5':
                self._load_keras_model()
            else:
                raise ValueError(f"Unsupported model format: {self.model_path.suffix}")

            logger.info(f"Model loaded successfully. Classes: {self.class_names}")
            logger.info(f"Target input size: {self.target_size}")

        except Exception as e:
            logger.error(f"Failed to load model: {e}")
            raise

    def _load_svm_bundle(self) -> None:
        """Load a joblib bundle and validate the entries used at inference."""
        logger.info(f"Loading SVM model from {self.model_path}")
        # NOTE(security): joblib.load unpickles arbitrary objects. Only load
        # model files that come from a trusted source.
        bundle = joblib.load(self.model_path)

        if not isinstance(bundle, dict):
            raise ValueError(
                f"Expected a dict bundle in {self.model_path}, got {type(bundle).__name__}"
            )

        model = bundle.get("model")
        hog_params = bundle.get("hog_params")
        # Fail here with a clear message instead of on every frame later.
        if model is None:
            raise ValueError(f"Model bundle {self.model_path} has no 'model' entry")
        if hog_params is None:
            raise ValueError(f"Model bundle {self.model_path} has no 'hog_params' entry")

        self.model = model
        self.hog_params = hog_params
        # Used verbatim as cv2.resize's dsize.
        # NOTE(review): confirm the training code stored it as (width, height).
        self.target_size = tuple(bundle.get("target_size") or (128, 128))
        self.class_names = self.model.classes_.tolist()

    def _load_keras_model(self) -> None:
        """Load a Keras ``.h5`` model plus optional ``labels.txt`` class names."""
        logger.info(f"Loading Keras model from {self.model_path}")
        # Only the import itself is guarded, so errors raised while loading
        # the model are not misreported as "TensorFlow not installed".
        try:
            from tensorflow import keras
        except ImportError as err:
            raise ImportError(
                "TensorFlow/Keras not installed. Install with: pip install tensorflow"
            ) from err

        self.model = keras.models.load_model(str(self.model_path))

        labels_path = self.model_path.parent / "labels.txt"
        if labels_path.exists():
            with open(labels_path, 'r') as f:
                self.class_names = [line.strip() for line in f if line.strip()]
        else:
            self.class_names = ["next", "previous"]

        # Assumes a single channels-last input: (batch, height, width, channels).
        input_shape = self.model.input_shape
        height, width = input_shape[1], input_shape[2]
        # cv2.resize expects dsize as (width, height), not (height, width).
        self.target_size = (width, height)
        self.input_channels = input_shape[-1]

    def _init_camera(self) -> None:
        """Open the webcam and request a 640x480 capture size.

        Raises:
            RuntimeError: If the capture device cannot be opened.
        """
        try:
            self.cap = cv2.VideoCapture(self.camera_index)

            if not self.cap.isOpened():
                raise RuntimeError("Failed to open webcam")

            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

            logger.info("Webcam initialized successfully")

        except Exception as e:
            logger.error(f"Failed to initialize camera: {e}")
            # Do not keep a half-initialised capture handle around.
            if self.cap is not None:
                self.cap.release()
                self.cap = None
            raise

    def _preprocess_roi(self, frame: np.ndarray) -> np.ndarray:
        """Crop ``frame`` to the ROI and convert it to model input features.

        Args:
            frame: BGR image as returned by ``cv2.VideoCapture.read``.

        Returns:
            A ``(1, n_features)`` HOG descriptor for ``.pkl`` models, or a
            ``(1, H, W, C)`` float32 array scaled to ``[0, 1]`` for ``.h5``.

        Raises:
            ValueError: If the clamped ROI is empty or HOG parameters are
                not loaded.
            NotImplementedError: If the model suffix is not supported.
        """
        x0, y0, x1, y1 = self.roi_bounds

        # Clamp the requested rectangle to the actual frame size.
        h, w = frame.shape[:2]
        x0, x1 = max(0, x0), min(w, x1)
        y0, y1 = max(0, y0), min(h, y1)

        roi_frame = frame[y0:y1, x0:x1]

        if roi_frame.size == 0:
            raise ValueError("Invalid ROI bounds - extracted region is empty")

        kind = self._model_kind

        if kind == '.pkl':
            if self.hog_params is None:
                raise ValueError("HOG parameters are not loaded for the SVM model")
            gray = cv2.cvtColor(roi_frame, cv2.COLOR_BGR2GRAY)
            resized = cv2.resize(gray, self.target_size)
            normalized = resized.astype(np.float32) / 255.0
            descriptor = hog(normalized, **self.hog_params)
            return descriptor.reshape(1, -1)

        if kind == '.h5':
            # Single-channel models need a grayscale image; webcam frames
            # are always 3-channel BGR.
            if self.input_channels == 1 and roi_frame.ndim == 3:
                roi_frame = cv2.cvtColor(roi_frame, cv2.COLOR_BGR2GRAY)
            # NOTE(review): colour frames are fed in BGR order - confirm the
            # model was trained on BGR rather than RGB images.
            resized = cv2.resize(roi_frame, self.target_size)
            normalized = resized.astype(np.float32) / 255.0

            if normalized.ndim == 2:
                normalized = np.expand_dims(normalized, axis=-1)

            return np.expand_dims(normalized, axis=0)

        raise NotImplementedError(f"Preprocessing not implemented for {self.model_path.suffix}")

    @staticmethod
    def _to_probabilities(raw: np.ndarray) -> np.ndarray:
        """Convert a flat Keras output vector into per-class probabilities."""
        if raw.size == 1:
            # Single-unit (binary) head: the value scores class index 1.
            score = float(raw[0])
            if score < 0.0 or score > 1.0:
                # Outside [0, 1] it cannot be a probability; treat as a logit.
                score = float(1.0 / (1.0 + np.exp(-score)))
            return np.array([1.0 - score, score])

        if raw.max() > 1.0 or raw.min() < 0.0:
            # Looks like raw logits: apply a numerically stable softmax.
            shifted = np.exp(raw - raw.max())
            return shifted / shifted.sum()

        return raw

    def _predict(self, features: np.ndarray) -> Tuple[str, float]:
        """Classify ``features`` and return ``(label, confidence)``.

        Any failure is logged and reported as ``("unknown", 0.0)`` so that a
        single bad frame does not stop the capture loop.
        """
        try:
            kind = self._model_kind

            if kind == '.pkl':
                probabilities = np.asarray(
                    self.model.predict_proba(features)[0], dtype=float
                )
            elif kind == '.h5':
                raw = np.asarray(
                    self.model.predict(features, verbose=0)[0], dtype=float
                ).ravel()
                probabilities = self._to_probabilities(raw)
            else:
                raise NotImplementedError(
                    f"Prediction not implemented for {self.model_path.suffix}"
                )

            top_idx = int(np.argmax(probabilities))
            predicted_label = self.class_names[top_idx]
            confidence = float(probabilities[top_idx])

            return predicted_label, confidence

        except Exception as e:
            logger.error(f"Prediction failed: {e}")
            return "unknown", 0.0

    def _trigger_action(self, label: str) -> None:
        """Press the arrow key mapped to ``label`` and start the cooldown.

        ``next`` presses the right arrow; ``back``/``previous``/``prev``
        press the left arrow. Unknown labels are logged and leave the trigger
        state untouched.
        """
        label_lower = label.lower()

        if label_lower == "next":
            pyautogui.press('right')
            logger.info("Action triggered: Next slide (right arrow)")

        elif label_lower in ("back", "previous", "prev"):
            pyautogui.press('left')
            logger.info("Action triggered: Previous slide (left arrow)")

        else:
            logger.warning(f"Unknown gesture label: {label}")
            return

        self.last_triggered_label = label
        self.can_trigger = False
        self.no_detection_count = 0

    def _update_trigger_state(self, detected: bool) -> None:
        """Advance the cooldown counter for the current frame.

        While in cooldown, consecutive non-detected frames are counted; a
        detected frame resets the count. Reaching ``disappear_frames``
        re-enables triggering.
        """
        if self.can_trigger:
            return

        if detected:
            self.no_detection_count = 0
            return

        self.no_detection_count += 1
        if self.no_detection_count >= self.disappear_frames:
            self.can_trigger = True
            self.no_detection_count = 0
            logger.info("Ready for next gesture trigger")

    def _detect_hand_location(self, frame: np.ndarray) -> Optional[Tuple[int, int, int, int]]:
        """Return the ``(x, y, w, h)`` box of the largest skin-coloured blob.

        The frame is thresholded in HSV, cleaned with morphological
        close/open, and the largest external contour is boxed. Returns
        ``None`` when there is no contour or its area is below 5000 pixels.
        """
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

        # Fixed HSV range intended to match skin tones.
        lower_skin = np.array([0, 20, 70], dtype=np.uint8)
        upper_skin = np.array([20, 255, 255], dtype=np.uint8)
        mask = cv2.inRange(hsv, lower_skin, upper_skin)

        kernel = np.ones((5, 5), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)

        # findContours returns (contours, hierarchy) in OpenCV 4 and
        # (image, contours, hierarchy) in OpenCV 3; [-2] is the contour
        # list in both.
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        if len(contours) == 0:
            return None

        largest_contour = max(contours, key=cv2.contourArea)

        if cv2.contourArea(largest_contour) < 5000:
            return None

        x, y, w, h = cv2.boundingRect(largest_contour)
        return (x, y, w, h)

    def _draw_ui(
        self,
        frame: np.ndarray,
        label: str,
        confidence: float,
        show_box: bool
    ) -> np.ndarray:
        """Draw the hand box, prediction caption and status line on ``frame``.

        The box and caption are drawn only when ``show_box`` is true and a
        hand box is available; the Ready/Cooldown status is always drawn.
        The frame is modified in place and also returned.
        """
        if show_box and self.hand_bbox is not None:
            x, y, w, h = self.hand_bbox

            color = (0, 255, 0)

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 3)

            label_text = f"{label.capitalize()} {confidence*100:.1f}%"
            (text_width, text_height), baseline = cv2.getTextSize(
                label_text,
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                2
            )

            text_x = x
            text_y = y - 10

            # Not enough room above the box: put the caption below it.
            if text_y < text_height:
                text_y = y + h + text_height + 10

            # Filled black background so the caption stays readable.
            cv2.rectangle(
                frame,
                (text_x, text_y - text_height - 5),
                (text_x + text_width + 10, text_y + 5),
                (0, 0, 0),
                -1
            )

            cv2.putText(
                frame,
                label_text,
                (text_x + 5, text_y),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                color,
                2
            )

        status_text = "Ready" if self.can_trigger else "Cooldown"
        status_color = (0, 255, 0) if self.can_trigger else (0, 165, 255)

        cv2.putText(
            frame,
            f"Status: {status_text}",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            status_color,
            2
        )

        return frame

    def run(self) -> None:
        """Run the capture/classify/trigger loop until 'q' or Ctrl+C.

        Per-frame processing errors are logged and the loop continues; a
        failed frame grab ends the loop. Resources are always released
        through ``cleanup``.
        """
        logger.info("Starting gesture controller. Press 'q' to quit.")
        logger.info(f"Confidence threshold: {self.confidence_threshold}")
        logger.info(f"Cooldown frames: {self.disappear_frames}")

        try:
            while True:
                ret, frame = self.cap.read()

                if not ret:
                    logger.error("Failed to grab frame")
                    break

                self.frame_count += 1

                try:
                    self.hand_bbox = self._detect_hand_location(frame)

                    features = self._preprocess_roi(frame)
                    label, confidence = self._predict(features)

                    # NOTE(review): detection depends only on classifier
                    # confidence. hand_bbox gates drawing, not triggering, so
                    # a confident prediction on a hand-free frame still
                    # fires. Consider also requiring hand_bbox here.
                    detected = confidence >= self.confidence_threshold

                    if detected and self.can_trigger:
                        self._trigger_action(label)

                    self._update_trigger_state(detected)

                    frame = self._draw_ui(frame, label, confidence, detected)

                except Exception as e:
                    logger.error(f"Frame processing error: {e}")

                cv2.imshow('Hand Gesture Controller', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    logger.info("Quit signal received")
                    break

        except KeyboardInterrupt:
            logger.info("Interrupted by user")

        finally:
            self.cleanup()

    def cleanup(self) -> None:
        """Release the webcam and close all OpenCV windows; safe to repeat."""
        logger.info("Cleaning up resources...")

        if self.cap is not None:
            self.cap.release()
            # Drop the handle so a second call does not release it again.
            self.cap = None

        cv2.destroyAllWindows()

        logger.info(f"Total frames processed: {self.frame_count}")
        logger.info("Shutdown complete")

## Configuration and Setup

In [40]:
# Locate the trained model inside ./artifacts, relative to the working directory.
base_dir = Path.cwd()
artifacts_dir = base_dir / "artifacts"

# Candidates in priority order: the SVM bundle first, then the Keras model.
# If neither exists, model_path is left pointing at the last candidate.
for candidate_name in ("gesture_svm.pkl", "model.h5"):
    model_path = artifacts_dir / candidate_name
    if model_path.exists():
        break

if model_path.exists():
    print(f"Model found: {model_path}")
    print(f"Model type: {model_path.suffix}")
else:
    print(f"ERROR: No model found in {artifacts_dir}")
    print("Please ensure either gesture_svm.pkl or model.h5 exists")

Model found: c:\Users\Arya\Downloads\HandGesturePresentationController-main\HandGesturePresentationController-main\artifacts\gesture_svm.pkl
Model type: .pkl


## Run Gesture Controller

**Instructions:**
- The webcam window will open
- Show a hand gesture anywhere in the camera frame
- Show "Next" gesture → presses right arrow key
- Show "Back"/"Previous" gesture → presses left arrow key
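- A detection box labelled with the predicted gesture and its confidence is drawn around the detected hand
- After a gesture is triggered, the next trigger is blocked until no gesture has been detected (confidence below the threshold) for 5 consecutive frames — in practice, briefly remove your hand from view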
- Press 'q' to quit

**Parameters:**
- `confidence_threshold`: 0.65 (65% confidence required)
- `disappear_frames`: 5 (hand must disappear for 5 frames)
- `roi_bounds`: (0, 0, 640, 480) - full camera frame

In [41]:
import traceback

# Settings for this session; they match the parameters documented above.
controller_kwargs = dict(
    model_path=model_path,
    confidence_threshold=0.65,
    disappear_frames=5,
    roi_bounds=(0, 0, 640, 480),
)

try:
    # Construction loads the model and opens the webcam; run() blocks until
    # the user quits.
    controller = GestureController(**controller_kwargs)
    controller.run()
except Exception as exc:
    # Top-level boundary of the notebook cell: log the failure and print the
    # full traceback.
    logger.error(f"Fatal error: {exc}")
    traceback.print_exc()

2025-12-08 13:08:06,604 - __main__ - INFO - Loading SVM model from c:\Users\Arya\Downloads\HandGesturePresentationController-main\HandGesturePresentationController-main\artifacts\gesture_svm.pkl
2025-12-08 13:08:06,611 - __main__ - INFO - Model loaded successfully. Classes: ['next', 'previous']
2025-12-08 13:08:06,613 - __main__ - INFO - Target input size: (128, 128)
2025-12-08 13:08:18,230 - __main__ - INFO - Webcam initialized successfully
2025-12-08 13:08:18,232 - __main__ - INFO - Starting gesture controller. Press 'q' to quit.
2025-12-08 13:08:18,233 - __main__ - INFO - Confidence threshold: 0.65
2025-12-08 13:08:18,234 - __main__ - INFO - Cooldown frames: 5
2025-12-08 13:08:18,641 - __main__ - INFO - Action triggered: Previous slide (left arrow)
2025-12-08 13:08:18,933 - __main__ - INFO - Ready for next gesture trigger
2025-12-08 13:11:30,873 - __main__ - INFO - Action triggered: Previous slide (left arrow)
2025-12-08 13:11:31,699 - __main__ - INFO - Ready for next gesture trigge