In [7]:
import cv2
import numpy as np
from ultralytics import YOLO
import time
import pyautogui

# Load the YOLO11n-pose model
model = YOLO("yolo11n-pose.pt")

# Initialize webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Keypoint index for right elbow (COCO format)
RIGHT_ELBOW = 8

# Variables for wave detection
prev_x = None
wave_start_time = None
wave_detected = False
cooldown = 2  # seconds between slide changes

# Main loop
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    results = model(frame, conf=0.5, classes=[0])  # Detect only person

    timestamp = time.time()

    for result in results:
        if hasattr(result, 'keypoints') and result.keypoints is not None:
            keypoints = result.keypoints.xy.cpu().numpy()
            confidences = result.keypoints.conf.cpu().numpy()

            for kpts, confs in zip(keypoints, confidences):
                if confs[RIGHT_ELBOW] > 0.5:
                    right_elbow_x, right_elbow_y = kpts[RIGHT_ELBOW]
                    cv2.circle(frame, (int(right_elbow_x), int(right_elbow_y)), 15, (0, 0, 255), -1)

                    # Wave detection logic
                    if prev_x is not None:
                        dx = right_elbow_x - prev_x

                        # Detect quick back-and-forth movement (wave)
                        if abs(dx) > 40:  # You can tune this threshold
                            if not wave_detected or (timestamp - wave_start_time > cooldown):
                                print("Wave detected! Advancing slide.")
                                pyautogui.press("right")  # Simulate right arrow key
                                wave_start_time = timestamp
                                wave_detected = True
                        else:
                            wave_detected = False

                    prev_x = right_elbow_x

    cv2.imshow("Wave to Advance Slide", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 42.5ms
Speed: 1.3ms preprocess, 42.5ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 36.7ms
Speed: 2.0ms preprocess, 36.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 45.2ms
Speed: 1.1ms preprocess, 45.2ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 43.0ms
Speed: 1.1ms preprocess, 43.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 37.9ms
Speed: 1.1ms preprocess, 37.9ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 46.0ms
Speed: 1.1ms preprocess, 46.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 36.7ms
Speed: 1.1ms preprocess, 36.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.0ms
Speed: 1.1ms preprocess, 41.0ms inference, 0.8ms postprocess per image at shape (1, 3, 48

In [2]:
import cv2
import numpy as np
from ultralytics import YOLO
import time
import pyautogui

# Load the YOLO11n-pose model
model = YOLO("yolo11n-pose.pt")

# Initialize webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Keypoint indices for right arm (COCO format)
RIGHT_SHOULDER = 6
RIGHT_ELBOW = 8
RIGHT_WRIST = 10

# Variables for wave detection
prev_arm_positions = None
wave_positions = []  # Track arm positions for more robust wave detection
wave_start_time = None
last_slide_change = 0
cooldown = 1  # Increased cooldown between slide changes
min_wave_amplitude = 50  # Minimum movement to consider a wave
min_wave_cycles = 2  # Minimum back-and-forth movements needed
wave_timeout = 2.5  # Time window to complete a wave gesture

# Visual feedback variables
slide_changed = False
slide_change_display_time = 0
display_duration = 2.0  # How long to show "Slide Changed" message

def calculate_arm_center(shoulder, elbow, wrist):
    """Calculate the center point of the arm"""
    return np.array([(shoulder[0] + elbow[0] + wrist[0]) / 3, 
                     (shoulder[1] + elbow[1] + wrist[1]) / 3])

def detect_arm_wave_gesture(positions, min_cycles=2, min_amplitude=50):
    """
    Detect wave gesture by analyzing arm center position changes
    Returns True if a valid wave pattern is detected
    """
    if len(positions) < min_cycles * 2 + 1:  # Need at least 2 cycles (4 direction changes + start)
        return False
    
    # Extract x-coordinates (horizontal movement)
    x_positions = [pos[0] for pos in positions]
    
    # Calculate direction changes
    directions = []
    for i in range(1, len(x_positions)):
        if x_positions[i] - x_positions[i-1] > 8:  # Moving right
            directions.append(1)
        elif x_positions[i] - x_positions[i-1] < -8:  # Moving left
            directions.append(-1)
        else:
            directions.append(0)  # No significant movement
    
    # Count direction changes
    direction_changes = 0
    prev_dir = 0
    
    for direction in directions:
        if direction != 0 and direction != prev_dir and prev_dir != 0:
            direction_changes += 1
        if direction != 0:
            prev_dir = direction
    
    # Calculate total amplitude
    min_x = min(x_positions)
    max_x = max(x_positions)
    total_amplitude = max_x - min_x
    
    # Wave detected if we have enough direction changes and amplitude
    return direction_changes >= min_cycles and total_amplitude >= min_amplitude

def draw_arm_skeleton(frame, shoulder, elbow, wrist):
    """Draw the arm skeleton with connections"""
    # Draw arm bones
    cv2.line(frame, (int(shoulder[0]), int(shoulder[1])), 
             (int(elbow[0]), int(elbow[1])), (0, 255, 255), 3)
    cv2.line(frame, (int(elbow[0]), int(elbow[1])), 
             (int(wrist[0]), int(wrist[1])), (0, 255, 255), 3)
    
    # Draw joint points
    cv2.circle(frame, (int(shoulder[0]), int(shoulder[1])), 8, (255, 0, 0), -1)  # Blue shoulder
    cv2.circle(frame, (int(elbow[0]), int(elbow[1])), 8, (0, 255, 0), -1)       # Green elbow
    cv2.circle(frame, (int(wrist[0]), int(wrist[1])), 8, (0, 0, 255), -1)       # Red wrist
    
    # Labels
    cv2.putText(frame, "Shoulder", (int(shoulder[0]) + 10, int(shoulder[1]) - 10), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 0, 0), 1)
    cv2.putText(frame, "Elbow", (int(elbow[0]) + 10, int(elbow[1]) - 10), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
    cv2.putText(frame, "Wrist", (int(wrist[0]) + 10, int(wrist[1]) - 10), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 255), 1)

# Main loop
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break
    
    # Flip frame for mirror effect
    frame = cv2.flip(frame, 1)
    
    results = model(frame, conf=0.5, classes=[0])  # Detect only person
    timestamp = time.time()
    
    # Reset slide change flag after display duration
    if slide_changed and (timestamp - slide_change_display_time) > display_duration:
        slide_changed = False
    
    arm_detected = False
    
    for result in results:
        if hasattr(result, 'keypoints') and result.keypoints is not None:
            keypoints = result.keypoints.xy.cpu().numpy()
            confidences = result.keypoints.conf.cpu().numpy()
            
            for kpts, confs in zip(keypoints, confidences):
                # Check if all arm keypoints are detected with good confidence
                if (confs[RIGHT_SHOULDER] > 0.5 and 
                    confs[RIGHT_ELBOW] > 0.5 and 
                    confs[RIGHT_WRIST] > 0.5):
                    
                    arm_detected = True
                    shoulder = kpts[RIGHT_SHOULDER]
                    elbow = kpts[RIGHT_ELBOW]
                    wrist = kpts[RIGHT_WRIST]
                    
                    # Draw arm skeleton
                    draw_arm_skeleton(frame, shoulder, elbow, wrist)
                    
                    # Calculate arm center position
                    arm_center = calculate_arm_center(shoulder, elbow, wrist)
                    
                    # Draw arm center
                    cv2.circle(frame, (int(arm_center[0]), int(arm_center[1])), 12, (255, 255, 0), -1)
                    cv2.putText(frame, "Arm Center", (int(arm_center[0]) + 15, int(arm_center[1])), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
                    
                    # Initialize wave tracking
                    if prev_arm_positions is None:
                        prev_arm_positions = arm_center
                        wave_positions = [arm_center]
                        wave_start_time = timestamp
                    else:
                        # Add current position to wave tracking
                        wave_positions.append(arm_center)
                        
                        # Keep only recent positions (within wave_timeout)
                        if timestamp - wave_start_time > wave_timeout:
                            wave_positions = wave_positions[-15:]  # Keep last 15 positions
                            wave_start_time = timestamp
                        
                        # Check for wave gesture
                        if detect_arm_wave_gesture(wave_positions, min_wave_cycles, min_wave_amplitude):
                            # Check cooldown
                            if timestamp - last_slide_change > cooldown:
                                print("Arm wave detected! Advancing slide.")
                                pyautogui.press("right")  # Simulate right arrow key
                                last_slide_change = timestamp
                                slide_changed = True
                                slide_change_display_time = timestamp
                                
                                # Reset wave tracking after successful detection
                                wave_positions = [arm_center]
                                wave_start_time = timestamp
                    
                    prev_arm_positions = arm_center
    
    # Visual feedback on screen
    # Status box background
    cv2.rectangle(frame, (10, 10), (450, 140), (50, 50, 50), -1)
    cv2.rectangle(frame, (10, 10), (450, 140), (255, 255, 255), 2)
    
    # Arm detection status
    if arm_detected:
        cv2.putText(frame, "Full Arm Detected: YES", (20, 35), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    else:
        cv2.putText(frame, "Full Arm Detected: NO", (20, 35), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    
    # Wave detection feedback
    wave_progress = len(wave_positions) if wave_positions else 0
    cv2.putText(frame, f"Wave Progress: {wave_progress}/15", (20, 60), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    
    # Current amplitude if tracking
    if len(wave_positions) > 1:
        x_positions = [pos[0] for pos in wave_positions]
        current_amplitude = max(x_positions) - min(x_positions)
        cv2.putText(frame, f"Wave Amplitude: {current_amplitude:.0f}/{min_wave_amplitude}", (20, 85), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    
    # Slide change status
    if slide_changed:
        cv2.putText(frame, "SLIDE CHANGED!", (20, 110), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
    else:
        cv2.putText(frame, "No slide change", (20, 110), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (128, 128, 128), 2)
    
    # Cooldown timer
    time_since_last = timestamp - last_slide_change
    if time_since_last < cooldown:
        remaining_cooldown = cooldown - time_since_last
        cv2.putText(frame, f"Cooldown: {remaining_cooldown:.1f}s", (20, 135), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 128, 0), 2)
    else:
        cv2.putText(frame, "Ready for next slide", (20, 135), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    
    # Instructions
    cv2.putText(frame, "Wave your entire right arm to advance slides", (10, frame.shape[0] - 80), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
    cv2.putText(frame, "System tracks shoulder, elbow, and wrist", (10, frame.shape[0] - 55), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 2)
    cv2.putText(frame, "Need clear back-and-forth motion", (10, frame.shape[0] - 30), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 2)
    cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 5), 
               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (150, 150, 150), 1)
    
    cv2.imshow("Whole Arm Wave to Advance Slide", frame)
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 50.1ms
Speed: 2.3ms preprocess, 50.1ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 47.8ms
Speed: 1.3ms preprocess, 47.8ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 44.8ms
Speed: 2.0ms preprocess, 44.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 40.7ms
Speed: 1.2ms preprocess, 40.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 40.0ms
Speed: 1.2ms preprocess, 40.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.1ms
Speed: 1.0ms preprocess, 41.1ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 46.0ms
Speed: 1.1ms preprocess, 46.0ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 42.5ms
Speed: 1.0ms preprocess, 42.5ms inference, 0.8ms postprocess per image at shape (1, 3, 48