In [4]:
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
import time
import pyautogui

# Load the YOLO11n-pose model
model = YOLO("yolo11n-pose.pt")

# Initialize webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Define keypoint indices for elbows (based on COCO 17-keypoint format)
LEFT_SHOULDER = 5
RIGHT_SHOULDER = 6
LEFT_ELBOW = 7
RIGHT_ELBOW = 8
LEFT_WRIST = 9
RIGHT_WRIST = 10

last_trigger_time = 0
cooldown = 1.5

# List to store elbow coordinates and timestamps
data = []

cycle = ""

# Process video frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("Error: Could not read frame.")
        break

    # Perform pose estimation
    results = model(frame, conf=0.5, classes=[0])  # Detect only person class

    # Get current timestamp
    timestamp = time.time()

    # Process results
    for result in results:
        if hasattr(result, 'keypoints') and result.keypoints is not None:
            keypoints = result.keypoints.xy.cpu().numpy()  # Shape: (num_persons, 17, 2)
            confidences = result.keypoints.conf.cpu().numpy()  # Shape: (num_persons, 17)

            for person_idx, (kpts, confs) in enumerate(zip(keypoints, confidences)):
                # Extract elbow coordinates

                # === RIGHT SHOULDER ===
                if confs[RIGHT_SHOULDER] > 0.5:
                    x, y = kpts[RIGHT_SHOULDER]
                    cv2.circle(frame, (int(x), int(y)), 8, (255, 0, 0), -1)  # Blue

                # === RIGHT ELBOW ===
                if confs[RIGHT_ELBOW] > 0.5:
                    x, y = kpts[RIGHT_ELBOW]
                    cv2.circle(frame, (int(x), int(y)), 8, (0, 255, 0), -1)  # Green

                # === RIGHT WRIST ===
                if confs[RIGHT_WRIST] > 0.5:
                    x, y = kpts[RIGHT_WRIST]
                    cv2.circle(frame, (int(x), int(y)), 8, (0, 0, 255), -1)  # Red

                if confs[LEFT_SHOULDER] > 0.5:
                    x, y = kpts[LEFT_SHOULDER]
                    cv2.circle(frame, (int(x), int(y)), 8, (255, 0, 0), -1)  # Blue

                if confs[LEFT_ELBOW] > 0.5:
                    x, y = kpts[LEFT_ELBOW]
                    cv2.circle(frame, (int(x), int(y)), 8, (0, 255, 0), -1)  # Green

                if confs[LEFT_WRIST] > 0.5:
                    x, y = kpts[LEFT_WRIST]
                    cv2.circle(frame, (int(x), int(y)), 8, (0, 0, 255), -1)  # Red

                # Draw Lines
                if confs[RIGHT_SHOULDER] > 0.5 and confs[RIGHT_ELBOW] > 0.5:
                    cv2.line(frame, tuple(kpts[RIGHT_SHOULDER].astype(int)), tuple(kpts[RIGHT_ELBOW].astype(int)), (100, 100, 255), 2)
                if confs[RIGHT_ELBOW] > 0.5 and confs[RIGHT_WRIST] > 0.5:
                    cv2.line(frame, tuple(kpts[RIGHT_ELBOW].astype(int)), tuple(kpts[RIGHT_WRIST].astype(int)), (100, 100, 255), 2)
                if confs[LEFT_SHOULDER] > 0.5 and confs[LEFT_ELBOW] > 0.5:
                    cv2.line(frame, tuple(kpts[LEFT_SHOULDER].astype(int)), tuple(kpts[LEFT_ELBOW].astype(int)), (100, 100, 255), 2)
                if confs[LEFT_ELBOW] > 0.5 and confs[LEFT_WRIST] > 0.5:
                    cv2.line(frame, tuple(kpts[LEFT_ELBOW].astype(int)), tuple(kpts[LEFT_WRIST].astype(int)), (100, 100, 255), 2)


                if confs[RIGHT_WRIST] > 0.5 and confs[RIGHT_SHOULDER] > 0.5:
                    wrist_y = kpts[RIGHT_WRIST][1]
                    shoulder_y = kpts[RIGHT_SHOULDER][1]

                    if wrist_y < shoulder_y - 40 and time.time() - last_trigger_time > cooldown:
                        pyautogui.press('right')
                        print("Next slide triggered")
                        last_trigger_time = time.time()
                        cycle = "Next"
                
                if confs[LEFT_WRIST] > 0.5 and confs[LEFT_SHOULDER] > 0.5:
                    wrist_y = kpts[LEFT_WRIST][1]
                    shoulder_y = kpts[LEFT_SHOULDER][1]

                    if wrist_y < shoulder_y - 40 and time.time() - last_trigger_time > cooldown:
                        pyautogui.press('left')
                        print("Previous slide triggered")
                        last_trigger_time = time.time()
                        cycle = "Previous"

                if time.time() - last_trigger_time < 1:
                    cv2.putText(frame, cycle + " Slide Triggered", (30, 50), cv2.FONT_HERSHEY_SIMPLEX,
                                1, (0, 0, 255), 3)



    # Display the frame
    cv2.imshow("Elbow Tracking", frame)

    # Break loop on 'q' key press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


0: 480x640 1 person, 93.5ms
Speed: 3.6ms preprocess, 93.5ms inference, 6.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 43.9ms
Speed: 1.6ms preprocess, 43.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 47.7ms
Speed: 1.2ms preprocess, 47.7ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 44.0ms
Speed: 1.2ms preprocess, 44.0ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 41.6ms
Speed: 2.3ms preprocess, 41.6ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 42.8ms
Speed: 1.1ms preprocess, 42.8ms inference, 0.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 46.4ms
Speed: 1.2ms preprocess, 46.4ms inference, 0.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 37.6ms
Speed: 1.8ms preprocess, 37.6ms inference, 0.8ms postprocess per image at shape (1, 3, 48