In [11]:
# Push-up Counter with XP and Fixes for Real Detection
import cv2
import numpy as np
from ultralytics import YOLO
from collections import deque
import os

def calculate_angle(a, b, c):
    """Return the angle at vertex ``b`` formed by points ``a``-``b``-``c``.

    Args:
        a: Coordinates of the first end point (any sequence ``np.asarray``
            accepts).
        b: Coordinates of the vertex the angle is measured at.
        c: Coordinates of the second end point.

    Returns:
        The angle between the rays ``b -> a`` and ``b -> c`` in degrees,
        in the range [0, 180]. ``nan`` is returned when the angle is
        undefined, i.e. when either ray has zero or non-finite length
        (for example when two of the points coincide).
    """
    # Work in float64 so integer/unsigned inputs cannot wrap on subtraction.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)
    c = np.asarray(c, dtype=float)
    ba = a - b
    bc = c - b

    denom = np.linalg.norm(ba) * np.linalg.norm(bc)
    if not np.isfinite(denom) or denom == 0.0:
        # Degenerate geometry: avoid the 0/0 division (and the NumPy
        # RuntimeWarning it triggers) and report "undefined" explicitly.
        return float("nan")

    cosine_angle = np.dot(ba, bc) / denom
    # Clip guards against rounding pushing the cosine just outside [-1, 1].
    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

def get_elbow_angle_with_confidence(keypoints, confs, min_conf=0.3):
    """Return the smaller of the left/right elbow angles that can be trusted.

    The shoulder, elbow and wrist of each arm are read from indices
    (5, 7, 9) for the left arm and (6, 8, 10) for the right arm. An arm is
    used only when the lowest confidence among its three keypoints is
    strictly greater than ``min_conf``.

    Args:
        keypoints: Indexable collection of keypoint coordinates, or ``None``.
        confs: Indexable collection of per-keypoint confidences aligned with
            ``keypoints``, or ``None``.
        min_conf: Confidence an arm's weakest keypoint must exceed.

    Returns:
        The minimum elbow angle in degrees over the usable arms, or ``None``
        when no arm is usable (missing inputs, too few keypoints, low
        confidence, or an undefined angle).
    """
    if keypoints is None or confs is None:
        return None

    angles = []
    for shoulder, elbow, wrist in ((5, 7, 9), (6, 8, 10)):
        try:
            arm_conf = min(confs[shoulder], confs[elbow], confs[wrist])
            # Written as "not >" so a NaN confidence also rejects the arm.
            if not arm_conf > min_conf:
                continue
            angle = calculate_angle(
                keypoints[shoulder], keypoints[elbow], keypoints[wrist]
            )
        except (IndexError, KeyError, TypeError):
            # Fewer keypoints than expected (e.g. an empty detection).
            continue
        # Compare against None / non-finite explicitly: a valid angle of 0.0
        # is falsy and must not be mistaken for "no measurement".
        if angle is not None and np.isfinite(angle):
            angles.append(angle)

    return min(angles) if angles else None

class PushupCounter:
    """Count push-up repetitions from a stream of elbow angles.

    The counter is a two-state machine ('up' / 'down') driven by a moving
    average of the most recent angles. A repetition is counted on the
    'down' -> 'up' transition, provided more than ``cooldown_frames`` frames
    have passed since the previous count.

    ``frame_count`` is read by ``update`` but never advanced by this class;
    the caller is expected to increment it once per processed frame.
    """

    def __init__(self, model_name='yolo11s-pose.pt', down_threshold=110,
                 up_threshold=145, smoothing_window=5, cooldown_frames=15):
        """Load the pose model and reset the counting state.

        Args:
            model_name: Weights name/path handed to ``YOLO``.
            down_threshold: Smoothed angle (degrees) below which the stage
                switches from 'up' to 'down'.
            up_threshold: Smoothed angle (degrees) above which a 'down'
                stage may complete a repetition.
            smoothing_window: Number of recent angles averaged together.
            cooldown_frames: Minimum frame gap that must be exceeded between
                two counted repetitions.
        """
        self.model = YOLO(model_name)
        self.count = 0
        self.stage = 'up'
        self.down_threshold = down_threshold
        self.up_threshold = up_threshold
        self.angle_history = deque(maxlen=smoothing_window)
        self.frame_count = 0
        self.cooldown_frames = cooldown_frames
        # Start one full cooldown in the past so the first rep is countable.
        self.last_count_frame = -cooldown_frames

    def detect(self, frame):
        """Run the pose model on ``frame`` and extract the first detection.

        Returns:
            A tuple ``(result, keypoints, confs)`` where ``result`` is the
            first element returned by the model, and ``keypoints`` / ``confs``
            are NumPy arrays for the first detection, or ``None`` when the
            model produced no keypoints / confidences.
        """
        result = self.model(frame)[0]
        keypoints = None
        confs = None

        pose = result.keypoints
        if pose is not None:
            xy = pose.xy
            # len() check: indexing [0] on an empty batch would raise.
            if xy is not None and len(xy) > 0:
                keypoints = xy[0].cpu().numpy()
            conf = pose.conf
            if conf is not None and len(conf) > 0:
                confs = conf[0].cpu().numpy()

        return result, keypoints, confs

    def update(self, angle):
        """Feed one elbow-angle measurement into the state machine.

        Args:
            angle: Elbow angle in degrees, or ``None`` / NaN when no reliable
                measurement is available for this frame.

        Returns:
            ``True`` if this measurement completed a repetition (and
            ``count`` was incremented), otherwise ``False``.
        """
        # Skip unusable measurements entirely so a single NaN cannot poison
        # the moving average for the next several frames.
        if angle is None or not np.isfinite(angle):
            return False

        self.angle_history.append(angle)
        smoothed = np.mean(self.angle_history)

        detected = False
        current_frame = self.frame_count

        if smoothed < self.down_threshold and self.stage == 'up':
            self.stage = 'down'

        elif smoothed > self.up_threshold and self.stage == 'down':
            # While still inside the cooldown the stage stays 'down', so the
            # repetition is counted on a later frame once the gap is met.
            if (current_frame - self.last_count_frame) > self.cooldown_frames:
                self.stage = 'up'
                self.count += 1
                self.last_count_frame = current_frame
                detected = True

        return detected
    
def process_pushups(video_path, model_name='yolo11s-pose.pt', output_path="output_pushup.mp4"):
    """Annotate a video with a push-up count and write it to ``output_path``.

    Each frame is passed through a ``PushupCounter``; the pose overlay, the
    running count, an "XP" score (10 per repetition), the current elbow
    angle and celebration banners are drawn before the frame is written.

    Args:
        video_path: Path of the input video.
        model_name: Pose-model weights passed to ``PushupCounter``.
        output_path: Path of the annotated video to create (mp4v codec).

    Returns:
        None. Progress and failure messages are printed.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print("Video load failed.")
        return

    out = None
    try:
        counter = PushupCounter(model_name)

        # Keep the fractional frame rate (e.g. 29.97) so the output duration
        # matches the input, and fall back when the container reports none.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not np.isfinite(fps) or fps <= 0:
            fps = 30.0
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
        if not out.isOpened():
            print(f"Could not open output video for writing: {output_path}")
            return

        font = cv2.FONT_HERSHEY_SIMPLEX
        # Hershey fonts cannot draw emoji (they come out as '?'), so the
        # milestone banner is plain ASCII and centred from its measured width.
        milestone_text = "YOU ROCK!"
        (milestone_w, _), _ = cv2.getTextSize(milestone_text, font, 1.8, 6)
        milestone_timer = 0  # frames the milestone banner stays on screen

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            counter.frame_count += 1
            result, keypoints, confs = counter.detect(frame)
            if keypoints is not None:
                frame = result.plot()

            angle = get_elbow_angle_with_confidence(keypoints, confs)
            detected = counter.update(angle)

            if detected:
                # Flash a translucent orange tint over the whole frame.
                overlay = frame.copy()
                cv2.rectangle(overlay, (0, 0), (width, height), (0, 128, 255), -1)
                frame = cv2.addWeighted(frame, 0.8, overlay, 0.2, 0)
                cv2.putText(frame, "PUSH-UP!", (width//2 - 100, height//2), font, 2.0, (255, 255, 255), 4)
                if counter.count % 10 == 0:
                    milestone_timer = 30

            y = 60
            cv2.putText(frame, f"PUSH-UPS: {counter.count}", (20, y), font, 1.5, (0, 255, 100), 4)
            y += 50
            cv2.putText(frame, f"XP: {counter.count * 10}", (width - 250, y), font, 1.5, (255, 150, 50), 4)

            # Explicit check: an angle of exactly 0.0 is valid but falsy.
            if angle is not None and np.isfinite(angle):
                cv2.putText(frame, f"Angle: {angle:.1f}", (width - 250, height - 20), font, 0.8, (255, 255, 0), 2)

            if milestone_timer > 0:
                cv2.putText(frame, milestone_text, ((width - milestone_w)//2, height//2 - 60), font, 1.8, (0, 255, 255), 6)
                milestone_timer -= 1

            out.write(frame)
    finally:
        # Always release the handles, even if a frame fails mid-way, so the
        # output container is finalised and the input is not left open.
        cap.release()
        if out is not None:
            out.release()

    print(f"Done: {output_path}")

if __name__ == "__main__":
    # Batch driver: annotate every listed clip and save the result next to
    # the input as "<name>_output.mp4".
    MODEL_NAME = "yolo11s-pose.pt"
    VIDEO_DIR = "/Users/shruti.kalaskar/Documents/Northwestern/Spring 2025/Computer Vision/Project/"
    video_files = ["push_up_1.mp4", "push_up_2.mp4"]

    for file in video_files:
        annotated_name = file.replace(".mp4", "_output.mp4")
        input_path, output_path = (
            os.path.join(VIDEO_DIR, file),
            os.path.join(VIDEO_DIR, annotated_name),
        )
        process_pushups(
            video_path=input_path,
            model_name=MODEL_NAME,
            output_path=output_path,
        )



0: 384x640 1 person, 183.6ms
Speed: 1.7ms preprocess, 183.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 173.6ms
Speed: 1.2ms preprocess, 173.6ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 170.1ms
Speed: 1.3ms preprocess, 170.1ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 160.5ms
Speed: 1.5ms preprocess, 160.5ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 176.0ms
Speed: 1.3ms preprocess, 176.0ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 156.1ms
Speed: 1.3ms preprocess, 156.1ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 190.3ms
Speed: 1.3ms preprocess, 190.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 168.4ms
Speed: 3.6ms preprocess, 168.4ms inference, 0.6ms postprocess per image at

  cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))


0: 384x640 1 person, 195.7ms
Speed: 1.5ms preprocess, 195.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 177.9ms
Speed: 1.7ms preprocess, 177.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 162.5ms
Speed: 1.5ms preprocess, 162.5ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 176.8ms
Speed: 1.2ms preprocess, 176.8ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 163.6ms
Speed: 1.3ms preprocess, 163.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 162.4ms
Speed: 1.2ms preprocess, 162.4ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 163.6ms
Speed: 1.3ms preprocess, 163.6ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 169.2ms
Speed: 1.2ms preprocess, 169.2ms inference, 0.6ms postprocess per image at 