In [2]:
import os
import threading

import cv2
import cvzone
import mediapipe as mp
import numpy as np
from playsound import playsound
from ultralytics import YOLO

# Switch into the project directory first: the local ``tracker`` module and
# the relative asset paths used below are all looked up from there.
path = r"D:\Computer Vision\Ai-Tracker-Exercise"
os.chdir(path)
from tracker import Tracker

# Repetition state, updated by the processing loop.
counter, stage = 0, None

# MediaPipe pose estimator and its drawing helpers.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_tracking_confidence=0.5, min_detection_confidence=0.5)

# YOLOv8 detector.
model = YOLO("yolov8s.pt")

# Class names, one per line, indexed by the detector's class id.
with open(r"coco.txt", "r") as label_file:
    classes = label_file.read().split("\n")

# Input video.
cap = cv2.VideoCapture(r"pushup.mp4")

def calculate_angle(a, b, c):
    """Return the angle at vertex ``b`` formed by points ``a`` and ``c``.

    Only the first two coordinates (x, y) of each point are used.

    Args:
        a: First end point, indexable as ``[x, y]``.
        b: Vertex point, indexable as ``[x, y]``.
        c: Second end point, indexable as ``[x, y]``.

    Returns:
        The unsigned angle in degrees, folded into the range [0, 180].
    """
    first, vertex, last = (np.array(point) for point in (a, b, c))

    # Direction of each ray leaving the vertex, measured from the x-axis.
    heading_to_first = np.arctan2(first[1] - vertex[1], first[0] - vertex[0])
    heading_to_last = np.arctan2(last[1] - vertex[1], last[0] - vertex[0])

    spread = np.degrees(np.abs(heading_to_first - heading_to_last))

    # A difference above 180 degrees is the reflex angle; report the inner one.
    return 360 - spread if spread > 180 else spread

# Initialize tracker
tracker = Tracker()


alarm_path = r"D:\Computer Vision\Ai-Tracker-Exercise\assets_alarm.mp3"

# Elbow-angle thresholds (degrees) that define the two push-up stages.
ARMS_EXTENDED_ANGLE = 160
ARMS_BENT_ANGLE = 70
# Minimum detector confidence for a "person" box to be handed to the tracker.
PERSON_MIN_CONFIDENCE = 0.5


def _arm_points(landmarks, side, width, height):
    """Return pixel ``[x, y]`` for the shoulder, elbow and wrist of one arm.

    ``side`` is ``"LEFT"`` or ``"RIGHT"``. Landmark coordinates are scaled by
    ``width`` and ``height`` to turn them into frame pixel positions.
    """
    points = []
    for joint in ("SHOULDER", "ELBOW", "WRIST"):
        landmark = landmarks[mp_pose.PoseLandmark[f"{side}_{joint}"].value]
        points.append([landmark.x * width, landmark.y * height])
    return points


def _play_alarm_async(sound_path):
    """Play ``sound_path`` on a daemon thread.

    ``playsound`` blocks until the clip ends by default, which would freeze
    the video loop every time a repetition is counted.
    """
    threading.Thread(target=playsound, args=(sound_path,), daemon=True).start()


try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Reached when a frame cannot be read, including end of video.
            print("Error loading video.")
            break

        frame = cv2.resize(frame, (1100, 700))
        h, w, _ = frame.shape

        results = model(frame)
        # OpenCV delivers BGR frames; MediaPipe Pose expects RGB input.
        result = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        if result.pose_landmarks:
            mp_drawing.draw_landmarks(
                frame, result.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)
            )

            landmarks = result.pose_landmarks.landmark

            L_shoulder, L_elbow, L_wrist = _arm_points(landmarks, "LEFT", w, h)
            R_shoulder, R_elbow, R_wrist = _arm_points(landmarks, "RIGHT", w, h)

            l_angle = calculate_angle(L_shoulder, L_elbow, L_wrist)
            r_angle = calculate_angle(R_shoulder, R_elbow, R_wrist)

            # Label each elbow with its current angle.
            cv2.putText(frame, f"left_angle: {int(l_angle)}",
                        (int(L_elbow[0]), int(L_elbow[1])),
                        cv2.FONT_HERSHEY_PLAIN, 2, (255, 0, 0), 2)

            cv2.putText(frame, f"right_angle: {int(r_angle)}",
                        (int(R_elbow[0]), int(R_elbow[1])),
                        cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 3)

            # A repetition is counted on the transition from both arms
            # extended ("UP") to both arms bent ("Down").
            if l_angle > ARMS_EXTENDED_ANGLE and r_angle > ARMS_EXTENDED_ANGLE:
                stage = "UP"
            elif stage == "UP" and l_angle < ARMS_BENT_ANGLE and r_angle < ARMS_BENT_ANGLE:
                stage = "Down"
                counter += 1
                print(counter)

                _play_alarm_async(alarm_path)

        cvzone.putTextRect(frame, f"Counter: {counter}", (10, 60), scale=3, thickness=2, colorR=(255, 255, 0), colorT=(0, 0, 0))
        cvzone.putTextRect(frame, f"State: {stage}", (10, 120), scale=3, thickness=2, colorR=(255, 0, 255), colorT=(0, 0, 0))

        # Collect confident "person" boxes for the tracker.
        lis = []
        for res in results:
            for box in res.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                name = classes[int(box.cls[0])]
                if "person" in name and box.conf[0] > PERSON_MIN_CONFIDENCE:
                    lis.append([x1, y1, x2, y2])

        bbox_id = tracker.update(lis)
        for bb in bbox_id:
            x1, y1, x2, y2, idd = bb
            ww = x2 - x1
            hh = y2 - y1

            cvzone.putTextRect(frame, f"Person ID: {idd}", (x1+140, y1-10), scale=1, thickness=1, colorR=(0, 255, 0), colorT=(0, 0, 0))
            cvzone.cornerRect(frame, (x1, y1, ww, hh), l=15, t=15, rt=2, colorR=(0, 0, 0), colorC=(0, 255, 0))

        cv2.imshow("Frame", frame)

        if cv2.waitKey(1) == ord("q"):
            break
finally:
    # Release the capture, windows and pose estimator even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()
    pose.close()


0: 416x640 1 person, 352.1ms
Speed: 8.0ms preprocess, 352.1ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 488.1ms
Speed: 8.0ms preprocess, 488.1ms inference, 4.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 300.7ms
Speed: 4.0ms preprocess, 300.7ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 281.8ms
Speed: 4.0ms preprocess, 281.8ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 270.8ms
Speed: 4.0ms preprocess, 270.8ms inference, 4.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 266.3ms
Speed: 6.0ms preprocess, 266.3ms inference, 3.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 268.8ms
Speed: 4.0ms preprocess, 268.8ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 person, 253.8ms
Speed: 5.0ms preprocess, 253.8ms inference, 2.0ms postprocess per image at