In [4]:
import cv2
import numpy as np
from ultralytics import YOLO
import pyttsx3
import threading
import queue
import cvzone

# Initialize YOLO model
model = YOLO('yolov8n-pose.pt')  # Pose estimation model

# Replace real-time capture with a video file
video_path = "Squat.mp4"  # Update with the path to your video file
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message a wrong path just makes the processing loop
    # exit immediately with no visible explanation.
    print(f"Warning: could not open video source: {video_path}")

# Thresholds and counters
down_thresh = 70  # Angle threshold for squat "down" position
up_thresh = 160   # Angle threshold for squat "up" position
squat_counter = 0     # Number of completed squats (down followed by up)
squat_down = False    # True once the knee angle has dropped below down_thresh

# Initialize text-to-speech engine
engine = pyttsx3.init()
engine.setProperty('rate', 150)
# Prefer the second installed voice (the original choice), but fall back to
# the last available one so a system with a single voice does not raise
# IndexError; keep the engine default when no voices are reported at all.
voices = engine.getProperty('voices') or []
if voices:
    engine.setProperty('voice', voices[min(1, len(voices) - 1)].id)
# Texts waiting to be spoken; consumed by the worker_speak thread.
speech_queue = queue.Queue()

# Text-to-speech
def speak(text):
    """Queue ``text`` for the background speech worker.

    The call only enqueues the item on ``speech_queue``; the actual
    speaking is done by whichever thread consumes that queue.
    """
    speech_queue.put(text, block=True, timeout=None)

def worker_speak():
    """Speak queued texts one at a time until a ``None`` sentinel arrives.

    Blocks on ``speech_queue.get()`` for each item, hands it to the
    module-level ``engine`` and waits for ``runAndWait()`` to return before
    fetching the next one. Receiving ``None`` ends the loop.
    """
    while (pending := speech_queue.get()) is not None:
        engine.say(pending)
        engine.runAndWait()

# Angle calculation
def calculate_angle(a, b, c):
    """Return the angle at ``b`` between the rays b->a and b->c, in degrees.

    Each argument is an (x, y) pair. The result is the absolute difference
    of the two ray directions, folded into the range 0..180.
    """
    first, vertex, last = (np.array(point) for point in (a, b, c))

    # Direction of each ray as measured from the vertex.
    heading_last = np.arctan2(last[1] - vertex[1], last[0] - vertex[0])
    heading_first = np.arctan2(first[1] - vertex[1], first[0] - vertex[0])

    degrees = np.abs((heading_last - heading_first) * 180.0 / np.pi)

    # A reflex reading describes the same pair of rays from the other side.
    return 360 - degrees if degrees > 180.0 else degrees

# Start thread for TTS
thread_speak = threading.Thread(target=worker_speak, daemon=True)
thread_speak.start()

# Keypoint indices used for the knee angle. In the COCO keypoint order used
# by the YOLOv8 pose models these are the left hip, left knee and left ankle.
HIP_IDX, KNEE_IDX, ANKLE_IDX = 11, 13, 15

# Main loop for video capture and processing.
# Reads frames until the video ends or Esc is pressed, runs pose estimation on
# each frame, draws the keypoints, and counts a squat each time the knee angle
# goes below down_thresh and then back above up_thresh.
# NOTE: squat_down / squat_counter are shared by every person in the frame, so
# counting is only meaningful for single-person footage.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (1020, 500))

        # Perform detection
        results = model(frame)

        for result in results:
            if getattr(result, "keypoints", None) is None:
                continue
            # Move to host memory first: .numpy() fails on a CUDA tensor,
            # and .cpu() is a no-op when inference already ran on the CPU.
            keypoints = result.keypoints.xy.cpu().numpy()

            for keypoint in keypoints:
                # Draw keypoints on the frame
                for i, point in enumerate(keypoint):
                    cx, cy = int(point[0]), int(point[1])
                    cv2.circle(frame, (cx, cy), 5, (255, 0, 0), -1)
                    cvzone.putTextRect(frame, f'{i}', (cx, cy), 1, 1)

                # Check if the necessary keypoints are available for squats
                if len(keypoint) <= ANKLE_IDX:
                    continue

                joints = keypoint[[HIP_IDX, KNEE_IDX, ANKLE_IDX]]
                # Ultralytics reports keypoints it could not locate at (0, 0);
                # an angle computed from such a placeholder is meaningless and
                # could falsely trip the thresholds, so skip this person.
                if not joints.any(axis=1).all():
                    continue
                hip, knee, ankle = joints

                knee_angle = calculate_angle(hip, knee, ankle)

                # Display knee angle on frame
                cvzone.putTextRect(frame, f'Knee Angle: {int(knee_angle)}', (50, 50), 1, 1, colorR=(255, 0, 0))

                # Squat counting logic
                if knee_angle < down_thresh and not squat_down:
                    squat_down = True  # Detected "down" position
                elif knee_angle > up_thresh and squat_down:
                    squat_counter += 1  # Increment counter on "up" position
                    squat_down = False
                    speak(f'Squat {squat_counter}')  # Announce count

        # Display squat counter
        cvzone.putTextRect(frame, f'Squat Counter: {squat_counter}', (50, 100), 1, 1, colorR=(0, 255, 0))

        # Display the frame
        cv2.imshow("Pose Detection - Squats", frame)

        # Exit on 'Esc' key press; mask to the low byte because some
        # platforms set extra high bits in the waitKey return value.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # Esc key
            break
finally:
    # Release resources even if processing raised.
    cap.release()
    cv2.destroyAllWindows()

    # Tell the TTS worker to finish what is queued and exit, so the thread
    # does not linger (e.g. across notebook re-runs). The timeout keeps a
    # stalled speech backend from blocking shutdown indefinitely.
    speech_queue.put(None)
    thread_speak.join(timeout=10)



0: 320x640 1 person, 93.0ms
Speed: 3.0ms preprocess, 93.0ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 97.3ms
Speed: 3.0ms preprocess, 97.3ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 100.9ms
Speed: 3.0ms preprocess, 100.9ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 98.6ms
Speed: 3.0ms preprocess, 98.6ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 77.3ms
Speed: 3.0ms preprocess, 77.3ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 65.2ms
Speed: 2.7ms preprocess, 65.2ms inference, 2.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 57.1ms
Speed: 2.5ms preprocess, 57.1ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 person, 56.8ms
Speed: 2.0ms preprocess, 56.8ms inference, 1.0ms postprocess per image at shape (1, 3, 