In [None]:

from ultralytics import YOLO
import cv2, time

def yolo_pose_estimation(video_path, model_path, max_seconds=30):
    """Preview YOLO pose predictions on a video for a limited wall-clock time.

    Every frame read from ``video_path`` is passed to the model and each
    result's ``plot()`` image is shown in a window titled "YOLOv8 Pose".
    The loop stops when the video has no more frames, when ``max_seconds``
    of wall-clock time have elapsed, or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        model_path: Weights handed to ``YOLO``.
        max_seconds: Wall-clock budget for the preview loop (default 30,
            the value that used to be hard-coded).

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    try:
        model = YOLO(model_path)

        # The budget is measured from after the model is loaded.
        start_time = time.time()
        while time.time() - start_time < max_seconds:
            ret, frame = video.read()
            if not ret:
                break

            for r in model(frame):
                cv2.imshow("YOLOv8 Pose", r.plot())

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Also runs on a kernel interrupt or a model error, so the capture
        # handle and the preview window are never left dangling.
        video.release()
        cv2.destroyAllWindows()

# from mmpose.apis import init_model, inference_topdown
# from mmpose.structures import merge_data_samples_vis
from new.mmpose.mmpose.apis import init_model,inference_topdown
from new.mmpose.mmpose.structures import merge_data_samples
import cv2, time

def rtmpose_estimation(video_path, config_file, checkpoint_file, device='cuda:0', max_seconds=30):
    """Preview RTMPose keypoints on a video for a limited wall-clock time.

    Each frame is run through ``inference_topdown`` with no explicit boxes,
    the per-box results are merged, and the predicted keypoints are drawn as
    dots before the frame is shown in a window titled "RTMPose". The loop
    stops at end of video, after ``max_seconds`` of wall-clock time, or when
    the user presses ``q``.

    NOTE(review): written against the MMPose 1.x API, where
    ``inference_topdown(model, img, bboxes=None)`` falls back to a
    whole-image box and ``merge_data_samples`` takes a single list of data
    samples and returns a merged sample (not an image). Confirm against the
    vendored ``new.mmpose`` copy.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        config_file: Model config handed to ``init_model``.
        checkpoint_file: Checkpoint handed to ``init_model``.
        device: Device string handed to ``init_model`` (default ``'cuda:0'``).
        max_seconds: Wall-clock budget for the preview loop (default 30).

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    try:
        model = init_model(config_file, checkpoint_file, device=device)

        start_time = time.time()
        while time.time() - start_time < max_seconds:
            ret, frame = video.read()
            if not ret:
                break

            # The frame is passed as read by OpenCV (BGR).
            # NOTE(review): standard RTMPose configs convert BGR->RGB in the
            # data preprocessor; confirm for the config in use.
            pose_samples = inference_topdown(model, frame)
            merged = merge_data_samples(pose_samples)

            # presumably keypoints is (num_instances, num_keypoints, 2) in
            # pixel coordinates - TODO confirm.
            for person_keypoints in merged.pred_instances.keypoints:
                for keypoint in person_keypoints:
                    center = (int(keypoint[0]), int(keypoint[1]))
                    cv2.circle(frame, center, 3, (0, 255, 0), -1)

            cv2.imshow("RTMPose", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if inference raises
        # or the kernel is interrupted.
        video.release()
        cv2.destroyAllWindows()

import mediapipe as mp
import cv2, time

def mediapipe_pose_estimation(video_path, max_seconds=30):
    """Preview MediaPipe pose landmarks on a video for a limited wall-clock time.

    Each frame is converted from BGR to RGB for ``Pose.process``; when
    landmarks are returned they are drawn onto the original BGR frame, which
    is shown in a window titled "MediaPipe Pose". The loop stops at end of
    video, after ``max_seconds`` of wall-clock time, or when the user
    presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        max_seconds: Wall-clock budget for the preview loop (default 30).

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    mp_pose = mp.solutions.pose
    mp_drawing = mp.solutions.drawing_utils

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    # Created only after the video check so the early return above cannot
    # leave an unclosed Pose instance behind.
    pose = None
    try:
        pose = mp_pose.Pose()

        start_time = time.time()
        while time.time() - start_time < max_seconds:
            ret, frame = video.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow("MediaPipe Pose", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit, on errors and on a kernel interrupt.
        video.release()
        cv2.destroyAllWindows()
        if pose is not None:
            pose.close()


# Project folder that holds the video, weights and config used below.
# Kept in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"

video_path = rf"{BASE_DIR}\Subject_B.MP4"
yolo_model = rf"{BASE_DIR}\yolov8n-pose.pt"
rtmpose_config = rf"{BASE_DIR}\rtmpose_config.py"
# NOTE: placeholder file name - point this at a real RTMPose checkpoint
# before running the RTMPose call below.
rtmpose_checkpoint = "your_rtmpose_model.pth"

# Call the desired method
# yolo_pose_estimation(video_path, yolo_model)
rtmpose_estimation(video_path, rtmpose_config, rtmpose_checkpoint)
# mediapipe_pose_estimation(video_path)


In [1]:
import os
# Display the kernel's current working directory; relative file names
# resolve against it.
os.getcwd()

'c:\\Users\\vaibh\\OneDrive\\Desktop\\New folder\\Folder Python\\Folder ML\\opencv_example'

In [2]:
import requests

url = "https://huggingface.co/openmmlab/rtmpose-s/resolve/main/rtmpose-s_simcc-body7_8xb256-420e_coco-640x640.pth"
output_path = "rtmpose-s_simcc-body7_8xb256-420e_coco-640x640.pth"

print("Downloading RTMPose checkpoint...")
# stream=True keeps the checkpoint out of memory (it is written chunk by
# chunk), and the timeout stops the cell from hanging on a stalled
# connection. The context manager returns the connection to the pool.
with requests.get(url, allow_redirects=True, stream=True, timeout=60) as r:
    if r.status_code == 200:
        with open(output_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
        print("✅ Download complete:", output_path)
    else:
        # The captured run of this cell reports 401 for this URL.
        print("❌ Download failed with status code:", r.status_code)


Downloading RTMPose checkpoint...
❌ Download failed with status code: 401


In [4]:
pip install "qai-hub-models[rtmpose-body2d]" torch==2.4.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch2.4/index.html -f https://qaihub-public-python-wheels.s3.us-west-2.amazonaws.com/index.html


Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [1212 lines of output]
        from pkg_resources import DistributionNotFound, get_distribution, parse_version
      Compiling mmcv._ext only with CPU
      running bdist_wheel
      !!
      
              ********************************************************************************
              With Python 2.7 end-of-life, support for building universal wheels
              (i.e., wheels that support both Python 2 and Python 3)
              is being obviated.
              Please discontinue using this option, or if you still need it,
              file an issue with pypa/setuptools describing your use case.
      
              By 2025-Aug-30, you need to update your project and remove deprecated calls
              or your builds will no longer be supported.
              ********************************************************************************
     

Looking in links: https://download.openmmlab.com/mmcv/dist/cpu/torch2.4/index.html, https://qaihub-public-python-wheels.s3.us-west-2.amazonaws.com/index.html
Collecting torch==2.4.1
  Downloading torch-2.4.1-cp39-cp39-win_amd64.whl.metadata (27 kB)
Collecting qai-hub-models[rtmpose-body2d]
  Downloading qai_hub_models-0.33.0-py3-none-any.whl.metadata (54 kB)
Collecting gdown==4.7.1 (from qai-hub-models[rtmpose-body2d])
  Downloading gdown-4.7.1-py3-none-any.whl.metadata (4.4 kB)
Collecting gitpython==3.1.42 (from qai-hub-models[rtmpose-body2d])
  Downloading GitPython-3.1.42-py3-none-any.whl.metadata (12 kB)
Collecting huggingface-hub<1.0,>=0.23.1 (from qai-hub-models[rtmpose-body2d])
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Collecting ipython==8.12.3 (from qai-hub-models[rtmpose-body2d])
  Using cached ipython-8.12.3-py3-none-any.whl.metadata (5.7 kB)
Collecting onnx>=1.16.1 (from qai-hub-models[rtmpose-body2d])
  Downloading onnx-1.18.0-cp39-cp39-win_amd

In [7]:
import cv2
import time
from ultralytics import YOLO
import mediapipe as mp


def yolo_pose_estimation(video_path, model_path, output_path="yolo_output.avi"):
    """Run a YOLO pose model over a whole video, previewing and saving the result.

    Every frame is passed to the model; each result's ``plot()`` image is
    written to ``output_path`` (XVID) and shown in a window titled
    "YOLOv8 Pose". Processing stops at end of video or when the user
    presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        model_path: Weights handed to ``YOLO``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    out = None
    try:
        model = YOLO(model_path)

        # Get video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report no frame rate; fall back to a nominal
            # value rather than handing the writer an fps of 0.
            fps = 30.0

        # VideoWriter to save output
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            for r in model(frame):
                annotated_frame = r.plot()
                out.write(annotated_frame)
                cv2.imshow("YOLOv8 Pose", annotated_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always finalise the output file and free the capture/window, even
        # when the kernel is interrupted mid-video.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


def mediapipe_pose_estimation(video_path, output_path="mediapipe_output.avi"):
    """Run MediaPipe Pose over a whole video, previewing and saving the result.

    Each frame is converted from BGR to RGB for ``Pose.process``; when
    landmarks are returned they are drawn onto the original BGR frame, which
    is then written to ``output_path`` (XVID) and shown in a window titled
    "MediaPipe Pose". Processing stops at end of video or when the user
    presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    mp_pose = mp.solutions.pose
    mp_drawing = mp.solutions.drawing_utils

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    # Both are created inside the try block so the finally clause can clean
    # up whatever was actually constructed.
    pose = None
    out = None
    try:
        pose = mp_pose.Pose()

        # Get video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Fall back to a nominal rate instead of giving the writer 0 fps.
            fps = 30.0

        # VideoWriter to save output
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            out.write(frame)
            cv2.imshow("MediaPipe Pose", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit, on errors and on a kernel interrupt.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        if pose is not None:
            pose.close()


# === INPUT PATHS ===
# Project folder that holds the input video and the model weights; kept in
# one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"
video_path = rf"{BASE_DIR}\test2.mp4"
yolo_model = rf"{BASE_DIR}\yolov8n-pose.pt"

# === RUN POSE ESTIMATION METHODS ===
print("Running YOLOv8 Pose Estimation...")
yolo_pose_estimation(video_path, yolo_model)

print("Running MediaPipe Pose Estimation...")
mediapipe_pose_estimation(video_path)


Running YOLOv8 Pose Estimation...

0: 384x640 1 person, 138.2ms
Speed: 3.6ms preprocess, 138.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 113.2ms
Speed: 3.3ms preprocess, 113.2ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 113.0ms
Speed: 2.9ms preprocess, 113.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 114.0ms
Speed: 3.4ms preprocess, 114.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 113.9ms
Speed: 3.1ms preprocess, 113.9ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 114.4ms
Speed: 2.5ms preprocess, 114.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 112.6ms
Speed: 3.1ms preprocess, 112.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 110.1ms
Speed: 2.9ms preprocess, 110.1ms inferen

In [None]:
import cv2
import time
from ultralytics import YOLO
import mediapipe as mp


def yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path="yolo_pose_football_output.avi"):
    """Overlay YOLO pose skeletons and football boxes on a video and save it.

    For each frame the pose model's plotted image is used as the canvas; the
    detection model is run on the untouched frame, and every "sports ball"
    box is drawn in yellow with the label "Football". The canvas is written
    to ``output_path`` (XVID) and shown in a preview window. Processing
    stops at end of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pose_model_path: Pose weights handed to ``YOLO`` (e.g. yolov8n-pose.pt).
        detect_model_path: Detection weights handed to ``YOLO`` (e.g. yolov8n.pt).
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # Ultralytics COCO-pretrained detectors use 0-based class indices, where
    # "sports ball" is 32. The value 37 is the id of the same category in the
    # original COCO annotation files and never matches a ball here.
    sports_ball_class_id = 32

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    out = None
    try:
        pose_model = YOLO(pose_model_path)      # e.g. yolov8n-pose.pt
        detect_model = YOLO(detect_model_path)  # e.g. yolov8n.pt (detection)

        # Get video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Fall back to a nominal rate instead of giving the writer 0 fps.
            fps = 30.0

        # Save output video
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Run pose estimation; the plotted image becomes the canvas.
            pose_frame = pose_model(frame)[0].plot()

            # Run object detection and keep only the football boxes.
            for box in detect_model(frame)[0].boxes:
                if int(box.cls[0]) != sports_ball_class_id:
                    continue
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cv2.rectangle(pose_frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
                cv2.putText(pose_frame, "Football", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

            # Display and save
            out.write(pose_frame)
            cv2.imshow("YOLOv8 Pose + Football Detection", pose_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always finalise the output file and free the capture/window, even
        # when the kernel is interrupted mid-video.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()




In [12]:
import cv2
import time
from ultralytics import YOLO
import mediapipe as mp


def yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path="yolo_pose_football_output.avi"):
    """Overlay YOLO pose skeletons and football boxes on a video and save it.

    For each frame the pose model's plotted image is used as the canvas; the
    detection model is run on the untouched frame, and every box of class 32
    is drawn in yellow with the label "Football". The canvas is written to
    ``output_path`` (XVID) and shown in a preview window. Processing stops
    at end of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pose_model_path: Pose weights handed to ``YOLO`` (e.g. yolov8n-pose.pt).
        detect_model_path: Detection weights handed to ``YOLO`` (e.g. yolov8n.pt).
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # 0-based Ultralytics COCO index of "sports ball" (the run log of this
    # cell reports "sports ball" detections).
    sports_ball_class_id = 32

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("Error: Could not access the video file.")
        return

    out = None
    try:
        pose_model = YOLO(pose_model_path)      # e.g. yolov8n-pose.pt
        detect_model = YOLO(detect_model_path)  # e.g. yolov8n.pt (object detection)

        # Get video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Fall back to a nominal rate instead of giving the writer 0 fps.
            fps = 30.0

        # Save output video
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Run pose estimation; the plotted image becomes the canvas.
            pose_frame = pose_model(frame)[0].plot()

            # Run object detection and keep only the football boxes.
            for box in detect_model(frame)[0].boxes:
                if int(box.cls[0]) != sports_ball_class_id:
                    continue
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cv2.rectangle(pose_frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
                cv2.putText(pose_frame, "Football", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

            # Display and save
            out.write(pose_frame)
            cv2.imshow("YOLOv8 Pose + Football Detection", pose_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The captured run of this cell ends in KeyboardInterrupt; releasing
        # here keeps the AVI finalised and the window closed in that case.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


# === Initialization Section ===
# Project folder that holds the input video, the weights and the output; kept
# in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"

video_path = rf"{BASE_DIR}\test2.mp4"

pose_model_path = rf"{BASE_DIR}\yolov8n-pose.pt"
detect_model_path = rf"{BASE_DIR}\yolov8n.pt"

output_path = rf"{BASE_DIR}\yolo_pose_football_output.avi"

# === Run the Function ===
print("Running YOLOv8 Pose + Football Detection...")
yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path)


Running YOLOv8 Pose + Football Detection...

0: 384x640 1 person, 95.5ms
Speed: 2.6ms preprocess, 95.5ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 139.3ms
Speed: 2.1ms preprocess, 139.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 83.2ms
Speed: 2.1ms preprocess, 83.2ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 100.0ms
Speed: 1.9ms preprocess, 100.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 268.3ms
Speed: 2.6ms preprocess, 268.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 68.4ms
Speed: 1.9ms preprocess, 68.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 88.6ms
Speed: 3.3ms preprocess, 88.6ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 1 sports ball, 1

KeyboardInterrupt: 

In [6]:
import cv2
from ultralytics import YOLO


def yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path="yolo_pose_football_output_yolov9.avi"):
    """Overlay pose skeletons and football boxes, save at full size, preview scaled.

    For each frame the pose model's plotted image is used as the canvas; the
    detection model is run on the untouched frame with ``conf=0.01`` and
    every box of class 32 is drawn in yellow with the label "Football". The
    canvas is written to ``output_path`` (XVID) at the source resolution and
    a 960x540 copy is shown in a resizable window. Processing stops at end
    of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pose_model_path: Pose weights handed to ``YOLO``.
        detect_model_path: Detection weights handed to ``YOLO``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # 0-based Ultralytics COCO index of "sports ball".
    sports_ball_class_id = 32
    window_name = "YOLOv8 Pose + Football Detection"
    display_width, display_height = 960, 540

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("❌ Error: Could not access the video file.")
        return

    out = None
    try:
        pose_model = YOLO(pose_model_path)
        detect_model = YOLO(detect_model_path)

        # Get original video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Fall back to a nominal rate instead of giving the writer 0 fps.
            fps = 30.0

        # Output writer (original resolution)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        # Create display window
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, display_width, display_height)

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Pose Estimation: the plotted image replaces the canvas, so no
            # up-front copy of the frame is needed.
            annotated_frame = pose_model(frame)[0].plot()

            # Object Detection on the clean frame. The very low threshold is
            # kept as-is; the captured log shows it admits many extra boxes.
            detect_results = detect_model(frame, conf=0.01)[0]
            for box in detect_results.boxes:
                if int(box.cls[0]) != sports_ball_class_id:
                    continue
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (0, 255, 255), 2)
                cv2.putText(annotated_frame, "Football", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)

            # Save original-sized frame
            out.write(annotated_frame)

            # Resize for display
            resized_frame = cv2.resize(annotated_frame, (display_width, display_height))
            cv2.imshow(window_name, resized_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always finalise the output file and free the capture/window, even
        # when the kernel is interrupted mid-video.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

# === PATH SETUP ===
# Project folder that holds the input video, the weights and the output; kept
# in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"
video_path = rf"{BASE_DIR}\test2.mp4"
pose_model_path = rf"{BASE_DIR}\yolov8n-pose.pt"
# Alternative detector used in earlier runs: rf"{BASE_DIR}\yolov8n.pt"
detect_model_path = rf"{BASE_DIR}\yolov9c.pt"
output_path = rf"{BASE_DIR}\yolo_pose_football_output.avi"

# === RUN ===
print("🎥 Running YOLOv8 Pose + Football Detection...")
yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path)


🎥 Running YOLOv8 Pose + Football Detection...

0: 384x640 1 person, 113.0ms
Speed: 3.1ms preprocess, 113.0ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 4 cars, 5 benchs, 3 sports balls, 1 baseball glove, 3 potted plants, 556.8ms
Speed: 2.7ms preprocess, 556.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 104.0ms
Speed: 3.1ms preprocess, 104.0ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 2 cars, 1 traffic light, 1 fire hydrant, 1 stop sign, 5 benchs, 3 sports balls, 1 baseball glove, 1 chair, 5 potted plants, 557.8ms
Speed: 2.6ms preprocess, 557.8ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 106.6ms
Speed: 3.7ms preprocess, 106.6ms inference, 2.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 5 cars, 1 traffic light, 1 fire hydrant, 7 benchs, 3 sports balls, 1 baseball glove, 1 chair, 4 p

In [4]:
# Instantiate YOLO from the 'yolov9c.pt' weights name; the captured output of
# this cell shows the weights file being downloaded on this run.
# NOTE(review): relies on `YOLO` having been imported by another cell.
model = YOLO('yolov9c.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov9c.pt to 'yolov9c.pt'...


100%|██████████| 49.4M/49.4M [00:49<00:00, 1.04MB/s]


In [20]:
import cv2
from ultralytics import YOLO
import random


def yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path="yolo_pose_football_output_yolov9.avi"):
    """Overlay pose skeletons plus labelled person / football / small-object boxes.

    For each frame the pose model's plotted image (``conf=0.65``) is used as
    the canvas. The detection model is run on the untouched frame with
    ``conf=0.03`` and each box is labelled as follows:

    * class 0 -> "human" (white),
    * class 32 -> "football" (yellow),
    * any other box narrower and shorter than 15 px with confidence above
      0.5 -> "ball_<n>" (red), where ``n`` is a counter that increases on
      every such box in every frame (it is not a stable track id),
    * everything else is left undrawn.

    The canvas is written to ``output_path`` (XVID) at the source resolution
    and a 960x540 copy is shown in a resizable window. Processing stops at
    end of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pose_model_path: Pose weights handed to ``YOLO``.
        detect_model_path: Detection weights handed to ``YOLO``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # 0-based Ultralytics COCO indices. Only 32 is "sports ball"; index 37,
    # previously accepted here as well, is not a ball class in this indexing
    # and produced false "football" labels.
    person_class_id = 0
    sports_ball_class_id = 32
    window_name = "YOLOv9 Pose + Football Detection"
    display_width, display_height = 960, 540

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("❌ Error: Could not access the video file.")
        return

    out = None
    try:
        pose_model = YOLO(pose_model_path)      # yolov8n-pose.pt
        detect_model = YOLO(detect_model_path)  # yolov9c.pt or similar

        # Video properties
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Fall back to a nominal rate instead of giving the writer 0 fps.
            fps = 30.0

        # Output writer
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        # Display settings
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, display_width, display_height)

        # Running number used to label small high-confidence boxes.
        ball_id_counter = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # === Pose Estimation ===
            annotated_frame = pose_model(frame, conf=0.65)[0].plot()

            # === Object Detection ===
            detect_results = detect_model(frame, conf=0.03)[0]

            for box in detect_results.boxes:
                cls_id = int(box.cls[0])
                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                w, h = x2 - x1, y2 - y1

                if cls_id == person_class_id:
                    label, color = "human", (255, 255, 255)
                elif cls_id == sports_ball_class_id:
                    label, color = "football", (0, 255, 255)
                elif w < 15 and h < 15 and conf > 0.5:
                    # Small, confident box of any other class.
                    ball_id_counter += 1
                    label, color = f"ball_{ball_id_counter}", (0, 0, 255)
                else:
                    continue

                # Thin box with a small label just above it.
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), color, 1)
                cv2.putText(annotated_frame, label, (x1, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

            # Save original-sized frame
            out.write(annotated_frame)

            # Resize for display
            resized_frame = cv2.resize(annotated_frame, (display_width, display_height))
            cv2.imshow(window_name, resized_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # The captured run of this cell ends in KeyboardInterrupt; releasing
        # here keeps the AVI finalised and the window closed in that case.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


# === PATH SETUP ===
# Project folder that holds the input video, the weights and the output; kept
# in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"
# Alternative input used in other runs: rf"{BASE_DIR}\test2.mp4"
video_path = rf"{BASE_DIR}\test.mp4"
pose_model_path = rf"{BASE_DIR}\yolov8n-pose.pt"
detect_model_path = rf"{BASE_DIR}\yolov9c.pt"
output_path = rf"{BASE_DIR}\yolo_pose_football_output.avi"

# === RUN ===
print("🎥 Running YOLOv9 Pose + Football Detection...")
yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path)


🎥 Running YOLOv9 Pose + Football Detection...

0: 384x640 1 person, 117.5ms
Speed: 8.3ms preprocess, 117.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 sports balls, 381.3ms
Speed: 1.9ms preprocess, 381.3ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 78.2ms
Speed: 1.2ms preprocess, 78.2ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 sports balls, 351.8ms
Speed: 1.3ms preprocess, 351.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 70.9ms
Speed: 1.2ms preprocess, 70.9ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 4 sports balls, 300.8ms
Speed: 1.2ms preprocess, 300.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 person, 71.7ms
Speed: 1.1ms preprocess, 71.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2

KeyboardInterrupt: 

In [2]:
import cv2
from ultralytics import YOLO
import numpy as np

def yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path="yolo_pose_football_output.avi"):
    """Draw pose keypoints, track footballs with a speed estimate, and save the video.

    Per frame:

    * the pose model (``conf=0.65``) is run and every keypoint with positive
      x and y is drawn as a small green dot;
    * the detection model (``conf=0.1``) is run on the untouched frame;
    * each "sports ball" box is matched to the nearest previously seen ball
      centre within 50 px that has not already been matched in this frame,
      otherwise it gets a new id. Its speed is the centre displacement
      divided by the frame interval (pixels per second when the container
      reports an fps, pixels per frame otherwise); above 100 the status is
      "ACTION", else "STATIONARY";
    * each person box is drawn with an "ACTION BOX" label.

    The canvas is written to ``output_path`` (XVID) at the source resolution
    and a 960x540 copy is shown in a resizable window. Processing stops at
    end of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        pose_model_path: Pose weights handed to ``YOLO``.
        detect_model_path: Detection weights handed to ``YOLO``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # 0-based Ultralytics COCO indices. Only 32 is "sports ball"; index 37,
    # previously accepted here as well, is not a ball class in this indexing.
    person_class_id = 0
    sports_ball_class_id = 32
    match_radius_px = 50    # max centre distance to continue an existing track
    action_speed = 100      # speed above which a ball is labelled "ACTION"
    window_name = "YOLOv9 Pose + Football Detection"
    display_width, display_height = 960, 540

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("❌ Error: Could not access the video file.")
        return

    out = None
    try:
        pose_model = YOLO(pose_model_path)
        detect_model = YOLO(detect_model_path)

        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Frame interval is constant for the whole video, so compute it once.
        dt = 1 / fps if fps > 0 else 1
        # Never hand the writer an fps of 0.
        writer_fps = fps if fps > 0 else 30.0

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, display_width, display_height)

        object_id_counter = 0
        prev_positions = {}  # id: (x, y) of the last centre seen for that id

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Draw on a copy so detection below still sees the clean frame.
            annotated_frame = frame.copy()

            # === Pose Estimation ===
            pose_results = pose_model.predict(frame, conf=0.65, verbose=False)[0]
            if pose_results.keypoints is not None:
                for keypoints in pose_results.keypoints.xy:
                    for x, y in keypoints:
                        if x > 0 and y > 0:
                            cv2.circle(annotated_frame, (int(x), int(y)), 2, (0, 255, 0), -1)

            # === Object Detection ===
            detect_results = detect_model.predict(frame, conf=0.1, verbose=False)[0]

            # Ids already matched in this frame; prevents two balls in the
            # same frame from sharing one id (which yielded a bogus velocity
            # measured between two different balls).
            claimed_ids = set()

            for box in detect_results.boxes:
                cls_id = int(box.cls[0])
                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # === FOOTBALL TRACKING ===
                if cls_id == sports_ball_class_id:
                    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

                    # Nearest unclaimed previous centre within the radius.
                    assigned_id, assigned_dist = None, match_radius_px
                    for obj_id, (px, py) in prev_positions.items():
                        if obj_id in claimed_ids:
                            continue
                        dist = ((cx - px) ** 2 + (cy - py) ** 2) ** 0.5
                        if dist < assigned_dist:
                            assigned_id, assigned_dist = obj_id, dist

                    if assigned_id is None:
                        # New ball: no previous position, so no speed yet.
                        object_id_counter += 1
                        assigned_id = object_id_counter
                        velocity = 0
                    else:
                        velocity = assigned_dist / dt

                    status = "ACTION" if velocity > action_speed else "STATIONARY"
                    claimed_ids.add(assigned_id)
                    prev_positions[assigned_id] = (cx, cy)

                    # Draw blue bounding box
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (255, 0, 0), 1)

                    # Red dot on football
                    cv2.circle(annotated_frame, (cx, cy), 4, (0, 0, 255), -1)

                    # Label with black text
                    info_text = f"ID {assigned_id} | {status} | V={velocity:.1f} | Conf={conf:.2f}"
                    cv2.putText(annotated_frame, info_text, (x1, y1 - 8),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

                # === HUMAN LABELING ===
                elif cls_id == person_class_id:
                    # Draw blue bounding box
                    cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (255, 0, 0), 1)
                    label = f"ACTION BOX | Conf={conf:.2f}"
                    cv2.putText(annotated_frame, label, (x1, y1 - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 0, 0), 1)

            # Write and display
            out.write(annotated_frame)
            resized_frame = cv2.resize(annotated_frame, (display_width, display_height))
            cv2.imshow(window_name, resized_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always finalise the output file and free the capture/window, even
        # when the kernel is interrupted mid-video.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()


# === PATH SETUP ===
# Project folder that holds the input video, the weights and the output; kept
# in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"
video_path = rf"{BASE_DIR}\test2.mp4"
pose_model_path = rf"{BASE_DIR}\yolov8n-pose.pt"
detect_model_path = rf"{BASE_DIR}\yolov9c.pt"
output_path = rf"{BASE_DIR}\yolo_pose_football_output_2.avi"

# === RUN ===
print("🎥 Running YOLOv9 Pose + Football Detection...")
yolo_pose_and_football_detection(video_path, pose_model_path, detect_model_path, output_path)


🎥 Running YOLOv9 Pose + Football Detection...


In [3]:
import cv2
from ultralytics import YOLO
import numpy as np
import mediapipe as mp

def yolo_mediapipe_football_detection(video_path, detect_model_path, output_path="mediapipe_yolo_output.avi"):
    """Draw MediaPipe pose landmarks, track footballs with a speed estimate, and save.

    Per frame:

    * the frame is converted from BGR to RGB for ``Pose.process`` and any
      returned landmarks are drawn on a copy of the frame;
    * the detection model (``conf=0.1``) is run on the untouched frame;
    * each "sports ball" box is matched to the nearest previously seen ball
      centre within 50 px that has not already been matched in this frame,
      otherwise it gets a new id. Its speed is the centre displacement
      divided by the frame interval (pixels per second when the container
      reports an fps, pixels per frame otherwise); above 100 the status is
      "ACTION", else "STATIONARY".

    The canvas is written to ``output_path`` (XVID) at the source resolution
    and a 960x540 copy is shown in a resizable window. Processing stops at
    end of video or when the user presses ``q``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        detect_model_path: Detection weights handed to ``YOLO``.
        output_path: Destination of the annotated video.

    Returns:
        None. Prints an error and returns early if the video cannot be opened.
    """
    # 0-based Ultralytics COCO index of "sports ball"; index 37, previously
    # accepted here as well, is not a ball class in this indexing.
    sports_ball_class_id = 32
    match_radius_px = 50    # max centre distance to continue an existing track
    action_speed = 100      # speed above which a ball is labelled "ACTION"
    window_name = "YOLO + MediaPipe Football Detection"
    display_width, display_height = 960, 540

    mp_pose = mp.solutions.pose
    mp_drawing = mp.solutions.drawing_utils

    video = cv2.VideoCapture(video_path)
    if not video.isOpened():
        print("❌ Error: Could not access the video file.")
        return

    # Created inside the try block so the early return above cannot leak a
    # Pose instance and the finally clause cleans up whatever was built.
    pose = None
    out = None
    try:
        # === Models ===
        detect_model = YOLO(detect_model_path)
        pose = mp_pose.Pose()

        # === Video Info ===
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video.get(cv2.CAP_PROP_FPS)
        dt = 1 / fps if fps > 0 else 1
        # Never hand the writer an fps of 0.
        writer_fps = fps if fps > 0 else 30.0

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_path, fourcc, writer_fps, (frame_width, frame_height))

        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, display_width, display_height)

        object_id_counter = 0
        prev_positions = {}  # id: (x, y) of the last centre seen for that id

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Draw on a copy so detection below still sees the clean frame.
            annotated_frame = frame.copy()

            # === MediaPipe Pose Estimation ===
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                mp_drawing.draw_landmarks(
                    annotated_frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            # === YOLO Football Detection ===
            detect_results = detect_model.predict(frame, conf=0.1, verbose=False)[0]

            # Ids already matched in this frame; prevents two balls in the
            # same frame from sharing one id (which yielded a bogus velocity
            # measured between two different balls).
            claimed_ids = set()

            for box in detect_results.boxes:
                if int(box.cls[0]) != sports_ball_class_id:
                    continue

                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

                # Nearest unclaimed previous centre within the radius.
                assigned_id, assigned_dist = None, match_radius_px
                for obj_id, (px, py) in prev_positions.items():
                    if obj_id in claimed_ids:
                        continue
                    dist = ((cx - px) ** 2 + (cy - py) ** 2) ** 0.5
                    if dist < assigned_dist:
                        assigned_id, assigned_dist = obj_id, dist

                if assigned_id is None:
                    # New ball: no previous position, so no speed yet.
                    object_id_counter += 1
                    assigned_id = object_id_counter
                    velocity = 0
                else:
                    velocity = assigned_dist / dt

                status = "ACTION" if velocity > action_speed else "STATIONARY"
                claimed_ids.add(assigned_id)
                prev_positions[assigned_id] = (cx, cy)

                # Blue bounding box with a red centre dot
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (255, 0, 0), 1)
                cv2.circle(annotated_frame, (cx, cy), 4, (0, 0, 255), -1)

                # Info label (black text)
                info_text = f"ID {assigned_id} | {status} | V={velocity:.1f} | Conf={conf:.2f}"
                cv2.putText(annotated_frame, info_text, (x1, y1 - 8),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

            # Save and show
            out.write(annotated_frame)
            resized_frame = cv2.resize(annotated_frame, (display_width, display_height))
            cv2.imshow(window_name, resized_frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Runs on normal exit, on errors and on a kernel interrupt.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        if pose is not None:
            pose.close()


# === PATH SETUP ===
# Project folder that holds the input video, the weights and the output; kept
# in one place so a move only needs a single edit.
BASE_DIR = r"C:\Users\vaibh\OneDrive\Desktop\New folder\Folder Python\Folder ML\opencv_example"
video_path = rf"{BASE_DIR}\test2.mp4"
detect_model_path = rf"{BASE_DIR}\yolov9c.pt"
output_path = rf"{BASE_DIR}\mediapipe_yolo_output_2.avi"

# === RUN ===
print("🎥 Running MediaPipe + YOLOv9 Football Detection...")
yolo_mediapipe_football_detection(video_path, detect_model_path, output_path)


🎥 Running MediaPipe + YOLOv9 Football Detection...
