In [None]:
pip install ultralytics

In [None]:
from ultralytics import YOLO
import cv2
import torch
import os
from google.colab.patches import cv2_imshow  # for Colab display

# 1. Paths & Parameters
# Clip to analyse and the file the annotated frames are written to.
input_video = "/content/How to play the Cover Drive - Cover Drive Technique and Tips.mp4"
output_video = "/content/Output/output_video.mp4"

# Minimum keypoint confidence required before a joint or limb is drawn.
conf_threshold = 0.5

# Create the destination folder up front so the writer can create the file.
output_dir = os.path.split(output_video)[0]
os.makedirs(output_dir, exist_ok=True)

# 2. Device Check
# Use the GPU when PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Running on {device.upper()}")

# 3. Load Models
# Each checkpoint is loaded from /content and then moved to the chosen device.
pose_model = YOLO("/content/yolo11x-pose.pt")
pose_model = pose_model.to(device)

obj_model = YOLO("/content/yolo12x.pt")
obj_model = obj_model.to(device)

# 4. Keypoint Colors (Body Joints Only: COCO Indices 5 to 16)
# One colour triple per body joint. The position in this list is the joint's
# index inside the 12-element body slice (COCO index minus 5), which is how
# the drawing code looks colours up.
keypoint_colors = [
    # Shoulders
    (0, 255, 255),    # 0: left shoulder
    (128, 0, 128),    # 1: right shoulder
    # Elbows
    (255, 128, 0),    # 2: left elbow
    (0, 128, 255),    # 3: right elbow
    # Wrists
    (128, 255, 0),    # 4: left wrist
    (255, 0, 128),    # 5: right wrist
    # Hips
    (0, 128, 0),      # 6: left hip
    (128, 0, 0),      # 7: right hip
    # Knees
    (0, 255, 128),    # 8: left knee
    (128, 128, 0),    # 9: right knee
    # Ankles
    (0, 0, 128),      # 10: left ankle
    (128, 128, 255),  # 11: right ankle
]

# 5. Define Skeleton Connections (Based on Body Keypoints)
# Each pair holds two indices into the 12-joint body slice; a line is drawn
# between the two joints of a pair. Order is preserved because it is also the
# order in which the lines are drawn.
connections = [
    # Shoulder line
    (0, 1),    # left shoulder  - right shoulder
    # Left arm
    (0, 2),    # left shoulder  - left elbow
    (2, 4),    # left elbow     - left wrist
    # Right arm
    (1, 3),    # right shoulder - right elbow
    (3, 5),    # right elbow    - right wrist
    # Left leg
    (6, 8),    # left hip       - left knee
    (8, 10),   # left knee      - left ankle
    # Right leg
    (7, 9),    # right hip      - right knee
    (9, 11),   # right knee     - right ankle
    # Torso
    (0, 6),    # left shoulder  - left hip
    (1, 7),    # right shoulder - right hip
    (6, 7),    # left hip       - right hip
]

# 6. Open Video & Setup Writer
# Opens the input clip, reads its geometry and frame rate, derives the frame
# sampling step, and opens the output writer.
cap = cv2.VideoCapture(input_video)
if not cap.isOpened():
    raise RuntimeError("Error: cannot open video file.")

width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = cap.get(cv2.CAP_PROP_FPS)

# Some containers report 0 (or NaN) for the frame rate. `not fps > 0` catches
# both, and a writer opened with such a rate cannot produce a playable file.
if not fps > 0:
    print("Warning: input reports no valid FPS; assuming 30.0")
    fps = 30.0

# Compute frame sampling to process 6 frames per second
skip_frames = max(1, int(fps / 6))
print(f"Original FPS: {fps}, Processing every {skip_frames}th frame to achieve ~6 FPS")

# Only every `skip_frames`-th frame is written, so the writer must run at the
# reduced rate; writing at the source rate would make the result play
# `skip_frames` times too fast.
out_fps = fps / skip_frames

# 'mp4v' matches the .mp4 container of `output_video`; 'XVID' is an AVI tag.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out    = cv2.VideoWriter(output_video, fourcc, out_fps, (width, height))
if not out.isOpened():
    # Fail loudly instead of silently dropping every frame later on.
    cap.release()
    raise RuntimeError("Error: cannot open video writer for output file.")

# 7. Processing Loop
# Reads the input frame by frame, annotates every `skip_frames`-th frame with
# the pose skeleton and bat/ball boxes, writes it to `out` and shows it inline.

# Class ids drawn from the object model, mapped to (colour, label).
# Colours are passed straight to cv2, i.e. (255, 0, 0) renders blue.
object_styles = {
    34: ((255, 0, 0), "bat"),     # Blue for bat
    32: ((0, 255, 255), "ball"),  # Yellow for ball
}


def _split_keypoint(point):
    """Return ``(x, y, conf)`` for one keypoint row.

    Rows that carry no confidence column are treated as fully confident (1.0).
    """
    if len(point) == 3:
        x, y, conf = point
        return x, y, conf
    return point[0], point[1], 1.0


def _draw_pose(frame, people):
    """Draw body joints and skeleton lines for every detected person on *frame*.

    *people* is iterated person by person; each person is expected to hold at
    least 17 keypoint rows (anything shorter is skipped). Only rows 5..16 are
    drawn, coloured via ``keypoint_colors`` and linked via ``connections``.
    """
    for person in people:
        if len(person) < 17:
            continue  # Skip if keypoints are incomplete

        body = [_split_keypoint(row) for row in person[5:17]]

        # Joints
        for (x, y, conf), color in zip(body, keypoint_colors):
            if conf >= conf_threshold:
                cv2.circle(frame, (int(x), int(y)), 5, color, -1)

        # Skeleton lines; each line takes the colour of its first joint.
        for start, end in connections:
            x1, y1, c1 = body[start]
            x2, y2, c2 = body[end]
            if c1 >= conf_threshold and c2 >= conf_threshold:
                cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                         keypoint_colors[start], 2)


def _draw_objects(frame, boxes):
    """Draw a labelled rectangle on *frame* for every bat/ball box in *boxes*."""
    for box in boxes:
        style = object_styles.get(int(box.cls[0]))
        if style is None:
            continue  # Not a bat or a ball
        color, label = style
        x1, y1, x2, y2 = box.xyxy[0].int().tolist()
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 8), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


frame_idx     = 0
processed_cnt = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Advance the counter once per decoded frame, whether or not it is processed.
        current_idx = frame_idx
        frame_idx += 1

        # Process only every nth frame to achieve 6 FPS
        if current_idx % skip_frames != 0:
            continue

        # Object Tracking (Bat and Ball)
        obj_results = obj_model.track(frame, persist=True)

        # Pose Estimation
        pose_results = pose_model(frame)

        # Draw Pose Keypoints & Skeleton
        keypoints = pose_results[0].keypoints
        if keypoints is not None and keypoints.data is not None:
            # Copy to host memory once; reading scalars one at a time from a
            # GPU tensor would trigger a device-to-host transfer per value.
            _draw_pose(frame, keypoints.data.detach().cpu().numpy())

        # Draw Bat & Ball Detections
        boxes = obj_results[0].boxes
        if boxes is not None:
            _draw_objects(frame, boxes)

        # Write & Display
        out.write(frame)
        cv2_imshow(frame)

        processed_cnt += 1
        if processed_cnt % 100 == 0:
            print(f"Processed {processed_cnt} frames.")
finally:
    # 8. Cleanup
    # Runs even when the loop raises or the cell is interrupted, so the capture
    # and writer are always released and the output file gets finalised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Processing complete. Output saved to {output_video}")