In [45]:
# Install required libraries
!pip install ultralytics mediapipe

import cv2
import numpy as np
from ultralytics import YOLO
import colorsys
import gdown
import os
from scipy.optimize import linear_sum_assignment
import mediapipe as mp



In [48]:
# Initialize MediaPipe Pose.
# static_image_mode=True: pose.process() is called on independent crops of
# different players, so landmarks must be detected from scratch on every crop
# rather than tracked over from the previous (unrelated) crop.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.5)

# Step 1: Download the object detection model from Google Drive
model_url = "https://drive.google.com/uc?id=1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD"
model_path = "/kaggle/working/model.pt"
# Re-running this cell should not fetch the weights again when they are
# already on disk. Delete the file manually to force a fresh download.
if not os.path.exists(model_path):
    gdown.download(model_url, model_path, quiet=False)

# Step 2: Load the model
model = YOLO(model_path)

# Step 3: Load the videos
broadcast_path = "/kaggle/input/task-liat-ai/broadcast.mp4"
tacticam_path = "/kaggle/input/task-liat-ai/tacticam.mp4"

broadcast_cap = cv2.VideoCapture(broadcast_path)
tacticam_cap = cv2.VideoCapture(tacticam_path)

# Debug: Check if videos are loaded
print("Broadcast video opened:", broadcast_cap.isOpened())
print("Tacticam video opened:", tacticam_cap.isOpened())

W0000 00:00:1749378286.173518    1985 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749378286.230096    1985 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
Downloading...
From (original): https://drive.google.com/uc?id=1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD
From (redirected): https://drive.google.com/uc?id=1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD&confirm=t&uuid=2a265af4-b2cd-49ac-ab1f-e49e8108793c
To: /kaggle/working/model.pt
100%|██████████| 195M/195M [00:01<00:00, 138MB/s] 


Broadcast video opened: True
Tacticam video opened: True


In [49]:
# Per-feed tracking state, keyed by frame index and then by player ID.
broadcast_players = dict()  # frame -> {player_id: feature dict}
tacticam_players = dict()   # frame -> {player_id: feature dict}

# Next unused player ID; the processing loop draws IDs for both feeds from it.
player_id_counter = 0

# Outcome of the cross-view matching: broadcast ID -> tacticam ID.
player_mappings = dict()

def extract_pose_features(frame, bbox):
    """Extract a pose feature vector for one detection using MediaPipe.

    Args:
        frame: Image array indexed as ``frame[row, col]``; the crop is
            converted from BGR to RGB before being handed to MediaPipe.
        bbox: ``(x, y, w, h)`` box in pixel coordinates of ``frame``.

    Returns:
        A length-8 array holding the ``(x, y)`` landmark values of the left
        shoulder, right shoulder, left hip and right hip, in that order.
        A zero vector is returned when the box does not overlap the frame
        or when no pose is detected in the crop.
    """
    x, y, w, h = bbox

    # Clamp the box to the frame: a negative start index would wrap around in
    # NumPy slicing, and an empty crop makes cv2.cvtColor raise.
    frame_h, frame_w = frame.shape[0], frame.shape[1]
    x_start, y_start = max(0, x), max(0, y)
    x_end, y_end = min(frame_w, x + w), min(frame_h, y + h)
    if x_end <= x_start or y_end <= y_start:
        return np.zeros(8)

    roi = frame[y_start:y_end, x_start:x_end]
    # Convert BGR to RGB for MediaPipe
    roi_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    results = pose.process(roi_rgb)
    if not results.pose_landmarks:
        return np.zeros(8)  # Zero vector marks "no pose detected"

    landmarks = results.pose_landmarks.landmark
    # Only the torso corners (shoulders and hips) are used as features.
    joints = (
        mp_pose.PoseLandmark.LEFT_SHOULDER,
        mp_pose.PoseLandmark.RIGHT_SHOULDER,
        mp_pose.PoseLandmark.LEFT_HIP,
        mp_pose.PoseLandmark.RIGHT_HIP,
    )
    return np.array([(landmarks[j].x, landmarks[j].y) for j in joints]).flatten()

def calculate_iou(box1, box2):
    """Return the Intersection over Union of two ``(x, y, w, h)`` boxes.

    The result is 0 when the union has no positive area.
    """
    left_a, top_a, width_a, height_a = box1
    left_b, top_b, width_b, height_b = box2

    # Overlap extent along each axis, floored at zero for disjoint boxes.
    overlap_w = max(0, min(left_a + width_a, left_b + width_b) - max(left_a, left_b))
    overlap_h = max(0, min(top_a + height_a, top_b + height_b) - max(top_a, top_b))
    overlap = overlap_w * overlap_h

    union = width_a * height_a + width_b * height_b - overlap
    if union > 0:
        return overlap / union
    return 0

def compute_relative_distances(players, current_id):
    """Return the ascending Euclidean distances from one player to all others.

    Args:
        players: ``{player_id: record}`` for a single frame; each record
            carries a ``'position'`` pair.
        current_id: ID of the reference player inside ``players``.

    Returns:
        A sorted list of distances to every other player, or an empty list
        when ``players`` is empty.
    """
    if not players:
        return []

    origin = players[current_id]['position']
    gaps = [
        np.sqrt((origin[0] - info['position'][0])**2 + (origin[1] - info['position'][1])**2)
        for other_id, info in players.items()
        if other_id != current_id
    ]
    gaps.sort()
    return gaps

def compute_cost(b_player, t_player):
    """Compute the cost of matching a broadcast player to a tacticam player.

    Args:
        b_player: Record with a ``'pose'`` vector and a
            ``'relative_distances'`` sequence for the broadcast player.
        t_player: Same structure for the tacticam player.

    Returns:
        ``0.6 * pose_cost + 0.4 * spatial_cost`` where ``pose_cost`` is the
        Euclidean distance between the pose vectors and ``spatial_cost`` is
        the mean squared difference of the distance vectors. The spatial
        cost is 1.0 when the vectors differ in length and 0.0 when both are
        empty.
    """
    # Pose cost (Euclidean distance between keypoint vectors)
    pose_cost = np.linalg.norm(b_player['pose'] - t_player['pose'])

    # Relative spatial cost
    b_dists = np.asarray(b_player['relative_distances'], dtype=float)
    t_dists = np.asarray(t_player['relative_distances'], dtype=float)
    if len(b_dists) != len(t_dists):
        spatial_cost = 1.0
    elif len(b_dists) == 0:
        # np.mean of an empty array is NaN, which would make the whole cost
        # NaN and the assignment solver reject the matrix. Two players
        # without neighbours carry no spatial evidence either way.
        spatial_cost = 0.0
    else:
        spatial_cost = np.mean((b_dists - t_dists)**2)

    # Combine costs
    return 0.6 * pose_cost + 0.4 * spatial_cost

# Minimum IoU with a previous-frame box for a detection to inherit that box's ID.
IOU_MATCH_THRESHOLD = 0.5


def _track_detections(boxes, frame, previous_players, next_id):
    """Turn one frame's detections into tracked player records.

    Detections are matched one-to-one against the previous frame's players:
    overlaps at or below ``IOU_MATCH_THRESHOLD`` are discarded, then the
    assignment with the largest total IoU is chosen. A previous ID is
    therefore inherited by at most one detection per frame, so two
    detections cannot claim the same ID and overwrite each other. Unmatched
    detections receive a fresh ID.

    Args:
        boxes: Iterable of detections exposing ``xyxy`` (``results[0].boxes``).
        frame: The frame the detections were computed on.
        previous_players: ``{id: record}`` of the preceding frame of the same
            feed (empty for the first frame).
        next_id: First unused player ID.

    Returns:
        ``(players, next_id)`` where ``players`` maps ID -> record with the
        keys ``'pose'``, ``'last_bbox'`` and ``'position'``, and ``next_id``
        is the updated ID counter.
    """
    frame_h, frame_w = frame.shape[0], frame.shape[1]

    # Convert every detection from corner format to integer (x, y, w, h).
    bboxes = []
    for det in boxes:
        corners = det.xyxy[0].cpu().numpy()
        bboxes.append((
            int(corners[0]),
            int(corners[1]),
            int(corners[2] - corners[0]),
            int(corners[3] - corners[1]),
        ))

    # Decide which detections inherit an ID from the previous frame.
    inherited = {}  # detection index -> previous player ID
    prev_ids = list(previous_players)
    if bboxes and prev_ids:
        overlaps = np.array(
            [[calculate_iou(bbox, previous_players[p_id]['last_bbox']) for p_id in prev_ids]
             for bbox in bboxes],
            dtype=float,
        )
        # Zero out weak overlaps first so they cannot outvote a valid match
        # when the solver maximizes the total IoU.
        gated = np.where(overlaps > IOU_MATCH_THRESHOLD, overlaps, 0.0)
        det_rows, prev_cols = linear_sum_assignment(gated, maximize=True)
        for det_idx, prev_idx in zip(det_rows, prev_cols):
            if gated[det_idx, prev_idx] > 0:
                inherited[int(det_idx)] = prev_ids[int(prev_idx)]

    players = {}
    for det_idx, bbox in enumerate(bboxes):
        matched_id = inherited.get(det_idx)
        if matched_id is None:
            matched_id = next_id
            next_id += 1

        players[matched_id] = {
            'pose': extract_pose_features(frame, bbox),
            'last_bbox': bbox,
            # Box centre, normalized by the frame width and height.
            'position': (
                (bbox[0] + bbox[2] // 2) / frame_w,
                (bbox[1] + bbox[3] // 2) / frame_h,
            ),
        }
    return players, next_id


# Process videos frame by frame
frame_count = 0
max_frames = 300
while broadcast_cap.isOpened() and tacticam_cap.isOpened() and frame_count < max_frames:
    ret_b, frame_b = broadcast_cap.read()
    ret_t, frame_t = tacticam_cap.read()

    if not ret_b or not ret_t:
        print(f"Frame reading failed at frame {frame_count}: ret_b={ret_b}, ret_t={ret_t}")
        break

    # Detect players in both frames
    results_b = model(frame_b)
    results_t = model(frame_t)

    # Debug: Print detected classes
    broadcast_classes = [int(det.cls) for det in results_b[0].boxes]
    tacticam_classes = [int(det.cls) for det in results_t[0].boxes]
    print(f"Frame {frame_count}: Broadcast detected classes = {broadcast_classes}")
    print(f"Frame {frame_count}: Tacticam detected classes = {tacticam_classes}")

    # Every detected object is accepted as a player, regardless of class.
    # NOTE(review): the run logs list referees among the detections, so they
    # are tracked as players too - confirm this is intended.
    # Broadcast is processed first, so it draws fresh IDs from the shared
    # counter before tacticam does.
    broadcast_players[frame_count], player_id_counter = _track_detections(
        results_b[0].boxes,
        frame_b,
        broadcast_players.get(frame_count - 1, {}),
        player_id_counter,
    )
    tacticam_players[frame_count], player_id_counter = _track_detections(
        results_t[0].boxes,
        frame_t,
        tacticam_players.get(frame_count - 1, {}),
        player_id_counter,
    )

    # Compute relative distances for this frame
    for feed_players in (broadcast_players[frame_count], tacticam_players[frame_count]):
        for p_id, p_data in feed_players.items():
            p_data['relative_distances'] = compute_relative_distances(feed_players, p_id)

    # Debug: Print number of players detected
    print(f"Frame {frame_count}: Broadcast players = {len(broadcast_players[frame_count])}, Tacticam players = {len(tacticam_players[frame_count])}")

    frame_count += 1
    if frame_count % 50 == 0:
        print(f"Processed {frame_count} frames...")

print(f"Total frames processed: {frame_count}")

def _average_player_features(players_by_frame, num_frames):
    """Average every player's per-frame features over the frames it appears in.

    Args:
        players_by_frame: ``{frame: {player_id: record}}`` where each record
            has a ``'pose'`` vector and a ``'relative_distances'`` sequence.
        num_frames: Number of leading frame indices (``0 .. num_frames - 1``)
            to aggregate.

    Returns:
        ``{player_id: {'pose': mean_pose, 'relative_distances': mean_dists}}``.
        ``mean_pose`` ignores all-zero pose vectors, which is the marker
        ``extract_pose_features`` returns when no pose was detected; it falls
        back to zeros if the player never had a detected pose.
        ``mean_dists`` has the length of the player's longest per-frame
        distance list; each slot is averaged only over the frames that
        actually provide a value for it.
    """
    collected = {}
    for f in range(num_frames):
        for p_id, p_data in players_by_frame[f].items():
            entry = collected.setdefault(p_id, {'pose': [], 'relative_distances': []})
            entry['pose'].append(p_data['pose'])
            entry['relative_distances'].append(p_data['relative_distances'])

    averaged = {}
    for p_id, entry in collected.items():
        # Averaging the "no pose" zero vectors in would shrink the mean
        # toward the origin in proportion to the number of missed frames.
        poses = np.asarray(entry['pose'], dtype=float)
        detected = poses[np.any(poses != 0, axis=1)]
        mean_pose = detected.mean(axis=0) if len(detected) else np.zeros(poses.shape[1])

        dist_lists = entry['relative_distances']
        max_len = max(len(d) for d in dist_lists)
        if max_len == 0:
            mean_dists = np.zeros(0)
        else:
            # Pad with NaN instead of 0 so that missing neighbours do not
            # drag the mean distance toward zero.
            padded = np.full((len(dist_lists), max_len), np.nan)
            for row, d in enumerate(dist_lists):
                padded[row, :len(d)] = d
            mean_dists = np.nanmean(padded, axis=0)

        averaged[p_id] = {'pose': mean_pose, 'relative_distances': mean_dists}
    return averaged


# Aggregate player data across frames and compute average features per player
broadcast_avg = _average_player_features(broadcast_players, frame_count)
tacticam_avg = _average_player_features(tacticam_players, frame_count)

# Debug: Print total players after aggregation
print("Total broadcast players:", len(broadcast_avg))
print("Total tacticam players:", len(tacticam_avg))

# Solve the broadcast -> tacticam assignment with the Hungarian algorithm
b_ids = list(broadcast_avg)
t_ids = list(tacticam_avg)
if b_ids and t_ids:
    # cost_matrix[i, j]: cost of pairing broadcast player i with tacticam player j
    cost_matrix = np.array([
        [compute_cost(broadcast_avg[b_id], tacticam_avg[t_id]) for t_id in t_ids]
        for b_id in b_ids
    ])

    # Minimum-cost one-to-one assignment between the two ID lists
    row_ind, col_ind = linear_sum_assignment(cost_matrix)

    # Translate matrix indices back into player IDs
    player_mappings.update(
        (b_ids[row], t_ids[col]) for row, col in zip(row_ind, col_ind)
    )
else:
    print("No players to match. Check detection logs.")

# Both captures are no longer needed once matching is done
broadcast_cap.release()
tacticam_cap.release()

# Report the mapping, ordered by broadcast ID
print("Player ID Mappings (Broadcast ID -> Tacticam ID):")
if player_mappings:
    for b_id in sorted(player_mappings):
        t_id = player_mappings[b_id]
        print(f"Player {b_id} in broadcast feed maps to Player {t_id} in tacticam feed")
else:
    print("No mappings generated. Check debug logs for issues.")


0: 384x640 3 players, 68.4ms
Speed: 3.0ms preprocess, 68.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 players, 2 referees, 48.4ms
Speed: 2.4ms preprocess, 48.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
Frame 0: Broadcast detected classes = [2, 2, 2]
Frame 0: Tacticam detected classes = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3]


W0000 00:00:1749378297.472210    1986 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Frame 0: Broadcast players = 3, Tacticam players = 24

0: 384x640 2 players, 35.1ms
Speed: 3.7ms preprocess, 35.1ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 players, 1 referee, 41.7ms
Speed: 2.6ms preprocess, 41.7ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
Frame 1: Broadcast detected classes = [2, 2]
Frame 1: Tacticam detected classes = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3]
Frame 1: Broadcast players = 2, Tacticam players = 23

0: 384x640 4 players, 35.6ms
Speed: 3.5ms preprocess, 35.6ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 22 players, 1 referee, 43.5ms
Speed: 2.6ms preprocess, 43.5ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
Frame 2: Broadcast detected classes = [2, 2, 2, 2]
Frame 2: Tacticam detected classes = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2]
Frame 2: Broadcast players = 4, Tacticam players =