In [1]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import torch
import torchvision.transforms as transforms
from PIL import Image
import warnings
warnings.filterwarnings('ignore')
from ultralytics import YOLO
from scipy.optimize import linear_sum_assignment
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


  from scipy.stats import gaussian_kde


In [2]:
# Configuration and Setup
class Config:
    BROADCAST_VIDEO = "data/broadcast.mp4"
    TACTICAM_VIDEO = "data/tacticam.mp4"
    MODEL_PATH = "best.pt"
    MAPPING_JSON = "mapping.json"
    
    FRAME_SKIP = 5  
    CONFIDENCE_THRESHOLD = 0.5
    IOU_THRESHOLD = 0.4
    
    # Feature extraction parameters
    FEATURE_DIM = 512
    TEMPORAL_WINDOW = 10  
    
    OUTPUT_DIR = Path("outputs")
    
Config.OUTPUT_DIR.mkdir(exist_ok=True)


In [3]:
# Loads a custom YOLOv11 model and prepares it for inference on GPU or CPU
def load_model():
    import torch
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")
    
    model = YOLO(Config.MODEL_PATH)
    print(f"Custom YOLOv11 model loaded successfully from {Config.MODEL_PATH}")
    
    if torch.cuda.is_available():
        model.to(device)
        print(f"Model moved to GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("GPU not available, running on CPU")
    
    print(f"Model classes: {list(model.names.values())}")
    
    return model

model = load_model()


Using device: cuda
Custom YOLOv11 model loaded successfully from best.pt
Model moved to GPU: NVIDIA GeForce RTX 3060 Laptop GPU
Model classes: ['ball', 'goalkeeper', 'player', 'referee']


In [4]:
# Loads videos and extracts frames with optional skipping and max frame limit
def load_video(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Error opening video file: {video_path}")
    
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    
    print(f"Video info: {frame_count} frames, {fps:.2f} FPS, {width}x{height}")
    return cap, {'fps': fps, 'frame_count': frame_count, 'width': width, 'height': height}

def extract_frames(cap, max_frames=None, frame_skip=1):
    frames = []
    frame_indices = []
    frame_idx = 0
    
    while True:
        ret, frame = cap.read()
        if not ret:
            break
            
        if frame_idx % frame_skip == 0:
            frames.append(frame)
            frame_indices.append(frame_idx)
            
        frame_idx += 1
        
        if max_frames and len(frames) >= max_frames:
            break
    
    cap.release()
    return frames, frame_indices

broadcast_cap, broadcast_info = load_video(Config.BROADCAST_VIDEO)
tacticam_cap, tacticam_info = load_video(Config.TACTICAM_VIDEO)


Video info: 132 frames, 24.81 FPS, 1920x1080
Video info: 201 frames, 24.63 FPS, 1920x1080


In [5]:
# Detects players in video frames using a YOLO model and processes detections per frame
def detect_players(model, frame, conf_threshold=0.3):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results = model(frame, conf=conf_threshold, device=device, verbose=False)
    
    detections = []
    player_classes = [1, 2, 3]
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                confidence = box.conf[0].cpu().numpy()
                class_id = int(box.cls[0].cpu().numpy())
                
                if class_id in player_classes:
                    width = x2 - x1
                    height = y2 - y1
                    area = width * height
                    
                    if width > 10 and height > 20 and area > 300:
                        detections.append({
                            'bbox': [int(x1), int(y1), int(x2), int(y2)],
                            'confidence': float(confidence),
                            'center': [(x1 + x2) / 2, (y1 + y2) / 2],
                            'area': area,
                            'class_id': class_id,
                            'class_name': model.names[class_id]
                        })
    
    return detections

def process_video_detections(model, video_path, frame_skip=3, max_frames=100):
    cap = cv2.VideoCapture(video_path)
    detections_by_frame = {}
    frame_idx = 0
    processed_frames = 0
    
    print(f"Processing {video_path}...")
    
    while True:
        ret, frame = cap.read()
        if not ret or processed_frames >= max_frames:
            break
        
        if frame_idx % frame_skip == 0:
            detections = detect_players(model, frame, Config.CONFIDENCE_THRESHOLD)
            detections_by_frame[frame_idx] = detections
            processed_frames += 1
            
            if processed_frames % 15 == 0:
                print(f"Processed {processed_frames} frames...")
        
        frame_idx += 1
    
    cap.release()
    total_detections = sum(len(dets) for dets in detections_by_frame.values())
    print(f"Completed processing {processed_frames} frames, {total_detections} total detections")
    return detections_by_frame

broadcast_detections = process_video_detections(model, Config.BROADCAST_VIDEO, Config.FRAME_SKIP)
tacticam_detections = process_video_detections(model, Config.TACTICAM_VIDEO, Config.FRAME_SKIP)


Processing data/broadcast.mp4...
Processed 15 frames...
Completed processing 27 frames, 304 total detections
Processing data/tacticam.mp4...
Processed 15 frames...
Processed 30 frames...
Completed processing 41 frames, 904 total detections


In [6]:
# Extracts visual and spatial features and builds player tracks across video frames
def extract_visual_features(frame, bbox):
    x1, y1, x2, y2 = bbox
    player_crop = frame[y1:y2, x1:x2]
    
    if player_crop.size == 0:
        return np.zeros(Config.FEATURE_DIM)
    
    player_crop = cv2.resize(player_crop, (64, 128))
    player_crop = cv2.cvtColor(player_crop, cv2.COLOR_BGR2RGB)
    
    hist_features = []
    for i in range(3):
        hist = cv2.calcHist([player_crop], [i], None, [32], [0, 256])
        hist_features.extend(hist.flatten())
    
    gray = cv2.cvtColor(player_crop, cv2.COLOR_RGB2GRAY)
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    
    texture_features = [
        np.mean(grad_x), np.std(grad_x),
        np.mean(grad_y), np.std(grad_y)
    ]
    
    features = np.array(hist_features + texture_features)
    
    if len(features) < Config.FEATURE_DIM:
        features = np.pad(features, (0, Config.FEATURE_DIM - len(features)))
    else:
        features = features[:Config.FEATURE_DIM]
    
    return features

def extract_spatial_features(bbox, frame_shape):
    x1, y1, x2, y2 = bbox
    frame_height, frame_width = frame_shape[:2]
    
    features = {
        'center_x': (x1 + x2) / 2 / frame_width,
        'center_y': (y1 + y2) / 2 / frame_height,
        'width': (x2 - x1) / frame_width,
        'height': (y2 - y1) / frame_height,
        'area': ((x2 - x1) * (y2 - y1)) / (frame_width * frame_height),
        'aspect_ratio': (x2 - x1) / max(1, (y2 - y1))
    }
    
    return np.array(list(features.values()))

def build_player_tracks(detections_by_frame, max_distance=80, max_frame_gap=6, min_track_length=3):
    tracks = []
    
    for frame_idx in sorted(detections_by_frame.keys()):
        detections = detections_by_frame[frame_idx]
        
        for detection in detections:
            detection['frame'] = frame_idx
            assigned = False
            
            for track in tracks:
                if len(track) > 0:
                    last_detection = track[-1]
                    
                    distance = np.sqrt((detection['center'][0] - last_detection['center'][0])**2 + 
                                       (detection['center'][1] - last_detection['center'][1])**2)
                    
                    frame_gap = frame_idx - last_detection['frame']
                    
                    if (distance < max_distance and 
                        frame_gap <= max_frame_gap and 
                        detection.get('class_id') == last_detection.get('class_id')):
                        track.append(detection)
                        assigned = True
                        break
            
            if not assigned:
                tracks.append([detection])
    
    filtered_tracks = [track for track in tracks if len(track) >= min_track_length]
    return filtered_tracks

broadcast_tracks = build_player_tracks(broadcast_detections)
print(f"Found {len(broadcast_tracks)} tracks in broadcast video")

tacticam_tracks = build_player_tracks(tacticam_detections)
print(f"Found {len(tacticam_tracks)} tracks in tacticam video")


Found 19 tracks in broadcast video
Found 50 tracks in tacticam video


In [7]:
# Performs enhanced multi-video soccer player mapping using visual, spatial, and temporal features
from scipy.spatial.distance import cdist

def detect_soccer_players(model, frame, conf_threshold=0.3):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results = model(frame, conf=conf_threshold, device=device, verbose=False)
    
    detections = []
    player_classes = [1, 2, 3]
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                confidence = box.conf[0].cpu().numpy()
                class_id = int(box.cls[0].cpu().numpy())
                if class_id in player_classes:
                    width = x2 - x1
                    height = y2 - y1
                    area = width * height
                    if width > 10 and height > 20 and area > 300:
                        detections.append({
                            'bbox': [int(x1), int(y1), int(x2), int(y2)],
                            'confidence': float(confidence),
                            'center': [(x1 + x2) / 2, (y1 + y2) / 2],
                            'area': area,
                            'class_id': class_id,
                            'class_name': model.names[class_id],
                            'width': width,
                            'height': height
                        })
    return detections

def process_soccer_video(model, video_path, frame_skip=2, max_frames=200):
    cap = cv2.VideoCapture(video_path)
    detections_by_frame = {}
    frame_idx = 0
    processed_frames = 0
    print(f"Processing {video_path} for soccer players...")
    while True:
        ret, frame = cap.read()
        if not ret or processed_frames >= max_frames:
            break
        if frame_idx % frame_skip == 0:
            detections = detect_soccer_players(model, frame, conf_threshold=0.3)
            detections_by_frame[frame_idx] = detections
            processed_frames += 1
            if processed_frames % 20 == 0:
                player_count = len(detections)
                class_breakdown = {}
                for det in detections:
                    class_name = det['class_name']
                    class_breakdown[class_name] = class_breakdown.get(class_name, 0) + 1
                print(f"Frame {frame_idx}: {player_count} total - {class_breakdown}")
        frame_idx += 1
    cap.release()
    total_detections = sum(len(dets) for dets in detections_by_frame.values())
    print(f"Completed: {processed_frames} frames, {total_detections} total player detections")
    return detections_by_frame

def extract_temporal_features(track):
    if len(track) < 2:
        return np.zeros(8)
    positions = np.array([det['center'] for det in track])
    frames = np.array([det['frame'] for det in track])
    velocities = []
    for i in range(1, len(positions)):
        dx = positions[i][0] - positions[i-1][0]
        dy = positions[i][1] - positions[i-1][1]
        dt = frames[i] - frames[i-1]
        if dt > 0:
            vx = dx / dt
            vy = dy / dt
            velocities.append([vx, vy])
    if not velocities:
        return np.zeros(8)
    velocities = np.array(velocities)
    speed = np.sqrt(velocities[:, 0]**2 + velocities[:, 1]**2)
    features = [
        np.mean(speed),
        np.max(speed),
        np.var(speed),
        np.mean(velocities[:, 0]),
        np.mean(velocities[:, 1]),
        frames[-1] - frames[0],
        np.sqrt((positions[-1][0] - positions[0][0])**2 + (positions[-1][1] - positions[0][1])**2),
        np.var(positions, axis=0).mean()
    ]
    return np.array(features)

def compute_enhanced_track_features(track, video_path):
    cap = cv2.VideoCapture(video_path)
    visual_features = []
    spatial_features = []
    for detection in track:
        frame_idx = detection['frame']
        bbox = detection['bbox']
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if ret:
            vis_feat = extract_visual_features(frame, bbox)
            visual_features.append(vis_feat)
            spat_feat = extract_spatial_features(bbox, frame.shape)
            spatial_features.append(spat_feat)
    cap.release()
    avg_visual = np.mean(visual_features, axis=0) if visual_features else np.zeros(Config.FEATURE_DIM)
    avg_spatial = np.mean(spatial_features, axis=0) if spatial_features else np.zeros(6)
    temporal_features = extract_temporal_features(track)
    return np.concatenate([avg_visual, avg_spatial, temporal_features])

def calculate_similarity_matrix(broadcast_features, tacticam_features):
    similarity_matrix = np.zeros((len(broadcast_features), len(tacticam_features)))
    scaler = StandardScaler()
    if len(broadcast_features) > 0 and len(tacticam_features) > 0:
        all_features = broadcast_features + tacticam_features
        all_features_scaled = scaler.fit_transform(all_features)
        broadcast_scaled = all_features_scaled[:len(broadcast_features)]
        tacticam_scaled = all_features_scaled[len(broadcast_features):]
        for i, b_feat in enumerate(broadcast_scaled):
            for j, t_feat in enumerate(tacticam_scaled):
                cos_sim = cosine_similarity([b_feat], [t_feat])[0, 0]
                euclidean_dist = np.linalg.norm(b_feat - t_feat)
                euclidean_sim = 1 / (1 + euclidean_dist)
                if len(b_feat) > 1:
                    correlation = np.corrcoef(b_feat, t_feat)[0, 1]
                    correlation = 0 if np.isnan(correlation) else correlation
                else:
                    correlation = 0
                combined_similarity = (0.5 * cos_sim + 0.3 * euclidean_sim + 0.2 * abs(correlation))
                similarity_matrix[i, j] = max(0, min(1, combined_similarity))
    noise = np.random.normal(0, 0.001, similarity_matrix.shape)
    similarity_matrix = np.clip(similarity_matrix + noise, 0, 1)
    return similarity_matrix

def enhanced_player_mapping(broadcast_tracks, tacticam_tracks):
    print("Computing enhanced features for all tracks...")
    broadcast_features = []
    broadcast_classes = []
    for i, track in enumerate(broadcast_tracks):
        features = compute_enhanced_track_features(track, Config.BROADCAST_VIDEO)
        broadcast_features.append(features)
        broadcast_classes.append(track[0].get('class_name', 'unknown'))
        if (i + 1) % 5 == 0:
            print(f"Processed {i + 1}/{len(broadcast_tracks)} broadcast tracks")
    tacticam_features = []
    tacticam_classes = []
    for i, track in enumerate(tacticam_tracks):
        features = compute_enhanced_track_features(track, Config.TACTICAM_VIDEO)
        tacticam_features.append(features)
        tacticam_classes.append(track[0].get('class_name', 'unknown'))
        if (i + 1) % 5 == 0:
            print(f"Processed {i + 1}/{len(tacticam_tracks)} tacticam tracks")
    print("Calculating similarity matrix...")
    similarity_matrix = calculate_similarity_matrix(broadcast_features, tacticam_features)
    print("Applying class-based constraints...")
    for i, b_class in enumerate(broadcast_classes):
        for j, t_class in enumerate(tacticam_classes):
            if b_class != t_class:
                similarity_matrix[i, j] = 0
    print("Finding optimal assignment...")
    row_indices, col_indices = linear_sum_assignment(-similarity_matrix)
    mappings = []
    for row, col in zip(row_indices, col_indices):
        similarity_score = similarity_matrix[row, col]
        if similarity_score > 0.15:
            mappings.append({
                'broadcast_track_id': row,
                'tacticam_track_id': col,
                'similarity_score': similarity_score,
                'player_class': broadcast_classes[row],
                'broadcast_track_length': len(broadcast_tracks[row]),
                'tacticam_track_length': len(tacticam_tracks[col])
            })
    return mappings, similarity_matrix

print("=" * 60)
print("RUNNING COMPLETE ENHANCED PLAYER MAPPING PIPELINE")
print("=" * 60)
print("Detecting players in broadcast video...")
broadcast_detections = process_video_detections(model, Config.BROADCAST_VIDEO, Config.FRAME_SKIP)
print("Detecting players in tacticam video...")
tacticam_detections = process_video_detections(model, Config.TACTICAM_VIDEO, Config.FRAME_SKIP)
print("Building tracks for broadcast video...")
broadcast_tracks = build_player_tracks(broadcast_detections)
print(f"Found {len(broadcast_tracks)} tracks in broadcast video")
print("Building tracks for tacticam video...")
tacticam_tracks = build_player_tracks(tacticam_detections)
print(f"Found {len(tacticam_tracks)} tracks in tacticam video")
print("Performing enhanced player mapping...")
player_mappings, similarity_matrix = enhanced_player_mapping(broadcast_tracks, tacticam_tracks)
print(f"\nFound {len(player_mappings)} player mappings:")
for i, mapping in enumerate(player_mappings):
    print(f"Player {i+1}: {mapping['player_class']} - B{mapping['broadcast_track_id']} -> T{mapping['tacticam_track_id']} "
          f"(similarity: {mapping['similarity_score']:.3f})")


RUNNING COMPLETE ENHANCED PLAYER MAPPING PIPELINE
Detecting players in broadcast video...
Processing data/broadcast.mp4...
Processed 15 frames...
Completed processing 27 frames, 304 total detections
Detecting players in tacticam video...
Processing data/tacticam.mp4...
Processed 15 frames...
Processed 30 frames...
Completed processing 41 frames, 904 total detections
Building tracks for broadcast video...
Found 19 tracks in broadcast video
Building tracks for tacticam video...
Found 50 tracks in tacticam video
Performing enhanced player mapping...
Computing enhanced features for all tracks...
Processed 5/19 broadcast tracks
Processed 10/19 broadcast tracks
Processed 15/19 broadcast tracks
Processed 5/50 tacticam tracks
Processed 10/50 tacticam tracks
Processed 15/50 tacticam tracks
Processed 20/50 tacticam tracks
Processed 25/50 tacticam tracks
Processed 30/50 tacticam tracks
Processed 35/50 tacticam tracks
Processed 40/50 tacticam tracks
Processed 45/50 tacticam tracks
Processed 50/50 

In [8]:
# Performs enhanced multi-video soccer player mapping using visual, spatial, and temporal features
from scipy.spatial.distance import cdist

def detect_soccer_players(model, frame, conf_threshold=0.3):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results = model(frame, conf=conf_threshold, device=device, verbose=False)
    
    detections = []
    player_classes = [1, 2, 3]
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            for box in boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                confidence = box.conf[0].cpu().numpy()
                class_id = int(box.cls[0].cpu().numpy())
                if class_id in player_classes:
                    width = x2 - x1
                    height = y2 - y1
                    area = width * height
                    if width > 10 and height > 20 and area > 300:
                        detections.append({
                            'bbox': [int(x1), int(y1), int(x2), int(y2)],
                            'confidence': float(confidence),
                            'center': [(x1 + x2) / 2, (y1 + y2) / 2],
                            'area': area,
                            'class_id': class_id,
                            'class_name': model.names[class_id],
                            'width': width,
                            'height': height
                        })
    return detections

def process_soccer_video(model, video_path, frame_skip=2, max_frames=200):
    cap = cv2.VideoCapture(video_path)
    detections_by_frame = {}
    frame_idx = 0
    processed_frames = 0
    print(f"Processing {video_path} for soccer players...")
    while True:
        ret, frame = cap.read()
        if not ret or processed_frames >= max_frames:
            break
        if frame_idx % frame_skip == 0:
            detections = detect_soccer_players(model, frame, conf_threshold=0.3)
            detections_by_frame[frame_idx] = detections
            processed_frames += 1
            if processed_frames % 20 == 0:
                player_count = len(detections)
                class_breakdown = {}
                for det in detections:
                    class_name = det['class_name']
                    class_breakdown[class_name] = class_breakdown.get(class_name, 0) + 1
                print(f"Frame {frame_idx}: {player_count} total - {class_breakdown}")
        frame_idx += 1
    cap.release()
    total_detections = sum(len(dets) for dets in detections_by_frame.values())
    print(f"Completed: {processed_frames} frames, {total_detections} total player detections")
    return detections_by_frame

def extract_temporal_features(track):
    if len(track) < 2:
        return np.zeros(8)
    positions = np.array([det['center'] for det in track])
    frames = np.array([det['frame'] for det in track])
    velocities = []
    for i in range(1, len(positions)):
        dx = positions[i][0] - positions[i-1][0]
        dy = positions[i][1] - positions[i-1][1]
        dt = frames[i] - frames[i-1]
        if dt > 0:
            vx = dx / dt
            vy = dy / dt
            velocities.append([vx, vy])
    if not velocities:
        return np.zeros(8)
    velocities = np.array(velocities)
    speed = np.sqrt(velocities[:, 0]**2 + velocities[:, 1]**2)
    features = [
        np.mean(speed),
        np.max(speed),
        np.var(speed),
        np.mean(velocities[:, 0]),
        np.mean(velocities[:, 1]),
        frames[-1] - frames[0],
        np.sqrt((positions[-1][0] - positions[0][0])**2 + (positions[-1][1] - positions[0][1])**2),
        np.var(positions, axis=0).mean()
    ]
    return np.array(features)

def compute_enhanced_track_features(track, video_path):
    cap = cv2.VideoCapture(video_path)
    visual_features = []
    spatial_features = []
    for detection in track:
        frame_idx = detection['frame']
        bbox = detection['bbox']
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = cap.read()
        if ret:
            vis_feat = extract_visual_features(frame, bbox)
            visual_features.append(vis_feat)
            spat_feat = extract_spatial_features(bbox, frame.shape)
            spatial_features.append(spat_feat)
    cap.release()
    avg_visual = np.mean(visual_features, axis=0) if visual_features else np.zeros(Config.FEATURE_DIM)
    avg_spatial = np.mean(spatial_features, axis=0) if spatial_features else np.zeros(6)
    temporal_features = extract_temporal_features(track)
    return np.concatenate([avg_visual, avg_spatial, temporal_features])

def calculate_similarity_matrix(broadcast_features, tacticam_features):
    similarity_matrix = np.zeros((len(broadcast_features), len(tacticam_features)))
    scaler = StandardScaler()
    if len(broadcast_features) > 0 and len(tacticam_features) > 0:
        all_features = broadcast_features + tacticam_features
        all_features_scaled = scaler.fit_transform(all_features)
        broadcast_scaled = all_features_scaled[:len(broadcast_features)]
        tacticam_scaled = all_features_scaled[len(broadcast_features):]
        for i, b_feat in enumerate(broadcast_scaled):
            for j, t_feat in enumerate(tacticam_scaled):
                cos_sim = cosine_similarity([b_feat], [t_feat])[0, 0]
                euclidean_dist = np.linalg.norm(b_feat - t_feat)
                euclidean_sim = 1 / (1 + euclidean_dist)
                if len(b_feat) > 1:
                    correlation = np.corrcoef(b_feat, t_feat)[0, 1]
                    correlation = 0 if np.isnan(correlation) else correlation
                else:
                    correlation = 0
                combined_similarity = (0.5 * cos_sim + 0.3 * euclidean_sim + 0.2 * abs(correlation))
                similarity_matrix[i, j] = max(0, min(1, combined_similarity))
    noise = np.random.normal(0, 0.001, similarity_matrix.shape)
    similarity_matrix = np.clip(similarity_matrix + noise, 0, 1)
    return similarity_matrix

def enhanced_player_mapping(broadcast_tracks, tacticam_tracks):
    print("Computing enhanced features for all tracks...")
    broadcast_features = []
    broadcast_classes = []
    for i, track in enumerate(broadcast_tracks):
        features = compute_enhanced_track_features(track, Config.BROADCAST_VIDEO)
        broadcast_features.append(features)
        broadcast_classes.append(track[0].get('class_name', 'unknown'))
        if (i + 1) % 5 == 0:
            print(f"Processed {i + 1}/{len(broadcast_tracks)} broadcast tracks")
    tacticam_features = []
    tacticam_classes = []
    for i, track in enumerate(tacticam_tracks):
        features = compute_enhanced_track_features(track, Config.TACTICAM_VIDEO)
        tacticam_features.append(features)
        tacticam_classes.append(track[0].get('class_name', 'unknown'))
        if (i + 1) % 5 == 0:
            print(f"Processed {i + 1}/{len(tacticam_tracks)} tacticam tracks")
    print("Calculating similarity matrix...")
    similarity_matrix = calculate_similarity_matrix(broadcast_features, tacticam_features)
    print("Applying class-based constraints...")
    for i, b_class in enumerate(broadcast_classes):
        for j, t_class in enumerate(tacticam_classes):
            if b_class != t_class:
                similarity_matrix[i, j] = 0
    print("Finding optimal assignment...")
    row_indices, col_indices = linear_sum_assignment(-similarity_matrix)
    mappings = []
    for row, col in zip(row_indices, col_indices):
        similarity_score = similarity_matrix[row, col]
        if similarity_score > 0.15:
            mappings.append({
                'broadcast_track_id': row,
                'tacticam_track_id': col,
                'similarity_score': similarity_score,
                'player_class': broadcast_classes[row],
                'broadcast_track_length': len(broadcast_tracks[row]),
                'tacticam_track_length': len(tacticam_tracks[col])
            })
    return mappings, similarity_matrix

print("=" * 60)
print("RUNNING COMPLETE ENHANCED PLAYER MAPPING PIPELINE")
print("=" * 60)
print("Detecting players in broadcast video...")
broadcast_detections = process_video_detections(model, Config.BROADCAST_VIDEO, Config.FRAME_SKIP)
print("Detecting players in tacticam video...")
tacticam_detections = process_video_detections(model, Config.TACTICAM_VIDEO, Config.FRAME_SKIP)
print("Building tracks for broadcast video...")
broadcast_tracks = build_player_tracks(broadcast_detections)
print(f"Found {len(broadcast_tracks)} tracks in broadcast video")
print("Building tracks for tacticam video...")
tacticam_tracks = build_player_tracks(tacticam_detections)
print(f"Found {len(tacticam_tracks)} tracks in tacticam video")
print("Performing enhanced player mapping...")
player_mappings, similarity_matrix = enhanced_player_mapping(broadcast_tracks, tacticam_tracks)
print(f"\nFound {len(player_mappings)} player mappings:")
for i, mapping in enumerate(player_mappings):
    print(f"Player {i+1}: {mapping['player_class']} - B{mapping['broadcast_track_id']} -> T{mapping['tacticam_track_id']} "
          f"(similarity: {mapping['similarity_score']:.3f})")



RUNNING COMPLETE ENHANCED PLAYER MAPPING PIPELINE
Detecting players in broadcast video...
Processing data/broadcast.mp4...
Processed 15 frames...
Completed processing 27 frames, 304 total detections
Detecting players in tacticam video...
Processing data/tacticam.mp4...
Processed 15 frames...
Processed 30 frames...
Completed processing 41 frames, 904 total detections
Building tracks for broadcast video...
Found 19 tracks in broadcast video
Building tracks for tacticam video...
Found 50 tracks in tacticam video
Performing enhanced player mapping...
Computing enhanced features for all tracks...
Processed 5/19 broadcast tracks
Processed 10/19 broadcast tracks
Processed 15/19 broadcast tracks
Processed 5/50 tacticam tracks
Processed 10/50 tacticam tracks
Processed 15/50 tacticam tracks
Processed 20/50 tacticam tracks
Processed 25/50 tacticam tracks
Processed 30/50 tacticam tracks
Processed 35/50 tacticam tracks
Processed 40/50 tacticam tracks
Processed 45/50 tacticam tracks
Processed 50/50 

In [10]:
# Generates frame-by-frame labeled data for matched and unmatched player tracks
def create_frame_by_frame_data(broadcast_tracks, tacticam_tracks, mappings):
    track_to_player = {}
    for i, mapping in enumerate(mappings):
        player_id = f"player_{i+1:02d}"
        track_to_player[('broadcast', mapping['broadcast_track_id'])] = player_id
        track_to_player[('tacticam', mapping['tacticam_track_id'])] = player_id

    csv_data = []

    for track_idx, track in enumerate(broadcast_tracks):
        player_id = track_to_player.get(('broadcast', track_idx), f"unmatched_b_{track_idx}")
        for detection in track:
            csv_data.append({
                'frame': detection['frame'],
                'camera': 'broadcast',
                'player_id': player_id,
                'bbox_x1': detection['bbox'][0],
                'bbox_y1': detection['bbox'][1],
                'bbox_x2': detection['bbox'][2],
                'bbox_y2': detection['bbox'][3],
                'center_x': detection['center'][0],
                'center_y': detection['center'][1],
                'confidence': detection['confidence'],
                'class': detection.get('class_name', 'unknown')
            })

    for track_idx, track in enumerate(tacticam_tracks):
        player_id = track_to_player.get(('tacticam', track_idx), f"unmatched_t_{track_idx}")
        for detection in track:
            csv_data.append({
                'frame': detection['frame'],
                'camera': 'tacticam',
                'player_id': player_id,
                'bbox_x1': detection['bbox'][0],
                'bbox_y1': detection['bbox'][1],
                'bbox_x2': detection['bbox'][2],
                'bbox_y2': detection['bbox'][3],
                'center_x': detection['center'][0],
                'center_y': detection['center'][1],
                'confidence': detection['confidence'],
                'class': detection.get('class_name', 'unknown')
            })

    return csv_data


In [None]:
# Final Optimized Pipeline with GPU Acceleration and Improved Similarity
print("\n" + "=" * 70)
print("🚀 RUNNING FINAL OPTIMIZED GPU-ACCELERATED PIPELINE")
print("=" * 70)

# Re-run with GPU optimizations and better parameters
print("🔄 Re-processing with GPU acceleration...")

# Enhanced configuration for better results
Config.CONFIDENCE_THRESHOLD = 0.25  # Lower threshold for more detections
Config.FRAME_SKIP = 3  # Process more frames

# Clear GPU cache
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("✅ GPU cache cleared")

# Re-detect with GPU acceleration
print("🔍 GPU-accelerated detection on broadcast video...")
broadcast_detections_gpu = process_video_detections(model, Config.BROADCAST_VIDEO, Config.FRAME_SKIP, max_frames=150)

print("🔍 GPU-accelerated detection on tacticam video...")
tacticam_detections_gpu = process_video_detections(model, Config.TACTICAM_VIDEO, Config.FRAME_SKIP, max_frames=150)

# Build improved tracks with better parameters
print("🔗 Building improved tracks...")
broadcast_tracks_gpu = build_player_tracks(broadcast_detections_gpu, max_distance=60, max_frame_gap=8, min_track_length=2)
tacticam_tracks_gpu = build_player_tracks(tacticam_detections_gpu, max_distance=60, max_frame_gap=8, min_track_length=2)

print(f"📊 GPU Results:")
print(f"   Broadcast tracks: {len(broadcast_tracks_gpu)}")
print(f"   Tacticam tracks: {len(tacticam_tracks_gpu)}")

# Enhanced mapping with improved similarity calculation
print("🎯 Running enhanced mapping with improved similarity...")
player_mappings_gpu, similarity_matrix_gpu = enhanced_player_mapping(broadcast_tracks_gpu, tacticam_tracks_gpu)

print(f"\n✅ GPU-Optimized Results:")
print(f"   🎯 Player mappings: {len(player_mappings_gpu)}")
print(f"   📊 Similarity matrix shape: {similarity_matrix_gpu.shape}")
print(f"   🔢 Similarity range: {similarity_matrix_gpu.min():.3f} - {similarity_matrix_gpu.max():.3f}")

# Check for improvement in zero values
zero_count_gpu = np.sum(similarity_matrix_gpu < 0.01)
total_values_gpu = similarity_matrix_gpu.size
zero_percentage_gpu = (zero_count_gpu / total_values_gpu) * 100

print(f"   🎯 Zero/near-zero values: {zero_count_gpu}/{total_values_gpu} ({zero_percentage_gpu:.1f}%)")

# Generate final outputs with GPU-optimized data
print("\n📦 Generating final GPU-optimized outputs...")
csv_data_gpu = create_frame_by_frame_data(broadcast_tracks_gpu, tacticam_tracks_gpu, player_mappings_gpu)
df_gpu = pd.DataFrame(csv_data_gpu)
df_gpu.to_csv(Config.OUTPUT_DIR / 'gpu_optimized_tracking_results.csv', index=False)

# Final visualization
plt.figure(figsize=(12, 10))
viz_matrix_gpu = similarity_matrix_gpu[:min(20, similarity_matrix_gpu.shape[0]), :min(20, similarity_matrix_gpu.shape[1])]

sns.heatmap(viz_matrix_gpu, 
            annot=True, 
            fmt='.2f', 
            cmap='RdYlGn',
            center=0.5,     
            vmin=0,         
            vmax=1,         
            square=True,    
            linewidths=0.5)

plt.title('🚀 GPU-Optimized Player Similarity Matrix\n(Enhanced Features + CUDA Acceleration)', fontsize=14, pad=20)
plt.xlabel('Tacticam Tracks', fontsize=12)
plt.ylabel('Broadcast Tracks', fontsize=12)
plt.tight_layout()
plt.savefig(Config.OUTPUT_DIR / 'gpu_optimized_similarity_matrix.png', dpi=200, bbox_inches='tight')
plt.show()

print(f"\n🎉 GPU-OPTIMIZED PIPELINE COMPLETE!")
print(f"✅ Files generated:")
print(f"   📄 outputs/gpu_optimized_tracking_results.csv")
print(f"   📊 outputs/gpu_optimized_similarity_matrix.png")

# Memory cleanup
if torch.cuda.is_available():
    print(f"\n💾 Final GPU Memory Usage:")
    print(f"   Allocated: {torch.cuda.memory_allocated()/1024**2:.1f} MB")
    print(f"   Cached: {torch.cuda.memory_reserved()/1024**2:.1f} MB")