In [1]:
import os
import sys
# Expose the local ./utils directory (resolved against the current working
# directory) to the import system.
sys.path.append(os.path.abspath('utils'))
def match_tracks(front_feat_path, side_feat_path, save_path, top_k=1):
    """Make sure the directory that will contain ``save_path`` exists.

    This definition only prepares the output directory; ``front_feat_path``,
    ``side_feat_path`` and ``top_k`` are accepted but not used here.

    NOTE(review): a later cell runs ``from matcher import match_tracks``,
    which rebinds this name, so this stub is not what the pipeline calls
    afterwards -- confirm whether it can be removed.

    Args:
        front_feat_path: Unused in this definition.
        side_feat_path: Unused in this definition.
        save_path: File path whose parent directory is created if missing.
        top_k: Unused in this definition.
    """
    parent_dir = os.path.dirname(save_path)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; there is nothing to create in that case.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

In [2]:
# main script: run_pipeline.py

import json
import cv2
import numpy as np
from ultralytics import YOLO
from tracker import run_tracking
from draw_utils import draw_boxes
from feature_utils import extract_features
from matcher import match_tracks

# ---- CONFIG ----
# The two input videos and the weights file handed to YOLO / run_tracking.
VIDEO_FRONT = 'input/broadcast.mp4'
VIDEO_SIDE = 'input/tacticam.mp4'
MODEL_PATH = 'yolov11_model/best.pt'

# Directories for the detection JSON files, the feature arrays and the
# rendered videos; created up front so later steps can write into them.
for _artifact_dir in ('detections', 'features', 'output'):
    os.makedirs(_artifact_dir, exist_ok=True)



Successfully loaded imagenet pretrained weights from "C:\Users\hp333/.cache\torch\checkpoints\osnet_x1_0_imagenet.pth"


In [3]:
# ---- STEP 1: YOLO Detection ----
def run_yolo(video_path, save_path):
    """Run the YOLO model over a video and save per-frame detections as JSON.

    The written JSON is a list with one entry per result yielded by the
    model; each entry is a list of ``[x1, y1, x2, y2, cls, conf]`` rows, one
    per detected box.

    NOTE(review): the visible cells call ``run_tracking`` rather than this
    function -- confirm whether it is still needed.

    Args:
        video_path: Passed to ``model.predict`` as ``source``.
        save_path: Path of the JSON file to write.
    """
    model = YOLO(MODEL_PATH)
    # stream=True makes predict() return a generator, so results are consumed
    # one at a time and not accumulated for the whole video in memory.
    results = model.predict(source=video_path, save=False, conf=0.4, stream=True)

    all_detections = []
    for result in results:
        frame_data = []
        for box in result.boxes:
            # Each attribute is indexed with [0] to take this box's own row.
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            frame_data.append([x1, y1, x2, y2, int(box.cls[0]), float(box.conf[0])])
        all_detections.append(frame_data)

    with open(save_path, 'w') as f:
        json.dump(all_detections, f)

print("Running YOLO detections...")
# Despite the message, the work is delegated to tracker.run_tracking; each
# video gets its own JSON file under detections/.
for _video, _detections_file in (
    (VIDEO_FRONT, 'detections/front.json'),
    (VIDEO_SIDE, 'detections/side.json'),
):
    run_tracking(_video, MODEL_PATH, _detections_file)


Running YOLO detections...

0: 384x640 3 players, 91.1ms
Speed: 5.1ms preprocess, 91.1ms inference, 166.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 86.4ms
Speed: 3.1ms preprocess, 86.4ms inference, 26.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 players, 86.5ms
Speed: 3.1ms preprocess, 86.5ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 players, 86.6ms
Speed: 3.0ms preprocess, 86.6ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 players, 86.7ms
Speed: 2.6ms preprocess, 86.7ms inference, 3.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 11 players, 1 referee, 86.5ms
Speed: 2.8ms preprocess, 86.5ms inference, 3.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 goalkeeper, 12 players, 1 referee, 86.4ms
Speed: 2.3ms preprocess, 86.4ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 1 goalkeeper, 14 player

In [4]:
# ---- STEP 2: Feature Extraction ----
print("Extracting features...")
# One call per view: (detections JSON, source video, output .npy file).
for _detections_file, _video, _features_file in (
    ('detections/front.json', VIDEO_FRONT, 'features/front.npy'),
    ('detections/side.json', VIDEO_SIDE, 'features/side.npy'),
):
    extract_features(_detections_file, _video, _features_file)


Extracting features...
Saved features to features/front.npy
Saved features to features/side.npy


In [5]:
# ---- STEP 3: Matching Across Views ----
print("Matching tracks across views...")
# The output directory is created here, before the matcher is asked to
# write into it.
os.makedirs('matches', exist_ok=True)
match_tracks(
    'features/front.npy',
    'features/side.npy',
    'matches/global_id_map.json',
)

Matching tracks across views...
Saved global ID map to matches/global_id_map.json


In [6]:
# ---- STEP 4: Render Final Videos ----
def render_video(video_path, detections_json, global_id_map, output_path, view=None):
    """Draw per-frame detections onto a video and write the annotated copy.

    Frames are read in order and frame ``i`` is annotated with entry ``i`` of
    the detections list; rendering stops at whichever runs out first, the
    video or the detections.

    Args:
        video_path: Source video, opened with ``cv2.VideoCapture``.
        detections_json: JSON file containing a list indexed by frame number.
        global_id_map: Forwarded unchanged to ``draw_boxes``.
        output_path: Destination file, written with the ``mp4v`` codec.
        view: ``'front'`` or ``'side'``, forwarded to ``draw_boxes``. When
            omitted it is inferred: ``'front'`` if that word occurs in
            ``video_path`` or ``detections_json``, otherwise ``'side'``.

    Raises:
        IOError: If ``video_path`` cannot be opened.
    """
    with open(detections_json) as f:
        detections = json.load(f)

    if view is None:
        # Looking at video_path alone is not enough: the configured inputs
        # ('input/broadcast.mp4', 'input/tacticam.mp4') never contain 'front',
        # so both views used to be rendered as 'side'. The detections file
        # name ('detections/front.json') does carry the view.
        is_front = any('front' in str(p) for p in (video_path, detections_json))
        view = 'front' if is_front else 'side'

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    # Keep the source frame rate; fall back to 30 when the container does not
    # report a usable value (0 or NaN).
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not (fps and fps > 0):
        fps = 30

    out = None
    try:
        for frame_id, frame_detections in enumerate(detections):
            ret, frame = cap.read()
            if not ret:
                break

            frame = draw_boxes(frame, frame_detections, global_id_map, frame_id, view=view)

            if out is None:
                # The writer is created lazily so its size matches the first
                # annotated frame.
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps, (frame.shape[1], frame.shape[0]))
            out.write(frame)
    finally:
        # Release handles even if drawing or writing fails part-way.
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        # No frame was written (empty video or empty detections list).
        print(f"No frames rendered; nothing written to {output_path}")
    else:
        print(f"Saved output: {output_path}")

# Load the cross-view ID map written by the matching step.
with open('matches/global_id_map.json') as _map_file:
    global_id_map = json.load(_map_file)

# Render one annotated video per view: (source video, detections, output).
for _video, _detections_file, _rendered_path in (
    (VIDEO_FRONT, 'detections/front.json', 'output/front_output.mp4'),
    (VIDEO_SIDE, 'detections/side.json', 'output/side_output.mp4'),
):
    render_video(_video, _detections_file, global_id_map, _rendered_path)


Saved output: output/front_output.mp4
Saved output: output/side_output.mp4
