In [None]:
!pip install -q ultralytics gdown filterpy lap deep_sort_realtime easyocr tqdm

In [5]:
import cv2
import numpy as np
import torch
from tqdm import tqdm
import easyocr
from collections import defaultdict, deque
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from filterpy.kalman import KalmanFilter
import math# Download files
gdown.download(id="1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD", output="best.pt", quiet=False)
gdown.download(id="1TDcND31fvEDvcnZCaianTxJrmT8q7iIi", output="15sec_input_720p.mp4", quiet=False)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device:", device)

# Load YOLOv11 model
model = YOLO("best.pt").to(device)

Downloading...
From (original): https://drive.google.com/uc?id=1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD
From (redirected): https://drive.google.com/uc?id=1-5fOSHOSB9UXyP_enOoZNAMScrePVcMD&confirm=t&uuid=e03388ca-e4fe-4426-8515-bba605e4c0e3
To: /content/best.pt
100%|██████████| 195M/195M [00:00<00:00, 259MB/s]
Downloading...
From: https://drive.google.com/uc?id=1TDcND31fvEDvcnZCaianTxJrmT8q7iIi
To: /content/15sec_input_720p.mp4
100%|██████████| 5.18M/5.18M [00:00<00:00, 61.2MB/s]


Using device: cuda


In [7]:
def process_video(input_path, output_path):
    # Initialize models
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print("Using device:", device)

    # Load YOLO model
    model = YOLO("best.pt").to(device)

    # Initialize tracker
    tracker = DeepSort(max_age=30, n_init=3)

    # Initialize OCR reader
    ocr_reader = easyocr.Reader(['en'])

    # Data structures
    track_history = defaultdict(lambda: deque(maxlen=10))
    jersey_map = {}  # Maps track_id to jersey number
    last_ocr_frame = defaultdict(int)

    # Video setup
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frame_count = 0
    pbar = tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), desc="Processing video")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Run detection
        results = model(frame, verbose=False)[0]
        boxes = results.boxes.xyxy.cpu().numpy()
        classes = results.boxes.cls.cpu().numpy()
        confs = results.boxes.conf.cpu().numpy()

        # Filter detections (only players with high confidence and reasonable size)
        detections = []
        for (x1, y1, x2, y2), conf, cls in zip(boxes, confs, classes):
            if int(cls) == 2 and conf > 0.7 and (x2 - x1) * (y2 - y1) > 1000:
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, 'player'))

        # Update tracker
        tracks = tracker.update_tracks(detections, frame=frame)

        # Process tracks
        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = int(track.track_id)
            x1, y1, x2, y2 = map(int, track.to_ltrb())
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2

            # Movement validation
            if track_id in track_history and len(track_history[track_id]) > 0:
                prev_cx, prev_cy = track_history[track_id][-1]
                dx, dy = abs(cx - prev_cx), abs(cy - prev_cy)
                if dx > 100 or dy > 50:  # Skip implausible jumps
                    continue

            track_history[track_id].append((cx, cy))

            # Optimized OCR (run only every 15 frames for each track)
            if (track_id not in jersey_map) and (frame_count - last_ocr_frame.get(track_id, 0) > 15):
                try:
                    torso_region = frame[y1:y1 + int(0.5*(y2-y1)), x1:x2]
                    ocr_results = ocr_reader.readtext(torso_region, allowlist='0123456789')

                    # Filter OCR results (must be 1-2 digit number with high confidence)
                    for result in ocr_results:
                        text = result[1].strip()
                        if (text.isdigit() and
                            1 <= int(text) <= 99 and
                            result[2] > 0.7 and
                            len(text) <= 2):
                            jersey_map[track_id] = text
                            last_ocr_frame[track_id] = frame_count
                            break
                except:
                    pass

            # Draw bounding box and label
            color = (0, 255, 0) if track_id in jersey_map else (0, 0, 255)
            label = f"#{jersey_map[track_id]}" if track_id in jersey_map else f"ID:{track_id}"

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                       cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

        # Write frame
        out.write(frame)
        frame_count += 1
        pbar.update(1)

    pbar.close()
    cap.release()
    out.release()

    # Print summary
    print("\nPlayer Jersey Number Mapping:")
    for track_id, jersey in jersey_map.items():
        print(f"Track ID {track_id} -> Jersey #{jersey}")

# Process the video
process_video("15sec_input_720p.mp4", "output_tracking.mp4")

Using device: cuda


Processing video: 100%|██████████| 375/375 [01:56<00:00,  3.21it/s]


Player Jersey Number Mapping:
Track ID 13 -> Jersey #28
Track ID 45 -> Jersey #10
Track ID 50 -> Jersey #13
Track ID 55 -> Jersey #52
Track ID 52 -> Jersey #6
Track ID 105 -> Jersey #5
Track ID 141 -> Jersey #08
Track ID 149 -> Jersey #10
Track ID 87 -> Jersey #10
Track ID 168 -> Jersey #2
Track ID 159 -> Jersey #57



