In [34]:
!pip install ultralytics
!pip install deep_sort_realtime
!pip install git+https://github.com/KaiyangZhou/deep-person-reid.git

Collecting git+https://github.com/KaiyangZhou/deep-person-reid.git
  Cloning https://github.com/KaiyangZhou/deep-person-reid.git to /tmp/pip-req-build-snbv87_c
  Running command git clone --filter=blob:none --quiet https://github.com/KaiyangZhou/deep-person-reid.git /tmp/pip-req-build-snbv87_c
  Resolved https://github.com/KaiyangZhou/deep-person-reid.git to commit 566a56a2cb255f59ba75aa817032621784df546a
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [35]:
import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import os
from math import hypot
import torch
import torchvision.transforms as transforms
from torchreid.models import osnet_x1_0  # OSNet variant
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
video_path = "/content/15sec_input_720p.mp4"
model_path = "/content/best.pt"
model = YOLO(model_path)
tracker = DeepSort(max_age=60, n_init=5, max_iou_distance=0.3)

In [37]:
cap = cv2.VideoCapture(video_path)
width, height = int(cap.get(3)), int(cap.get(4))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

In [38]:
# Scaling factors for YOLO inference (384x640) to original (1280x720)
input_width, input_height = 384, 640
scale_x = width / input_width
scale_y = height / input_height

In [None]:
player_tracks = defaultdict(list)
unique_ids = set()
player_speeds = defaultdict(list)
player_embeddings = {}
player_box_sizes = defaultdict(list)
passes = []
frame_idx = 0

In [40]:
device = "cuda" if torch.cuda.is_available() else "cpu"
reid_model = osnet_x1_0(pretrained=True).eval().to(device)
extract_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 128)),  # OSNet expects 256x128 input
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

Successfully loaded imagenet pretrained weights from "/root/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"


In [41]:
def get_embedding(crop):
    if crop.size == 0:
        return np.zeros(512)  # OSNet x1.0 outputs 512-dimensional features
    tensor = extract_transform(crop).unsqueeze(0).to(device)
    with torch.no_grad():
        return reid_model(tensor).cpu().numpy().flatten()

In [42]:
def match_id(crop, known_embeddings, threshold=0.75):  # Adjusted threshold for OSNet
    emb = get_embedding(crop).reshape(1, -1)
    best_id, best_score = None, 0
    for pid, ref_emb in known_embeddings.items():
        sim = cosine_similarity(emb, ref_emb.reshape(1, -1))[0][0]
        if sim > best_score and sim > threshold:
            best_id = pid
            best_score = sim
    return best_id

In [43]:
def smooth_box(prev_box, curr_box, alpha=0.7):
    if prev_box is None:
        return curr_box
    l1, t1, w1, h1 = prev_box
    l2, t2, w2, h2 = curr_box
    l = int(alpha * l1 + (1 - alpha) * l2)
    t = int(alpha * t1 + (1 - alpha) * t2)
    w = int(alpha * w1 + (1 - alpha) * w2)
    h = int(alpha * h1 + (1 - alpha) * h2)
    return [l, t, w, h]

In [None]:
# Parameters
conf_threshold = 0.8  # Adjust confidence threshold
nms_threshold = 0.2   # Adjust NMS threshold
frame_width = 640
frame_height = 384

track_history = {}

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print(f"Video ended at frame {frame_idx}")
        break
    frame_idx += 1

    # Preprocess frame for YOLO (resize to 384x640)
    input_frame = cv2.resize(frame, (input_width, input_height))
    results = model(input_frame)[0]
    detections = []
    ball_position = None

    for r in results.boxes.data.tolist():
        x1, y1, x2, y2, score, cls = r
        cls = int(cls)
        # Scale bounding box to original frame size
        x1, y1, x2, y2 = x1 * scale_x, y1 * scale_y, x2 * scale_x, y2 * scale_y
        bbox = [x1, y1, x2 - x1, y2 - y1]
        w, h = x2 - x1, y2 - y1
        if score < conf_threshold:  # Skip low-confidence detections
            continue
        if w * h < 1000:  # Minimum box size filter
            continue
        if cls == 2:  # player
            if h / w < 1.5:
                h = int(w * 1.5)  # Adjusted aspect ratio
                y2 = y1 + h
            detections.append((bbox, score, 'player'))
        elif cls == 0:  # ball
            cx, cy = int((x1 + x2)/2), int((y1 + y2)/2)
            ball_position = (cx, cy)
            cv2.circle(frame, (cx, cy), 7, (0, 0, 255), -1)
            cv2.putText(frame, 'Ball', (cx + 10, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    tracks = tracker.update_tracks(detections, frame=frame)

    for track in tracks:
        if not track.is_confirmed():
            continue

        track_id = track.track_id
        l, t, r, b = map(int, track.to_ltrb())
        cx = int((l + r) / 2)
        cy = int((t + b) / 2)
        w, h = r - l, b - t

        # Smooth bounding box
        prev_box = player_box_sizes[track_id][-1] if player_box_sizes[track_id] else None
        smoothed_box = smooth_box(prev_box, [l, t, w, h])
        l, t, w, h = smoothed_box
        r, b = l + w, t + h

        # Debug centroid coordinates
        print(f"Frame {frame_idx}, Track {track_id}: cx={cx}, cy={cy}")

        # Re-ID embedding with OSNet
        crop = frame[t:b, l:r]
        if crop.size > 0:
            emb = get_embedding(crop)
            if track_id not in player_embeddings or len(player_embeddings[track_id]) == 0:
                player_embeddings[track_id] = emb
            else:
                match = match_id(crop, {k: v for k, v in player_embeddings.items() if k != track_id})
                if match and cosine_similarity(emb.reshape(1, -1), player_embeddings[match].reshape(1, -1))[0][0] > 0.75:
                    track_id = match

        # Draw
        cv2.rectangle(frame, (l, t), (r, b), (0, 255, 0), 2)
        cv2.putText(frame, f'Player {track_id}', (l, t - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    out.write(frame)

cap.release()
out.release()

# Analysis Plots
print(f"\n Tracking Complete: {len(unique_ids)} unique players re-identified.")


0: 640x384 32 players, 2 referees, 68.9ms
Speed: 2.6ms preprocess, 68.9ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 33 players, 2 referees, 36.6ms
Speed: 3.7ms preprocess, 36.6ms inference, 2.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 37 players, 2 referees, 42.6ms
Speed: 2.0ms preprocess, 42.6ms inference, 4.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 34 players, 2 referees, 37.9ms
Speed: 4.0ms preprocess, 37.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 35 players, 2 referees, 41.3ms
Speed: 5.1ms preprocess, 41.3ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 35 players, 3 referees, 44.9ms
Speed: 1.9ms preprocess, 44.9ms inference, 2.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 42 players, 3 referees, 36.9ms
Speed: 1.9ms preprocess, 36.9ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 32 playe