In [213]:
# !pip install -q ultralytics supervision

In [214]:
import os
import time
import numpy as np
import torch
import cv2
from ultralytics import YOLO
import supervision as sv
from IPython.display import display, Image, HTML
from scipy.optimize import linear_sum_assignment

In [215]:
# Input clip to analyse (the path looks like a mounted Google Drive in Colab — adjust for other environments).
VIDEO_PATH = '/content/drive/MyDrive/stealth/15sec_input_720p.mp4'

# Detector weights that are loaded with YOLO(...) further down.
MODEL_PATH = '/content/drive/MyDrive/stealth/best.pt'

# Destination of the annotated video, relative to the current working directory.
OUTPUT_PATH = 'output.mp4'

In [216]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {DEVICE}")

Using device: cuda


In [217]:
# Load the detector from MODEL_PATH and move it to the selected device.
model = YOLO(MODEL_PATH)
model = model.to(DEVICE)

In [218]:
# Show the class-id -> class-name mapping stored in the loaded weights.
# Use the public attribute rather than invoking the __getattr__ dunder directly.
model.names

{0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}

In [219]:
# Class-id -> name mapping, hard-coded to mirror the mapping displayed by the previous cell.
CLASS_NAMES = {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
# Class ids that are kept for tracking: every class except the ball (id 0).
PERSONNEL_CLASS_IDS = [1, 2, 3]

In [220]:
# Read the clip's metadata; it is reused below for the tracker's frame rate and the output writer.
video_info = sv.VideoInfo.from_video_path(VIDEO_PATH)
video_info

VideoInfo(width=1280, height=720, fps=25, total_frames=375)

In [221]:
# Multi-object tracker that attaches a tracker id to each detection; configured with the clip's frame rate.
tracker = sv.ByteTrack(frame_rate=video_info.fps)

In [222]:
# Initialize supervision annotators for visualization:
# bounding boxes drawn with thickness 2, and text labels positioned at the top centre.
box_annotator = sv.BoxAnnotator(thickness=2)
label_annotator = sv.LabelAnnotator(text_scale=0.5, text_thickness=1, text_position=sv.Position.TOP_CENTER)

In [223]:
# Frame source for VIDEO_PATH. A generator can be consumed only once, so a fresh one
# is needed for every pass over the video.
frames_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)

In [224]:
def calculate_hsv_histogram(frame, bbox, bins=(8, 8, 8)):
    """Calculate a flattened 3D HSV colour histogram for a bounding-box region.

    Args:
        frame: BGR image array indexed as ``frame[y, x]``.
        bbox: ``(x1, y1, x2, y2)`` box corners; values are truncated to int and
            clipped to the frame bounds.
        bins: Number of histogram bins for the H, S and V channels.

    Returns:
        A 1-D float32 array of length ``bins[0] * bins[1] * bins[2]`` (512 by
        default), min-max normalised to the range [0, 255]. A box that is empty
        after clipping yields an all-zero vector of the same length and dtype.
    """
    x1, y1, x2, y2 = map(int, bbox)

    # Ensure the bounding box is within the frame
    frame_height, frame_width = frame.shape[:2]
    x1, x2 = max(0, x1), min(frame_width, x2)
    y1, y2 = max(0, y1), min(frame_height, y2)

    # Return an empty histogram for invalid boxes. It must be float32, like the
    # histograms produced by cv2.calcHist, so both kinds can be passed together
    # to cv2.compareHist / blended without a dtype mismatch.
    if x1 >= x2 or y1 >= y2:
        return np.zeros(int(np.prod(bins)), dtype=np.float32)

    roi = frame[y1:y2, x1:x2]
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    # Hue spans [0, 180) in OpenCV's 8-bit HSV; saturation and value span [0, 256).
    hist = cv2.calcHist([hsv_roi], [0, 1, 2], None, list(bins), [0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX)

    return hist.flatten()

In [225]:
# Global re-identification state, read and mutated by the processing loop below.
player_features = {}          # Maps a permanent unique player ID to its latest color histogram
tracker_id_to_player_id = {}  # Maps a temporary tracker ID from ByteTrack to a permanent playerID
next_player_id = 1            # Counter holding the next available permanent player ID

In [226]:
# --- Per-run state ---
# Everything that is consumed or mutated by the loop is (re)created here so the
# cell can be re-run on its own: the frame generator is exhausted after one pass
# and the tracker / ID maps would otherwise carry state over from a previous run.
frames_generator = sv.get_video_frames_generator(source_path=VIDEO_PATH)
tracker = sv.ByteTrack(frame_rate=video_info.fps)
player_features = {}          # permanent player ID -> running colour histogram
tracker_id_to_player_id = {}  # tracker ID -> permanent player ID
next_player_id = 1            # next unused permanent player ID

REID_SIMILARITY_THRESHOLD = 0.7  # minimum histogram correlation to re-use a lost player ID
FEATURE_EMA_ALPHA = 0.9          # weight of the stored histogram in the moving average
REFEREE_CLASS_ID = 3

with sv.VideoSink(OUTPUT_PATH, video_info) as sink:
    start_time = time.time()

    for frame_count, frame in enumerate(frames_generator):
        results = model(frame, verbose=False, conf=0.25)[0]
        detections = sv.Detections.from_ultralytics(results)
        detections = detections[np.isin(detections.class_id, PERSONNEL_CLASS_IDS)]

        tracked_detections = tracker.update_with_detections(detections)

        # Guard against a missing id/class array on frames without detections.
        raw_tracker_ids = tracked_detections.tracker_id
        raw_class_ids = tracked_detections.class_id
        current_tracker_ids = [] if raw_tracker_ids is None else [int(tid) for tid in raw_tracker_ids]
        current_class_ids = [] if raw_class_ids is None else list(raw_class_ids)

        # One histogram per detection, computed once and reused for both matching
        # and the feature update. Coerced to flat float32 so every stored feature
        # has the same dtype/shape when compared.
        current_hists = [
            np.asarray(calculate_hsv_histogram(frame, box), dtype=np.float32).ravel()
            for box in tracked_detections.xyxy
        ]
        hist_by_tracker_id = dict(zip(current_tracker_ids, current_hists))

        # --- Unique ID Assignment ---
        # New tracks: tracker IDs that have no permanent player ID yet.
        new_tracker_ids = [tid for tid in current_tracker_ids if tid not in tracker_id_to_player_id]

        # Active players are those whose tracker is present in THIS frame. Using
        # every mapping ever created would mark all known players as active and
        # leave nothing to re-identify.
        active_player_ids = {
            tracker_id_to_player_id[tid]
            for tid in current_tracker_ids
            if tid in tracker_id_to_player_id
        }
        lost_player_ids = [pid for pid in player_features if pid not in active_player_ids]

        # 1-to-1 matching between new tracks and lost players
        if new_tracker_ids and lost_player_ids:
            # Create a cost matrix (cost = 1 - similarity)
            cost_matrix = np.zeros((len(new_tracker_ids), len(lost_player_ids)))
            for i, new_tid in enumerate(new_tracker_ids):
                for j, lost_pid in enumerate(lost_player_ids):
                    similarity = cv2.compareHist(
                        hist_by_tracker_id[new_tid], player_features[lost_pid], cv2.HISTCMP_CORREL
                    )
                    cost_matrix[i, j] = 1 - similarity

            # Find optimal assignment
            row_ind, col_ind = linear_sum_assignment(cost_matrix)

            # Apply matches that are above the similarity threshold. Matched IDs
            # are collected and filtered out afterwards: removing from
            # new_tracker_ids inside this loop would shift the indices that
            # row_ind refers to.
            matched_tracker_ids = set()
            for r, c in zip(row_ind, col_ind):
                if 1 - cost_matrix[r, c] > REID_SIMILARITY_THRESHOLD:
                    new_id = new_tracker_ids[r]
                    lost_id = lost_player_ids[c]
                    # Drop stale tracker IDs of this player so that an old track
                    # that resurfaces later cannot produce a duplicate label.
                    for stale_tid in [t for t, p in tracker_id_to_player_id.items() if p == lost_id]:
                        del tracker_id_to_player_id[stale_tid]
                    tracker_id_to_player_id[new_id] = lost_id
                    matched_tracker_ids.add(new_id)

            new_tracker_ids = [tid for tid in new_tracker_ids if tid not in matched_tracker_ids]

        # Assign brand new IDs to any remaining new tracks
        for new_id in new_tracker_ids:
            tracker_id_to_player_id[new_id] = next_player_id
            next_player_id += 1

        # --- Labeling and Feature Updates ---
        final_labels = []
        for tracker_id, class_id, current_hist in zip(current_tracker_ids, current_class_ids, current_hists):
            player_id = tracker_id_to_player_id[tracker_id]

            # Handling referees
            if class_id == REFEREE_CLASS_ID:
                final_labels.append("Referee")
            else:
                final_labels.append(f"Player {player_id}")

            # Update the feature vector for the current player_id, using a moving
            # average to adapt to appearance changes.
            if player_id in player_features:
                blended = (
                    FEATURE_EMA_ALPHA * player_features[player_id]
                    + (1 - FEATURE_EMA_ALPHA) * current_hist
                )
                player_features[player_id] = blended.astype(np.float32)
            else:
                player_features[player_id] = current_hist

        annotated_frame = frame.copy()
        if len(tracked_detections.xyxy) > 0:
            annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=tracked_detections)
            annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=tracked_detections, labels=final_labels)

        sink.write_frame(annotated_frame)

    end_time = time.time()


In [227]:
# --- Metrics ---
# Wall-clock duration of the processing loop.
total_time = end_time - start_time
# NOTE(review): this count is read from the video metadata rather than counted in the loop —
# confirm it matches the number of frames actually processed.
total_frames = video_info.total_frames
# Average throughput; falls back to 0 when no time elapsed to avoid dividing by zero.
fps = total_frames / total_time if total_time > 0 else 0

In [228]:
# --- Final Results ---
# Emit the run summary line by line: title, a 50-character rule, the timing
# metrics, and a closing rule.
section_rule = "\n" + "="*50 + "\n"
report_lines = [
    "--- Player Re-Identification Task Results ---",
    section_rule,
    "\n--- Performance ---",
    f"Total Processing Time: {total_time:.2f} seconds",
    f"Total Frames Processed: {total_frames}",
    f"Average FPS: {fps:.2f}",
    section_rule,
]
for report_line in report_lines:
    print(report_line)

--- Player Re-Identification Task Results ---



--- Performance ---
Total Processing Time: 24.36 seconds
Total Frames Processed: 375
Average FPS: 15.39




In [229]:
# For displaying video in Colab
def display_video(path):
    """Return an HTML <video> element with the file at ``path`` embedded inline.

    The file is read fully into memory and embedded as a base64 ``data:`` URL,
    so this is only suitable for short clips.

    Args:
        path: Path to an MP4 file.

    Returns:
        An ``HTML`` display object wrapping the <video> markup.
    """
    # Imported locally so the function does not depend on another cell having
    # imported it first.
    from base64 import b64encode

    # Context manager guarantees the file handle is closed after reading.
    with open(path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=800 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_url)

In [230]:
# Re-encode the video at {OUTPUT_PATH} to H.264 as output_display.mp4 (-y overwrites an existing file; only errors are logged)
!ffmpeg -y -i {OUTPUT_PATH} -vcodec libx264 output_display.mp4 -hide_banner -loglevel error

In [231]:
# Makes b64encode available at notebook scope (display_video base64-encodes the file it embeds).
from base64 import b64encode
# Render the H.264 re-encoded copy inline in the notebook output.
display(display_video('output_display.mp4'))