In [1]:


import cv2
import numpy as np
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort
from tqdm import tqdm
import os


In [2]:

model = YOLO("best.pt")  


In [3]:
# Confirm model class labels
print(model.names)


{0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}


In [8]:
video_path = '15sec_input_720p.mp4'
cap = cv2.VideoCapture(video_path)

# Get video details
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps    = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Output writer setup
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_reid_6.mp4', fourcc, fps, (width, height))

# Initialize DeepSort
tracker = DeepSort(max_age=30, n_init=2) 

In [9]:
frame_idx = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    print(f"[INFO] Processing frame: {frame_idx}")

    # Run YOLOv8 inference
    results = model(frame)[0]
    detections = []

    for box in results.boxes:
        cls = int(box.cls.item())
        conf = float(box.conf.item())
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        w, h = x2 - x1, y2 - y1

        # Confidence & size filter (to remove noise)
        if conf < 0.5 or w < 20 or h < 40 or w > 500 or h > 700:
            continue

        label = model.names[cls]
        if label not in ['player', 'goalkeeper', 'referee', 'ball']:
            continue  # Ignore unknown/unwanted classes

        detections.append(([x1, y1, w, h], conf, label))

    # Run tracking
    tracks = tracker.update_tracks(detections, frame=frame)

    for track in tracks:
        if not track.is_confirmed() or track.track_id is None:
            continue

        track_id = track.track_id
        x1, y1, x2, y2 = map(int, track.to_ltrb())
        label = track.get_det_class()

        if label == "player":
            text = f"Player {track_id}"
            color = (255, 255, 255)
        elif label == "goalkeeper":
            text = f"Goalkeeper {track_id}"
            color = (0, 0, 255)
        elif label == "referee":
            text = f"Referee {track_id}"
            color = (0, 215, 255)
        elif label == "ball":
            text = "Ball"
            color = (0, 255, 0)
        else:
            continue

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, text, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    # Write the output frame
    out.write(frame)
    frame_idx += 1

# Release everything
cap.release()
out.release()
cv2.destroyAllWindows()

print("[DONE] Output saved to: output_reid_5.mp4")

[INFO] Processing frame: 0



0: 384x640 1 ball, 16 players, 2 referees, 359.6ms
Speed: 4.7ms preprocess, 359.6ms inference, 9.7ms postprocess per image at shape (1, 3, 384, 640)
[INFO] Processing frame: 1

0: 384x640 18 players, 2 referees, 245.0ms
Speed: 0.9ms preprocess, 245.0ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
[INFO] Processing frame: 2

0: 384x640 1 ball, 16 players, 2 referees, 246.2ms
Speed: 1.0ms preprocess, 246.2ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[INFO] Processing frame: 3

0: 384x640 1 ball, 14 players, 2 referees, 260.3ms
Speed: 2.4ms preprocess, 260.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
[INFO] Processing frame: 4

0: 384x640 1 ball, 14 players, 2 referees, 249.6ms
Speed: 1.7ms preprocess, 249.6ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)
[INFO] Processing frame: 5

0: 384x640 1 ball, 16 players, 2 referees, 263.2ms
Speed: 0.9ms preprocess, 263.2ms inference, 1.0ms postprocess per image 