Implementation using extracted video frames and DeepSORT to identify players and assign ID labels to them.

In [None]:
import cv2
import os

# Source video and the folder that receives one JPEG per decoded frame.
video_path = r"C:\Users\nitis\OneDrive\Documents\AMV\Research\15sec_input_720p.mp4"
frames_folder = r"C:\Users\nitis\OneDrive\Documents\AMV\Research\frames"
os.makedirs(frames_folder, exist_ok=True)

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this check a wrong path silently ends with "Extracted 0 frames."
    raise IOError(f"Could not open video: {video_path}")

frame_count = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames could be read.
            break
        # Zero-padded index so the files can be matched as frame_<digits>.jpg later.
        frame_filename = os.path.join(frames_folder, f"frame_{frame_count:04d}.jpg")
        # cv2.imwrite returns False when the image could not be saved.
        if not cv2.imwrite(frame_filename, frame):
            raise IOError(f"Could not write frame: {frame_filename}")
        frame_count += 1
finally:
    # Release the capture even if writing a frame failed.
    cap.release()

print(f"Extracted {frame_count} frames.")


In [None]:
import cv2
import os
import re
import random
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

# Locations used by this cell: detector weights, input frames, output video.
yolo_model_path = r"C:\Users\nitis\OneDrive\Documents\AMV\Research\best.pt"
frames_folder = r"C:\Users\nitis\OneDrive\Documents\AMV\Research\frames"
video_output_path = r"C:\Users\nitis\OneDrive\Documents\AMV\Research\tracked_output.mp4"

# Detector loaded from the weights file above; its `verbose` attribute is cleared.
model = YOLO(yolo_model_path)
model.verbose = False

# DeepSORT tracker configured with the "mobilenet" embedder, GPU embedding,
# half precision and BGR input frames.
tracker = DeepSort(
    embedder="mobilenet",
    embedder_gpu=True,
    half=True,
    bgr=True,
    max_age=8,
    n_init=2,
    max_cosine_distance=0.3,
    nms_max_overlap=0.2,
    nn_budget=None,
    override_track_class=None,
)

def random_color():
    """Return a 3-tuple of random ints, each drawn from 0..255 inclusive."""
    return tuple(random.randint(0, 255) for _ in range(3))

def get_sorted_frame_files(folder):
    """Return the frame image names found in ``folder``, ordered by frame number.

    Only names of the exact form ``frame_<digits>.jpg`` are returned; names that
    merely start that way (for example ``frame_0003.jpg.bak``) are ignored.
    Ordering is numeric on the digit group, so ``frame_100.jpg`` sorts after
    ``frame_0010.jpg``.

    Args:
        folder: Directory to list.

    Returns:
        A list of file names (not full paths); empty if nothing matches.
    """
    pattern = re.compile(r'frame_(\d+)\.jpg')
    frame_numbers = {}
    for name in os.listdir(folder):
        # fullmatch anchors both ends, so trailing junk after ".jpg" is rejected.
        found = pattern.fullmatch(name)
        if found:
            frame_numbers[name] = int(found.group(1))
    return sorted(frame_numbers, key=frame_numbers.get)

# Track players across the extracted frames and write an annotated video.
sorted_frames = get_sorted_frame_files(frames_folder)
if not sorted_frames:
    # Indexing sorted_frames[0] below would otherwise fail with a bare IndexError.
    raise FileNotFoundError(f"No frame_*.jpg files found in {frames_folder}")

# The first frame fixes the output video size.
first_frame = cv2.imread(os.path.join(frames_folder, sorted_frames[0]))
if first_frame is None:
    # cv2.imread returns None instead of raising when a file cannot be decoded.
    raise IOError(f"Could not read first frame: {sorted_frames[0]}")
height, width = first_frame.shape[:2]

# Output is written at a fixed 25 fps.
writer = cv2.VideoWriter(video_output_path, cv2.VideoWriter_fourcc(*'mp4v'), 25.0, (width, height))
track_colors = {}    # track id -> colour used for that track's box and label
unique_ids = set()   # every confirmed track id drawn so far

try:
    if not writer.isOpened():
        raise IOError(f"Could not open video writer for {video_output_path}")

    for idx, fname in enumerate(sorted_frames):
        path = os.path.join(frames_folder, fname)
        frame = cv2.imread(path)
        if frame is None:
            print(f"Skipping unreadable frame: {path}")
            continue

        results = model.predict(frame, conf=0.25, verbose=False)[0]
        detections = results.boxes.xywh.cpu().tolist()
        confidences = results.boxes.conf.cpu().tolist()

        # Shift (x, y) by half the width/height so each box is expressed as
        # [left, top, w, h]; every detection is passed with class 0.
        formatted = [
            ([x - w / 2, y - h / 2, w, h], conf, 0)
            for (x, y, w, h), conf in zip(detections, confidences)
        ]

        tracks = tracker.update_tracks(formatted, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue
            tid = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())

            # Keep one colour per track id for the whole video.
            if tid not in track_colors:
                track_colors[tid] = random_color()
            color = track_colors[tid]

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f"ID: {tid}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
            unique_ids.add(tid)

        # Overlay the running frame number and count of distinct ids.
        cv2.putText(frame, f"Frame: {idx+1}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
        cv2.putText(frame, f"Unique IDs: {len(unique_ids)}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
        writer.write(frame)
finally:
    # Release the writer even if detection or tracking raised part-way through.
    writer.release()

print(f"Tracking complete. Saved to {video_output_path}")


Implementation using DeepSORT with OSNet for re-identification

In [None]:
import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
from deep_sort_realtime.deepsort_tracker import DeepSort

# Player detector: YOLO loaded from the trained weights file.
yolo = YOLO(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\best.pt")

# DeepSORT tracker using the "torchreid" embedder with the osnet_x1_0 model.
# The file given as embedder_wts needs to be in TorchReID format.
tracker = DeepSort(
    embedder="torchreid",
    embedder_model_name="osnet_x1_0",
    embedder_wts=r"C:\Users\nitis\OneDrive\Documents\AMV\Research\osnet_x1_0_msmt17.pt",
    half=True,
    bgr=True,
    max_age=30,
    n_init=3,
    max_cosine_distance=0.4,
    nn_budget=None,
)

# Open the input video; fail early if it cannot be read, otherwise the loop
# below exits on the first read and leaves an empty output file behind.
cap = cv2.VideoCapture(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\15sec_input_720p.mp4")
if not cap.isOpened():
    raise IOError("Could not open the input video.")

# Reuse the source frame rate and size for the output; fall back to 25 fps
# when the capture reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter("output_deepsort.mp4",
                      cv2.VideoWriter_fourcc(*'mp4v'),
                      fps,
                      frame_size)

try:
    if not out.isOpened():
        raise IOError("Could not open output_deepsort.mp4 for writing.")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo.predict(frame, conf=0.5)[0]
        detections = []

        # Collect player detections as ([left, top, w, h], confidence, label).
        for box in results.boxes:
            cls = int(box.cls)
            if yolo.names[cls].lower() != "player":
                continue
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf)
            w, h = x2 - x1, y2 - y1
            detections.append(([x1, y1, w, h], conf, 'player'))

        # DeepSORT tracking
        tracks = tracker.update_tracks(detections, frame=frame)

        # Draw one labelled box per confirmed track.
        ann = Annotator(frame)
        for track in tracks:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            x1, y1, w, h = map(int, track.to_tlwh())
            x2, y2 = x1 + w, y1 + h
            ann.box_label([x1, y1, x2, y2], f"player {track_id}", color=(0, 255, 0))

        annotated = ann.result()
        out.write(annotated)
        cv2.imshow("DeepSORT + OSNet", annotated)
        # Key code 27 is Esc: stop early.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Release the capture, writer and preview window even if a frame failed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Done!")


Using StrongSORT with OSNet for re-identification

In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
from boxmot.tracker_zoo import create_tracker
from pathlib import Path

# Player detector: YOLO loaded from the trained weights file.
yolo = YOLO(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\best.pt")

# StrongSORT tracker built through BoxMOT's factory; it runs on CUDA when
# torch reports a GPU and on the CPU otherwise.
tracker = create_tracker(
    tracker_type="strongsort",
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    reid_weights=Path(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\osnet_x1_0_msmt17.pth"),
    tracker_config=Path(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\strongsort.yaml"),
)

# Open the input video; fail early if it cannot be read, otherwise the loop
# below exits on the first read and leaves an empty output file behind.
cap = cv2.VideoCapture(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\15sec_input_720p.mp4")
if not cap.isOpened():
    raise IOError("Could not open the input video.")

# Reuse the source frame rate and size for the output; fall back to 25 fps
# when the capture reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter("output_strongsort.mp4",
                      cv2.VideoWriter_fourcc(*'mp4v'),
                      fps,
                      frame_size)

try:
    if not out.isOpened():
        raise IOError("Could not open output_strongsort.mp4 for writing.")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo.predict(frame, conf=0.3)[0]
        dets = []

        # Keep only "player" boxes, one row per box:
        # [x1, y1, x2, y2, confidence, class id].
        for box in results.boxes:
            cls = int(box.cls)
            if yolo.names[cls].lower() != "player":
                continue
            x1, y1, x2, y2 = map(float, box.xyxy[0])
            conf = float(box.conf)
            cls_id = float(cls)
            dets.append([x1, y1, x2, y2, conf, cls_id])

        # The empty case keeps the same 6-column layout as the populated case.
        dets = np.array(dets) if dets else np.empty((0, 6))
        tracks = tracker.update(dets, frame)

        # The first five values of each track row are read as x1, y1, x2, y2, id.
        ann = Annotator(frame)
        for trk in tracks:
            x1, y1, x2, y2, tid = map(int, trk[:5])
            ann.box_label([x1, y1, x2, y2], f"player {tid}", color=(0, 255, 0))

        annotated = ann.result()
        out.write(annotated)
        cv2.imshow("StrongSORT", annotated)
        # Key code 27 is Esc: stop early.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Release the capture, writer and preview window even if a frame failed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Done!")


Using BoT-SORT with OSNet for re-identification

In [None]:
import cv2
import torch
import numpy as np
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator
from boxmot.tracker_zoo import create_tracker
from pathlib import Path

# Setup model & tracker
yolo = YOLO(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\best.pt")  # your trained model with "player" class

# BoT-SORT tracker built through BoxMOT's factory; it runs on CUDA when torch
# reports a GPU and on the CPU otherwise.
# The ReID file name previously read "osnet_x1_0)msmt17.pt" (a ")" typed in
# place of "_"), which does not match the weights name used by the other cells.
# NOTE(review): the StrongSORT cell loads this model with a ".pth" extension —
# confirm which extension the file on disk actually has.
tracker = create_tracker(
    tracker_type="botsort",
    tracker_config=Path(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\botsort.yaml"),
    reid_weights=Path(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\osnet_x1_0_msmt17.pt"),  # downloaded ReID file
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

# Video IO: fail early if the input cannot be read, otherwise the loop below
# exits on the first read and leaves an empty output file behind.
cap = cv2.VideoCapture(r"C:\Users\nitis\OneDrive\Documents\AMV\Research\15sec_input_720p.mp4")
if not cap.isOpened():
    raise IOError("Could not open the input video.")

# Reuse the source frame rate and size for the output; fall back to 25 fps
# when the capture reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
out = cv2.VideoWriter("output_botsort_osnet.mp4",
    cv2.VideoWriter_fourcc(*'mp4v'),
    fps,
    frame_size)

try:
    if not out.isOpened():
        raise IOError("Could not open output_botsort_osnet.mp4 for writing.")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = yolo.predict(frame, conf=0.5)[0]

        # Keep only "player" boxes, one row per box:
        # [x1, y1, x2, y2, confidence, class id].
        dets = []
        for box in results.boxes:
            cls = int(box.cls)
            if yolo.names[cls].lower() != "player":
                continue
            x1, y1, x2, y2 = map(float, box.xyxy[0])  # box.xyxy is likely a tensor of shape (1,4)
            conf = float(box.conf)
            cls_id = float(cls)  # or int(cls) if your tracker wants it as int
            dets.append([x1, y1, x2, y2, conf, cls_id])

        # The empty case must have the same 6 columns as a populated array;
        # it used to be (0, 5), which disagrees with the rows built above.
        dets = np.array(dets) if dets else np.empty((0, 6))
        tracks = tracker.update(dets, frame)

        # The first five values of each track row are read as x1, y1, x2, y2, id.
        ann = Annotator(frame)
        for trk in tracks:
            x1, y1, x2, y2, tid = map(int, trk[:5])
            ann.box_label([x1, y1, x2, y2], f"player {tid}", color=(0,255,0))

        annotated = ann.result()
        out.write(annotated)
        cv2.imshow("BoT‑SORT + OSNet", annotated)
        # Key code 27 is Esc: stop early.
        if cv2.waitKey(1)==27:
            break
finally:
    # Release the capture, writer and preview window even if a frame failed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print("Done!")