In [None]:
import cv2
from ultralytics import YOLO
from deep_sort_realtime.deepsort_tracker import DeepSort

# Detect objects in every frame of the input video with YOLO, associate the
# detections across frames with DeepSort, draw the confirmed tracks, and write
# the annotated frames to a new video while previewing them in a window.

# Load model and tracker
model = YOLO("best.pt")
tracker = DeepSort(max_age=30)

# Open video
cap = cv2.VideoCapture("15sec_input_720p.mp4")
out = None  # created once the input is known to be readable

try:
    if not cap.isOpened():
        # Without this check the loop below is skipped silently and an empty
        # output file is produced.
        raise RuntimeError("Could not open input video: 15sec_input_720p.mp4")

    # Output video uses the same geometry and frame rate as the input
    frame_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps or fps != fps:
        # Some containers report 0 (or NaN) FPS; a writer created with that
        # value yields an unplayable file, so fall back to a common default.
        fps = 30.0
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter('tracked_output.mp4', fourcc, fps, (frame_w, frame_h))
    if not out.isOpened():
        raise RuntimeError("Could not open output video: tracked_output.mp4")

    # Reads each frame of the video one by one
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break  # end of video

        # Run detection (one image in, so one result out)
        result = model(frame)[0]

        # Move the predictions to host memory as NumPy arrays
        boxes = result.boxes.xyxy.cpu().numpy()
        confs = result.boxes.conf.cpu().numpy()
        clss = result.boxes.cls.cpu().numpy()

        # Build detections list in the ([left, top, w, h], conf, class) form.
        # The class name comes from the model's own id -> name mapping so that
        # balls and referees are not mislabelled as players.
        detections = []
        for (x1, y1, x2, y2), conf, cls_id in zip(boxes, confs, clss):
            ltwh = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
            detections.append((ltwh, float(conf), result.names[int(cls_id)]))

        # Update tracker.
        # The Kalman filter predicts the new positions of existing tracks
        # before new detections are assigned. The Hungarian algorithm is then
        # applied internally to solve the assignment problem: it finds the
        # best matching between predicted tracks and the new detections,
        # minimising the total cost (distance between predicted and detected
        # positions, possibly including appearance similarity).
        tracks = tracker.update_tracks(detections, frame=frame)

        # Draw tracks
        for track in tracks:
            if not track.is_confirmed():
                continue  # skip tentative tracks
            track_id = track.track_id
            l, t, r, b = (int(v) for v in track.to_ltrb())
            cv2.rectangle(frame, (l, t), (r, b), (0, 255, 0), 2)  # bounding box
            cv2.putText(frame, f"ID {track_id}", (l, t - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)  # text above the box

        out.write(frame)

        # Display frame
        cv2.imshow("Re-ID Tracker", frame)

        # press 'q' to stop loop
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture, finalise the output file and close the
    # preview window, even if detection or tracking raised mid-video.
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()



0: 384x640 1 ball, 16 players, 2 referees, 2965.2ms
Speed: 12.5ms preprocess, 2965.2ms inference, 23.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 18 players, 2 referees, 3442.2ms
Speed: 20.5ms preprocess, 3442.2ms inference, 7.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 3537.8ms
Speed: 30.1ms preprocess, 3537.8ms inference, 2.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 3060.4ms
Speed: 6.7ms preprocess, 3060.4ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 2966.8ms
Speed: 17.6ms preprocess, 2966.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 2890.9ms
Speed: 7.4ms preprocess, 2890.9ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees, 2984.5ms
Speed: 52.4ms preprocess, 2984.5ms inference, 