# DeepSORT Tracker

## Install necessary libraries

In [1]:
# Install the packages imported below: ultralytics (YOLO), supervision and deep-sort-realtime (DeepSort).
!pip install ultralytics
!pip install supervision
!pip install deep-sort-realtime

# Clone the project repository; the model weights and test video paths used later point into it.
!git clone https://github.com/Ahms9731/Project_Phase2.git

Collecting ultralytics
  Downloading ultralytics-8.3.71-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

# Tracker

## Import necessary libraries

In [2]:
import numpy as np
import cv2
from ultralytics import YOLO
import supervision as sv
from deep_sort_realtime.deepsort_tracker import DeepSort

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


## Define helper functions to read and save videos

In [3]:
def read_video(video_path):
    """Read every frame of a video into memory.

    Args:
        video_path: Path of the video, passed unchanged to ``cv2.VideoCapture``.

    Returns:
        list: The frames in the order ``cap.read()`` produced them. The list
        is empty when the very first read fails.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # read() reports failure once no further frame is available.
                break
            frames.append(frame)
    finally:
        # Always release the capture so the underlying handle is not leaked,
        # even if reading raises part-way through.
        cap.release()
    return frames

def save_video(ouput_video_frames, output_video_path, fps=24):
    """Write a sequence of frames to a video file with the XVID codec.

    Args:
        ouput_video_frames: Non-empty sequence of frames. The video size is
            taken from the first frame's ``shape`` (height, width, ...).
            The parameter name is kept as-is for backward compatibility.
        output_video_path: Destination path handed to ``cv2.VideoWriter``.
        fps: Frame rate of the written video. Defaults to 24, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``ouput_video_frames`` is empty, because the frame
            size cannot be determined.
    """
    if len(ouput_video_frames) == 0:
        raise ValueError("save_video() needs at least one frame to write")

    height, width = ouput_video_frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # VideoWriter expects the frame size as (width, height).
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    try:
        for frame in ouput_video_frames:
            out.write(frame)
    finally:
        # Release even if a write fails so the file is closed properly.
        out.release()

## Define bounding-box helper functions used when drawing the annotations

In [4]:
def get_center_of_bbox(bbox):
    """Return the integer centre point of a box.

    Args:
        bbox: Four values unpacked as ``(x1, y1, x2, y2)``.

    Returns:
        tuple: ``(center_x, center_y)``, each midpoint truncated with ``int``.
    """
    left, top, right, bottom = bbox
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    return center_x, center_y

def get_bbox_width(bbox):
    """Return the signed width of a box: element 2 minus element 0 of ``bbox``."""
    left_edge = bbox[0]
    right_edge = bbox[2]
    return right_edge - left_edge

def measure_distance(p1, p2):
    """Return the Euclidean distance between two points.

    Only the first two coordinates (index 0 and 1) of each point are used.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared_length = delta_x**2 + delta_y**2
    return squared_length**0.5

def measure_xy_distance(p1, p2):
    """Return the signed per-axis offsets ``(p1.x - p2.x, p1.y - p2.y)``."""
    offset_x = p1[0] - p2[0]
    offset_y = p1[1] - p2[1]
    return offset_x, offset_y

def get_foot_position(bbox):
    """Return the bottom-centre point of a box as integers.

    Args:
        bbox: Four values unpacked as ``(x1, y1, x2, y2)``.

    Returns:
        tuple: ``(int((x1 + x2) / 2), int(y2))``.
    """
    left, _top, right, bottom = bbox
    foot_x = int((left + right) / 2)
    foot_y = int(bottom)
    return foot_x, foot_y

## Tracker

In [5]:
class Tracker:
    """Detect objects with a YOLO model and track them with DeepSORT.

    The class also provides the drawing helpers used to render the resulting
    tracks (ellipses for players/referees, triangles for the ball) on frames.
    """

    def __init__(self, model_path, max_age=5):
        """Load the detector and create the tracker.

        Args:
            model_path: Path of the weights file passed to ``YOLO``.
            max_age: Forwarded to ``DeepSort(max_age=...)``. Defaults to 5,
                the value that was previously hard-coded.
        """
        self.model = YOLO(model_path)
        self.tracker = DeepSort(max_age=max_age)

    def detect_frames(self, frames, batch_size=20, conf=0.1):
        """Run the detector on all frames, ``batch_size`` frames at a time.

        Args:
            frames: Sliceable sequence of frames.
            batch_size: Number of frames per ``predict`` call (default 20).
            conf: Confidence threshold passed to ``predict`` (default 0.1).

        Returns:
            list: The concatenated results of every ``predict`` call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            # A negative step would silently produce no detections at all.
            raise ValueError("batch_size must be a positive integer")

        detections = []
        for start in range(0, len(frames), batch_size):
            batch = frames[start:start + batch_size]
            detections.extend(self.model.predict(batch, conf=conf))
        return detections

    def get_object_tracks(self, frames):
        """Detect and track players, referees and the ball in ``frames``.

        Goalkeeper detections are relabelled as players before tracking.

        Args:
            frames: Sequence of frames in the channel order expected by the
                DeepSort embedder (BGR with DeepSort's default ``bgr=True``,
                which is what ``cv2`` produces).

        Returns:
            dict: ``{"players": [...], "referees": [...], "ball": [...]}``.
            Each list has one dict per processed frame, mapping the track id
            (as ``str``) to ``{"bbox": [x1, y1, x2, y2]}``.
        """
        detections = self.detect_frames(frames)

        tracks = {
            "players": [],
            "referees": [],
            "ball": []
        }

        for frame_num, (frame, detection) in enumerate(zip(frames, detections)):
            cls_names = detection.names
            cls_names_inv = {v: k for k, v in cls_names.items()}

            # Map class ids to output categories. Classes the model does not
            # define are simply absent instead of raising KeyError.
            category_by_class = {}
            for label, category in (("player", "players"),
                                    ("referee", "referees"),
                                    ("ball", "ball")):
                if label in cls_names_inv:
                    category_by_class[cls_names_inv[label]] = category

            # Convert to supervision detections
            detection_supervision = sv.Detections.from_ultralytics(detection)

            # Convert goalkeepers to players (only possible when the model
            # actually has a "player" class).
            player_class_id = cls_names_inv.get("player")
            if player_class_id is not None:
                for object_ind, class_id in enumerate(detection_supervision.class_id):
                    if cls_names[class_id] == "goalkeeper":
                        detection_supervision.class_id[object_ind] = player_class_id

            # Format detections for DeepSORT:
            # ([left, top, width, height], confidence, class id)
            detections_list = []
            for i in range(len(detection_supervision)):
                x1, y1, x2, y2 = detection_supervision.xyxy[i]
                detections_list.append((
                    [float(x1), float(y1), float(x2 - x1), float(y2 - y1)],
                    float(detection_supervision.confidence[i]),
                    int(detection_supervision.class_id[i]),
                ))

            # Update tracker. The frame is passed through unchanged: DeepSort
            # is created with its default ``bgr=True``, so its embedder does
            # the BGR->RGB conversion itself. Converting here as well would
            # swap the channels twice and feed wrong colours to the embedder.
            tracked_objects = self.tracker.update_tracks(
                detections_list,
                frame=frame
            )

            # Initialize frame tracks
            tracks["players"].append({})
            tracks["referees"].append({})
            tracks["ball"].append({})

            # Process tracked objects
            for track in tracked_objects:
                if not track.is_confirmed():
                    continue

                category = category_by_class.get(track.get_det_class())
                if category is None:
                    # Class that is not a player, referee or ball.
                    continue

                # Convert left/top/width/height to XYXY format
                x1, y1, w, h = track.to_ltwh()
                bbox_xyxy = [x1, y1, x1 + w, y1 + h]

                tracks[category][frame_num][str(track.track_id)] = {"bbox": bbox_xyxy}

        return tracks

    def draw_ellipse(self, frame, bbox, color, track_id=None):
        """Draw an open ellipse at the bottom of ``bbox`` and an optional id label.

        Args:
            frame: Image to draw on (modified in place by the cv2 calls).
            bbox: Box as ``(x1, y1, x2, y2)``.
            color: Colour used for the ellipse and the label background.
            track_id: When not ``None``, a filled rectangle containing the id
                is drawn below the ellipse.

        Returns:
            The same ``frame`` object.
        """
        y2 = int(bbox[3])
        x_center, _ = get_center_of_bbox(bbox)

        # abs() guards against boxes whose x2 < x1; the max() calls keep both
        # axes at least 1 pixel so cv2.ellipse never receives a zero axis.
        width = abs(get_bbox_width(bbox))
        axis_width = max(int(width), 1)
        axis_height = max(int(0.35 * width), 1)

        cv2.ellipse(
            frame,
            center=(x_center, y2),
            axes=(axis_width, axis_height),
            angle=0.0,
            startAngle=-45,
            endAngle=235,
            color=color,
            thickness=2,
            lineType=cv2.LINE_4
        )

        if track_id is not None:
            # Label box: 40x20 pixels, centred horizontally on the box and
            # shifted 15 pixels below its bottom edge.
            rectangle_width = 40
            rectangle_height = 20
            x1_rect = x_center - rectangle_width//2
            x2_rect = x_center + rectangle_width//2
            y1_rect = (y2 - rectangle_height//2) + 15
            y2_rect = (y2 + rectangle_height//2) + 15

            cv2.rectangle(frame,
                        (int(x1_rect), int(y1_rect)),
                        (int(x2_rect), int(y2_rect)),
                        color,
                        cv2.FILLED)

            try:
                wide_label = int(track_id) > 99
            except (TypeError, ValueError):
                # Non-numeric ids cannot be compared to 99; fall back to the
                # text length instead of crashing.
                wide_label = len(str(track_id)) > 2

            x1_text = x1_rect + 12
            if wide_label:
                # Start wider labels further left so they stay inside the box.
                x1_text -= 10

            cv2.putText(
                frame,
                f"{track_id}",
                (int(x1_text), int(y1_rect + 15)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 0, 0),
                2
            )

        return frame

    def draw_traingle(self,frame,bbox,color):
        """Draw a filled, black-outlined triangle pointing at the top of ``bbox``.

        The method name (including its spelling) is kept for backward
        compatibility with existing callers.

        Args:
            frame: Image to draw on (modified in place by the cv2 calls).
            bbox: Box as ``(x1, y1, x2, y2)``.
            color: Fill colour of the triangle.

        Returns:
            The same ``frame`` object.
        """
        y = int(bbox[1])
        x, _ = get_center_of_bbox(bbox)

        # Tip at the top-centre of the box, base 20 pixels above it.
        triangle_points = np.array([
            [x, y],
            [x - 10, y - 20],
            [x + 10, y - 20],
        ], dtype=np.int32)
        cv2.drawContours(frame, [triangle_points], 0, color, cv2.FILLED)
        cv2.drawContours(frame, [triangle_points], 0, (0, 0, 0), 2)

        return frame

    def draw_annotations(self,video_frames, tracks):
        """Return annotated copies of ``video_frames``.

        Args:
            video_frames: Sequence of frames; each is copied before drawing,
                so the inputs are left untouched.
            tracks: Dict with ``"players"``, ``"referees"`` and ``"ball"``
                lists indexed by frame number, as returned by
                ``get_object_tracks``.

        Returns:
            list: One annotated frame per input frame.
        """
        output_video_frames = []
        for frame_num, frame in enumerate(video_frames):
            frame = frame.copy()

            player_dict = tracks["players"][frame_num]
            ball_dict = tracks["ball"][frame_num]
            referee_dict = tracks["referees"][frame_num]

            # Draw Players
            for track_id, player in player_dict.items():
                color = player.get("team_color", (0, 0, 255))
                frame = self.draw_ellipse(frame, player["bbox"], color, track_id)

                if player.get('has_ball', False):
                    frame = self.draw_traingle(frame, player["bbox"], (0, 0, 255))

            # Draw Referee
            for referee in referee_dict.values():
                frame = self.draw_ellipse(frame, referee["bbox"], (0, 255, 255))

            # Draw ball
            for ball in ball_dict.values():
                frame = self.draw_traingle(frame, ball["bbox"], (0, 255, 0))

            output_video_frames.append(frame)

        return output_video_frames

## Detection and tracking on a football video

In [6]:
# Path to the YOLO weights inside the cloned Project_Phase2 repository.
yolo_path = '/content/Project_Phase2/Models/yolo_best_model.pt'

# To test with your own video, upload it to the Colab local directory and put its path here:
test_video_path = '/content/Project_Phase2/Test Videos/test_video_1.mp4'

def main(video_path=None, model_path=None,
         output_path='/content/DeepSORT_output_video.avi'):
    """Run detection and tracking on a video and save the annotated result.

    Args:
        video_path: Video to process. Defaults to the module-level
            ``test_video_path``.
        model_path: YOLO weights to load. Defaults to the module-level
            ``yolo_path``.
        output_path: Where the annotated video is written.

    Raises:
        ValueError: If no frame could be read from ``video_path``.
    """
    if video_path is None:
        video_path = test_video_path
    if model_path is None:
        model_path = yolo_path

    # Read Video
    video_frames = read_video(video_path)
    if not video_frames:
        # Fail early with a clear message instead of an IndexError much later
        # when the (empty) result is written out.
        raise ValueError(f"No frames could be read from {video_path!r}")

    # Initialize Tracker
    tracker = Tracker(model_path=model_path)

    tracks = tracker.get_object_tracks(video_frames)

    # Draw object Tracks
    output_video_frames = tracker.draw_annotations(video_frames, tracks)

    # Save video
    save_video(output_video_frames, output_path)

# Run the pipeline: read the test video, track objects, draw annotations and save the result.
main()


0: 768x1280 2 balls, 1 goalkeeper, 21 players, 3 referees, 20.4ms
1: 768x1280 2 balls, 1 goalkeeper, 21 players, 3 referees, 20.4ms
2: 768x1280 1 ball, 1 goalkeeper, 21 players, 3 referees, 20.4ms
3: 768x1280 1 ball, 1 goalkeeper, 22 players, 3 referees, 20.4ms
4: 768x1280 3 balls, 1 goalkeeper, 21 players, 3 referees, 20.4ms
5: 768x1280 1 ball, 1 goalkeeper, 22 players, 3 referees, 20.4ms
6: 768x1280 1 ball, 1 goalkeeper, 20 players, 3 referees, 20.4ms
7: 768x1280 1 ball, 1 goalkeeper, 21 players, 3 referees, 20.4ms
8: 768x1280 1 ball, 1 goalkeeper, 19 players, 3 referees, 20.4ms
9: 768x1280 1 ball, 1 goalkeeper, 19 players, 3 referees, 20.4ms
10: 768x1280 1 ball, 1 goalkeeper, 21 players, 3 referees, 20.4ms
11: 768x1280 3 balls, 1 goalkeeper, 21 players, 3 referees, 20.4ms
12: 768x1280 2 balls, 1 goalkeeper, 22 players, 3 referees, 20.4ms
13: 768x1280 2 balls, 1 goalkeeper, 24 players, 3 referees, 20.4ms
14: 768x1280 1 ball, 2 goalkeepers, 23 players, 3 referees, 20.4ms
15: 768x1280