# YOLO

## Install necessary libraries

In [1]:
# Install the two third-party packages imported later in this notebook
# (`ultralytics` for YOLO, `supervision` for detection post-processing).
# NOTE(review): no versions are pinned, so a fresh run may resolve newer releases.
!pip install ultralytics
!pip install supervision

# Clone the project repository; later cells read the YOLO weights and the test
# video from /content/Project_Phase2 (assumes the working directory is /content).
!git clone https://github.com/Ahms9731/Project_Phase2.git

Collecting ultralytics
  Downloading ultralytics-8.3.70-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nv

# SOT

## Import necessary libraries

In [2]:
from ultralytics import YOLO
import cv2
import numpy as np
import supervision as sv
import os
import sys
import random
import torch

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


## Define helper functions to read and save videos

In [3]:
def read_video(video_path):
    """Read every frame of a video file into memory.

    Args:
        video_path: Path of the video, passed straight to ``cv2.VideoCapture``.

    Returns:
        list: The frames in read order, exactly as returned by
        ``VideoCapture.read``. The list is empty when no frame could be read
        (for example when the file cannot be opened).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # `ret` is falsy once the stream is exhausted or unreadable.
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if reading fails part-way.
        cap.release()
    return frames

def save_video(output_video_frames, output_video_path, fps=24):
    """Write a sequence of frames to a video file using the XVID codec.

    Args:
        output_video_frames: Non-empty sequence of frames. The output size is
            taken from the first frame's ``shape`` (height, width, ...).
        output_video_path: Destination path handed to ``cv2.VideoWriter``.
        fps: Frame rate of the written video. Defaults to 24, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``output_video_frames`` is empty, because the frame size
            cannot be determined and there is nothing to write.
    """
    if len(output_video_frames) == 0:
        raise ValueError("output_video_frames is empty; there is nothing to write.")

    height, width = output_video_frames[0].shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # VideoWriter expects the frame size as (width, height).
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
    try:
        for frame in output_video_frames:
            out.write(frame)
    finally:
        # Release the writer even if a write fails, so the handle is not leaked.
        out.release()

## Define bounding-box geometry helpers (centers, widths, distances)

In [4]:
def get_center_of_bbox(bbox):
    """Return the (x, y) midpoint of an ``[x1, y1, x2, y2]`` box, truncated to ints."""
    left, top, right, bottom = bbox
    center_x = int((left + right) / 2)
    center_y = int((top + bottom) / 2)
    return center_x, center_y

def get_bbox_width(bbox):
    """Return the horizontal extent (``x2 - x1``) of an ``[x1, y1, x2, y2]`` box."""
    left_edge = bbox[0]
    right_edge = bbox[2]
    return right_edge - left_edge

def measure_distance(p1, p2):
    """Return the Euclidean distance between two points given as ``(x, y)`` pairs."""
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    squared_length = delta_x ** 2 + delta_y ** 2
    return squared_length ** 0.5

def measure_xy_distance(p1, p2):
    """Return the signed per-axis offsets ``(p1.x - p2.x, p1.y - p2.y)``."""
    offset_x = p1[0] - p2[0]
    offset_y = p1[1] - p2[1]
    return offset_x, offset_y

def get_foot_position(bbox):
    """Return the bottom-centre point of an ``[x1, y1, x2, y2]`` box as ints."""
    left, _top, right, bottom = bbox
    foot_x = int((left + right) / 2)
    foot_y = int(bottom)
    return foot_x, foot_y

## Tracker

In [5]:
# First run these commands in Google Colab to set up dependencies
# Clone the PySOT repository; the tracker modules are imported from this checkout.
!git clone https://github.com/STVIR/pysot.git
# Install the Python packages needed by the cloned tracker code.
# NOTE(review): versions are unpinned.
!pip install torch torchvision opencv-python
!pip install pyyaml yacs tqdm
# Download the checkpoint from Google Drive and store it as model.pth inside the
# siamrpn_r50_l234_dwxcorr experiment directory of the checkout.
!gdown --fuzzy "https://drive.google.com/file/d/1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH/view?usp=drive_link" -O /content/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth

# Put the checkout first on the import path so `import pysot...` resolves to it.
sys.path.insert(0, '/content/pysot')

Cloning into 'pysot'...
remote: Enumerating objects: 433, done.[K
remote: Counting objects:  33% (1/3)[Kremote: Counting objects:  66% (2/3)[Kremote: Counting objects: 100% (3/3)[Kremote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 433 (delta 0), reused 0 (delta 0), pack-reused 430 (from 2)[K
Receiving objects: 100% (433/433), 6.35 MiB | 16.99 MiB/s, done.
Resolving deltas: 100% (194/194), done.
Collecting yacs
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Installing collected packages: yacs
Successfully installed yacs-0.1.8
Downloading...
From (original): https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH
From (redirected): https://drive.google.com/uc?id=1-tEtYQdT1G9kn8HsqKNDHVqjE16F8YQH&confirm=t&uuid=67187c56-bb92-4331-a0e6-4948ece0d813
To: /content/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth
100% 216M/216M [00:02<00:00, 100MB/s]


In [6]:
# PySOT imports
from pysot.core.config import cfg
from pysot.tracker.siamrpn_tracker import SiamRPNTracker
from pysot.models.model_builder import ModelBuilder

# Function to load PySOT Siamese tracker
def load_siamese_tracker(pysot_config_path, pysot_model_path):
    """Build a SiamRPN tracker from a PySOT config file and a checkpoint.

    The global PySOT ``cfg`` is updated from ``pysot_config_path`` and its
    ``CUDA`` flag is set from ``torch.cuda.is_available()``. The checkpoint is
    loaded into a fresh ``ModelBuilder``; on a CUDA machine the model is moved
    to the GPU, otherwise the weights are mapped to CPU storage while loading.

    Args:
        pysot_config_path: Path of the experiment's config file.
        pysot_model_path: Path of the checkpoint holding the model state dict.

    Returns:
        SiamRPNTracker: Tracker wrapping the loaded model, switched to eval mode.
    """
    cfg.merge_from_file(pysot_config_path)
    gpu_available = torch.cuda.is_available()
    cfg.CUDA = gpu_available

    network = ModelBuilder()
    if gpu_available:
        weights = torch.load(pysot_model_path)
        network.load_state_dict(weights)
        network = network.cuda()
    else:
        # Force every stored tensor onto the CPU when no GPU is present.
        weights = torch.load(pysot_model_path,
                             map_location=lambda storage, loc: storage.cpu())
        network.load_state_dict(weights)
    network.eval()

    return SiamRPNTracker(network)

class Tracker:
    """Follow one player through a video: YOLO picks the target, SiamRPN tracks it.

    On the first processed frame a YOLO model detects objects, one detection
    whose class name is in ``PLAYER_CLASS_NAMES`` is chosen at random, and the
    PySOT Siamese tracker is initialised on its box. Every later frame is
    handled by the Siamese tracker alone.

    Attributes are plain instance fields:
        model: The YOLO detector built from ``model_path``.
        siam_tracker: The tracker returned by ``load_siamese_tracker``.
        tracking_initialized: True once the Siamese tracker has a target.
        tracked_bboxes: Result of the most recent ``get_object_tracks`` call.
        conf: Confidence threshold passed to every YOLO ``predict`` call.
    """

    # Detection class names that are eligible to become the tracked target.
    PLAYER_CLASS_NAMES = ('player', 'goalkeeper')

    def __init__(self, model_path, pysot_config_path, pysot_model_path,
                 conf=0.1, seed=None):
        """Load the detector and the Siamese tracker.

        Args:
            model_path: Path of the YOLO weights.
            pysot_config_path: Path of the PySOT experiment config.
            pysot_model_path: Path of the PySOT checkpoint.
            conf: Confidence threshold for YOLO predictions (default 0.1, the
                value that was previously hard-coded).
            seed: Optional seed for the random target choice. ``None`` keeps
                the previous behaviour of using the module-level ``random``
                generator; any other value makes the choice repeatable.
        """
        self.model = YOLO(model_path)
        self.siam_tracker = load_siamese_tracker(pysot_config_path, pysot_model_path)
        self.tracking_initialized = False
        self.tracked_bboxes = []
        self.conf = conf
        # A private generator keeps a seeded run independent of other users of
        # the global `random` state.
        self._rng = random.Random(seed) if seed is not None else None

    def detect_frames(self, frames, batch_size=20):
        """Run YOLO over ``frames`` in batches and return all results in order.

        Args:
            frames: Sequence of frames supporting ``len`` and slicing.
            batch_size: Number of frames per ``predict`` call (default 20).

        Returns:
            list: Concatenated prediction results, one entry per frame.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1.")
        detections = []
        for start in range(0, len(frames), batch_size):
            detections += self.model.predict(frames[start:start + batch_size],
                                             conf=self.conf)
        return detections

    def initialize_tracker_with_random_player(self, frame):
        """Detect players in ``frame``, pick one at random and start tracking it.

        Args:
            frame: The image to detect on and to initialise the tracker with.

        Returns:
            list: The chosen box as four floats ``[x1, y1, x2, y2]``.

        Raises:
            RuntimeError: If no detection has a class name listed in
                ``PLAYER_CLASS_NAMES``.
        """
        detections = self.model.predict(frame, conf=self.conf)[0]
        cls_names = detections.names

        # Convert to supervision Detection format
        detection_supervision = sv.Detections.from_ultralytics(detections)

        # Keep only detections whose class name marks them as a player.
        player_indices = [
            index
            for index, class_id in enumerate(detection_supervision.class_id)
            if cls_names[class_id] in self.PLAYER_CLASS_NAMES
        ]
        if not player_indices:
            raise RuntimeError("No players detected in the first frame.")

        # Randomly select one player (seeded generator if one was configured).
        chooser = self._rng if self._rng is not None else random
        selected_index = chooser.choice(player_indices)
        x1, y1, x2, y2 = (float(coord)
                          for coord in detection_supervision.xyxy[selected_index])

        # The Siamese tracker is initialised with [x, y, w, h].
        self.siam_tracker.init(frame, [x1, y1, x2 - x1, y2 - y1])
        self.tracking_initialized = True

        # Callers get the box back in [x1, y1, x2, y2] format.
        return [x1, y1, x2, y2]

    def get_object_tracks(self, frames):
        """Track the target through ``frames`` and return one box per frame.

        The first frame seen while ``tracking_initialized`` is False is used to
        pick and initialise the target; all other frames are passed to the
        Siamese tracker. The result is also stored in ``tracked_bboxes``.

        Args:
            frames: Iterable of frames (each must support ``.copy()``).

        Returns:
            list[dict]: One ``{'frame_num': int, 'bbox': [x1, y1, x2, y2]}``
            entry per input frame, in order.

        Raises:
            RuntimeError: Propagated from
                ``initialize_tracker_with_random_player`` when no player is found.
        """
        tracks = []
        for frame_num, frame in enumerate(frames):
            # Work on a copy so detector/tracker calls never see the caller's array.
            frame = frame.copy()
            if not self.tracking_initialized:
                # Already in [x1, y1, x2, y2] format.
                bbox = self.initialize_tracker_with_random_player(frame)
            else:
                # Tracker output is [x, y, w, h]; convert to corner format.
                x, y, w, h = self.siam_tracker.track(frame)['bbox']
                bbox = [x, y, x + w, y + h]

            tracks.append({'frame_num': frame_num, 'bbox': bbox})

        self.tracked_bboxes = tracks
        return tracks

    def draw_ellipse(self, frame, bbox, color, track_id=None):
        """Draw a partial ellipse at the bottom of ``bbox`` and an optional id label.

        The frame is modified in place and also returned.

        Args:
            frame: Image to draw on.
            bbox: Box as ``[x1, y1, x2, y2]``.
            color: Colour used for the ellipse and the label background.
            track_id: When not None, a filled 40x20 label showing this id is
                drawn centred 15 px below the box's bottom edge.

        Returns:
            The same ``frame`` object that was passed in.
        """
        x1, y1, x2, y2 = bbox
        x_center = int((x1 + x2) / 2)
        y2 = int(y2)
        width = int(x2 - x1)

        # Open arc (-45 deg to 235 deg) centred on the bottom-middle of the box.
        cv2.ellipse(
            frame,
            center=(x_center, y2),
            axes=(int(width / 2), int(0.35 * width)),
            angle=0.0,
            startAngle=-45,
            endAngle=235,
            color=color,
            thickness=2,
            lineType=cv2.LINE_4
        )

        rectangle_width = 40
        rectangle_height = 20
        x1_rect = x_center - rectangle_width // 2
        x2_rect = x_center + rectangle_width // 2
        y1_rect = (y2 - rectangle_height // 2) + 15
        y2_rect = (y2 + rectangle_height // 2) + 15

        if track_id is not None:
            cv2.rectangle(frame,
                          (int(x1_rect), int(y1_rect)),
                          (int(x2_rect), int(y2_rect)),
                          color,
                          cv2.FILLED)

            # Shift three-digit ids left so the text stays inside the label.
            x1_text = x1_rect + 12
            if track_id > 99:
                x1_text -= 10

            cv2.putText(
                frame,
                f"{track_id}",
                (int(x1_text), int(y1_rect + 15)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.6,
                (0, 0, 0),
                2
            )

        return frame

    def draw_annotations(self, video_frames, color=(0, 0, 255), track_id=1):
        """Return annotated copies of ``video_frames`` using ``tracked_bboxes``.

        Frames and stored tracks are paired by position; if the two sequences
        differ in length the extra items are ignored. Input frames are copied
        before drawing, so they are left unchanged.

        Args:
            video_frames: Frames to annotate, in the order they were tracked.
            color: Marker colour (default ``(0, 0, 255)``, red in BGR order).
            track_id: Id shown in the label under the player (default 1).

        Returns:
            list: The annotated frame copies.
        """
        output_video_frames = []
        for track_info, frame in zip(self.tracked_bboxes, video_frames):
            annotated = self.draw_ellipse(frame.copy(), track_info['bbox'],
                                          color, track_id=track_id)
            output_video_frames.append(annotated)

        return output_video_frames

## Detection and tracking on a football video

In [7]:
yolo_path = '/content/Project_Phase2/Models/yolo_best_model.pt'

# To test with your own clip, upload it to the Colab local directory and put its path here:
test_video_path = '/content/Project_Phase2/Test Videos/test_video_1.mp4'

# PySOT SiamRPN experiment files inside the cloned pysot checkout.
pysot_config_path = '/content/pysot/experiments/siamrpn_r50_l234_dwxcorr/config.yaml'
pysot_model_path = '/content/pysot/experiments/siamrpn_r50_l234_dwxcorr/model.pth'

# Where the annotated video is written.
output_video_path = '/content/SOT_output_video.avi'

def main(video_path=test_video_path, output_path=output_video_path):
    """Track one randomly chosen player through a video and save the annotated result.

    Args:
        video_path: Input video to read (defaults to ``test_video_path``).
        output_path: Destination of the annotated video (defaults to
            ``output_video_path``).

    Raises:
        RuntimeError: If no frame could be read from ``video_path``.
    """
    # Read Video
    video_frames = read_video(video_path)
    if not video_frames:
        # Fail here with a clear message instead of an IndexError in save_video.
        raise RuntimeError(
            f"No frames could be read from {video_path!r}; check that the file exists and can be decoded."
        )

    # Initialize Tracker
    tracker = Tracker(model_path=yolo_path,
                      pysot_config_path=pysot_config_path,
                      pysot_model_path=pysot_model_path)

    # The tracks are kept on the tracker (tracked_bboxes) for the drawing step.
    tracker.get_object_tracks(video_frames)

    # Draw object Tracks
    output_video_frames = tracker.draw_annotations(video_frames)

    # Save video
    save_video(output_video_frames, output_path)

main()


0: 768x1280 2 balls, 1 goalkeeper, 21 players, 3 referees, 115.9ms
Speed: 33.4ms preprocess, 115.9ms inference, 414.6ms postprocess per image at shape (1, 3, 768, 1280)
