In [1]:
!pip install ultralytics
!pip install moviepy --upgrade
!pip install gdown

Collecting ultralytics
  Downloading ultralytics-8.3.52-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.52-py3-none-any.whl (901 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m901.7/901.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.52 ultralytics-thop-2.0.13
Collecting moviepy
  Downloading moviepy-2.1.1-py3-none-any.whl.metadata (6.9 kB)
Collecting python-dotenv>=0.10 (from moviepy)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading moviepy-2.1.1-py3-none-any.whl (123 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.5/123.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_

In [2]:
from pathlib import Path
from typing import List, Tuple, Sequence

import numpy as np
import pandas as pd
from numpy import unravel_index
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm, notebook
from ultralytics import YOLO

from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

import math
from scipy.ndimage import gaussian_filter

import os
import gc
import time
import random
import csv
import yaml
import gdown
import shutil

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
def get_num_clips(path: Path, game: int) -> int:
    """Count the entries found directly inside ``path / 'game<game>'``.

    Every directory entry is counted, no filtering is applied.
    """
    game_dir = path / f'game{game}/'
    return sum(1 for _ in game_dir.iterdir())


def get_game_clip_pairs(path: Path, games: List[int]) -> List[Tuple[int, int]]:
    """List every ``(game, clip)`` pair for the requested games.

    Clip numbers start at 1 and run up to the entry count reported by
    ``get_num_clips`` for the corresponding game.
    """
    pairs = []
    for game in games:
        n_clips = get_num_clips(path, game)
        pairs.extend((game, clip_idx) for clip_idx in range(1, n_clips + 1))
    return pairs


def load_clip_data(path: Path, game: int, clip: int, downscale: bool, quiet=False) -> np.ndarray:
    """Load all frames of a clip as one uint8 array, backed by an on-disk cache.

    Frames are read from ``path / 'game<game>/clip<clip>'`` as ``0000.jpg``,
    ``0001.jpg``, ... and stacked along a new first axis. The stacked array is
    written compressed to ``path / 'cache'`` so later calls can skip decoding.

    Args:
        path: dataset root that contains the ``game<N>`` directories.
        game: game number.
        clip: clip number inside the game.
        downscale: if True, every frame is resized to half its width and height
            (a separate cache file with the ``_ds2`` suffix is used).
        quiet: if True, the progress message is not printed.

    Returns:
        The stacked frames of the clip.
    """
    if not quiet:
        suffix = 'downscaled' if downscale else ''
        print(f'loading clip data (game {game}, clip {clip}) {suffix}')

    cache_path = path / 'cache'
    cache_path.mkdir(exist_ok=True, parents=True)
    resize_code = '_ds2' if downscale else ''
    cached_file = cache_path / f'{game}_{clip}{resize_code}.npz'

    if cached_file.exists():
        # NpzFile keeps the archive open until closed; indexing inside the
        # ``with`` block materialises the array before the handle is released.
        with np.load(cached_file) as cached:
            return cached['clip_data']

    clip_path = path / f'game{game}/clip{clip}'
    # Count the frames themselves rather than "all entries minus labels.csv",
    # so an unrelated extra file in the clip directory does not break loading.
    n_imgs = sum(1 for entry in clip_path.iterdir() if entry.suffix.lower() == '.jpg')
    imgs = [None] * n_imgs
    for i in notebook.tqdm(range(n_imgs)):
        # The context manager closes the underlying file once the pixels are copied.
        with Image.open(clip_path / f'{i:04d}.jpg') as img:
            if downscale:
                img = img.resize((img.width // 2, img.height // 2),)
            imgs[i] = np.array(img, dtype=np.uint8)
    clip_data = np.stack(imgs)
    np.savez_compressed(cached_file, clip_data=clip_data)
    return clip_data


def load_clip_labels(path: Path, game: int, clip: int, downscale: bool, quiet=False):
    """Read ``labels.csv`` of a clip into an integer array, one row per frame.

    The header line and the first column of every row are dropped; empty cells
    become ``-1``. With ``downscale`` the values at positions 1 and 2 of each
    row are floor-divided by 2.
    """
    if not quiet:
        print(f'loading clip labels (game {game}, clip {clip})')

    labels_file = path / f'game{game}/clip{clip}' / 'labels.csv'
    with open(labels_file) as csvfile:
        rows = list(csv.reader(csvfile))[1:]

    parsed = []
    for row in rows:
        values = np.array([int(cell) if cell != '' else -1 for cell in row[1:]])
        if downscale:
            for pos in (1, 2):
                values[pos] //= 2
        parsed.append(values)
    return np.stack(parsed)


def load_clip(path: Path, game: int, clip: int, downscale: bool, quiet=False):
    """Return ``(frames, labels)`` for one clip, loading frames first."""
    frames = load_clip_data(path, game, clip, downscale, quiet)
    return frames, load_clip_labels(path, game, clip, downscale, quiet)

In [4]:
def prepare_experiment(out_path: Path) -> Path:
    """Create and return the next free ``exp_<N>`` directory under ``out_path``.

    ``N`` is one more than the largest number found among existing ``exp_*``
    sub-directories, or 1 when there are none.
    """
    out_path.mkdir(parents=True, exist_ok=True)
    used_ids = [
        int(entry.name.split('_')[1])
        for entry in out_path.iterdir()
        if entry.is_dir() and entry.name.startswith('exp_')
    ]
    next_id = max(used_ids) + 1 if used_ids else 1
    exp_path = out_path / f'exp_{next_id}'
    exp_path.mkdir()
    return exp_path


def ball_gauss_template(rad, sigma):
    """Return a ``(2*rad+1) x (2*rad+1)`` Gaussian bump that peaks at 1 in the centre.

    ``sigma`` is the standard deviation, in the same units as ``rad``.
    """
    axis = np.linspace(-rad, rad, 2 * rad + 1)
    x, y = np.meshgrid(axis, axis)
    dst = np.sqrt(x * x + y * y)
    return np.exp(-(dst ** 2 / (2.0 * sigma ** 2)))


def create_masks(data: np.ndarray, labels: np.ndarray, resize):
    """Build one float32 mask per frame with a Gaussian blob at the labelled position.

    Frames whose first label value is not strictly between 0 and 3 get an
    all-zero mask. The blob radius is 64, halved when ``resize`` is truthy;
    sigma stays at 10 either way.
    """
    rad = 64 #25
    sigma = 10
    if resize:
        rad //= 2
    ball = ball_gauss_template(rad, sigma)
    pad = 2 * rad        # margin added on every side of the working canvas
    side = 2 * rad + 1   # edge length of the square template

    masks = []
    for idx in range(data.shape[0]):
        label = labels[idx, ...]
        frame = data[idx, ...]
        height, width = frame.shape[0], frame.shape[1]
        if not (0 < label[0] < 3):
            masks.append(np.zeros((height, width), dtype=np.float32))
            continue
        x, y = label[1:3]
        # Paint on a padded canvas so a blob near the border is simply cut off
        # by the crop below; the template centre lands on (x, y) after cropping.
        canvas = np.zeros((height + 2 * pad, width + 2 * pad), np.float32)
        canvas[y + rad : y + rad + side, x + rad : x + rad + side] = ball
        masks.append(canvas[pad:-pad, pad:-pad])
    return np.stack(masks)

In [5]:
def _add_frame_number(frame: np.ndarray, number: int) -> np.ndarray:
    """Return a copy of ``frame`` with ``'frame <number>'`` drawn at (10, 10) in magenta."""
    canvas = Image.fromarray(frame)
    ImageDraw.Draw(canvas).text(
        (10, 10),
        f'frame {number}',
        font=ImageFont.load_default(),  # ImageFont.truetype("arial.ttf", 25)
        fill=(255, 0, 255),
    )
    return np.array(canvas)


def _vis_clip(data: np.ndarray, lbls: np.ndarray, metrics: List[float] = None, ball_rad=5, color=(255, 0, 0), track_length=10):
    """Render every frame of a clip with the ball marker, its recent track and a caption.

    Args:
        data: clip frames, indexed by frame along the first axis.
        lbls: per-frame labels; element 0 is the visibility code (0 means no
            ball), elements 1 and 2 are the x and y pixel coordinates.
        metrics: optional per-frame metric values appended to the caption.
        ball_rad: radius in pixels of the circle drawn around the ball.
        color: RGB colour of the circle and the track.
        track_length: maximum number of previous segments drawn behind the ball.

    Returns:
        A list with one annotated frame (numpy array) per input frame.
    """
    print('perfoming clip visualization')
    n_frames = data.shape[0]
    frames_res = []
    fnt = ImageFont.load_default() # ImageFont.truetype("arial.ttf", 25)
    for i in range(n_frames):
        img = Image.fromarray(data[i, ...])
        draw = ImageDraw.Draw(img)
        txt = f'frame {i}'
        if metrics is not None:
            txt += f', SiBaTrAcc: {metrics[i]:.3f}'
        draw.text((10, 10), txt, font=fnt, fill=(255, 0, 255))
        label = lbls[i]
        if label[0] != 0:  # any non-zero code: a ball position is available
            px, py = label[1], label[2]
            draw.ellipse((px - ball_rad, py - ball_rad, px + ball_rad, py + ball_rad), outline=color, width=2)
            # Only walk back over frames that exist. An unbounded loop would use
            # negative indices, i.e. read labels from the END of the clip and
            # raise IndexError on clips shorter than track_length.
            for q in range(min(track_length, i)):
                prev_lbl, cur_lbl = lbls[i - q - 1], lbls[i - q]
                if prev_lbl[0] == 0:
                    break
                draw.line((prev_lbl[1], prev_lbl[2], cur_lbl[1], cur_lbl[2]), fill=color)
        frames_res.append(np.array(img))
    return frames_res


def _save_clip(frames: Sequence[np.ndarray], path: Path, fps):
    assert path.suffix in ('.mp4', '.gif')
    clip = ImageSequenceClip(frames, fps=fps)
    if path.suffix == '.mp4':
        clip.write_videofile(str(path), fps=fps, logger=None)
    else:
        clip.write_gif(str(path), fps=fps, logger=None)


def _to_yellow_heatmap(frame: np.ndarray, pred_frame: np.ndarray, alpha=0.4):
    img = Image.fromarray((frame * alpha).astype(np.uint8))
    maskR = (pred_frame * (1 - alpha) * 255).astype(np.uint8)
    maskG = (pred_frame * (1 - alpha) * 255).astype(np.uint8)
    maskB = np.zeros_like(maskG, dtype=np.uint8)
    mask = np.stack([maskR, maskG, maskB], axis=-1)
    return img + mask


def _vis_pred_heatmap(data_full: np.ndarray, pred_prob: np.ndarray, display_frame_number):
    n_frames = data_full.shape[0]
    v_frames = []
    for i in range(n_frames):
        frame = data_full[i, ...]
        pred = pred_prob[i, ...]
        hm = _to_yellow_heatmap(frame, pred)
        if display_frame_number:
            hm = _add_frame_number(hm, i)
        v_frames.append(hm)
    return v_frames


def visualize_prediction(data_full: np.ndarray, labels_pr: np.ndarray, save_path: Path, name: str, metrics=None, fps=15):
    """Dump predicted labels to ``<name>.txt`` and render them into ``<name>.mp4``.

    When ``metrics`` is given, its last value is written as the first line of
    the text file and the per-frame values are shown in the video captions.
    """
    report_path = save_path / f'{name}.txt'
    with open(report_path, mode='w') as f:
        if metrics is not None:
            f.write(f'SiBaTrAcc: {metrics[-1]} \n')
        f.writelines(
            f'frame {i}: {labels_pr[i, 0]}, {labels_pr[i, 1]}, {labels_pr[i, 2]} \n'
            for i in range(labels_pr.shape[0])
        )

    rendered = _vis_clip(data_full, labels_pr, metrics)
    _save_clip(rendered, save_path / f'{name}.mp4', fps=fps)


def visualize_prob(data: np.ndarray, pred_prob: np.ndarray, save_path: Path, name: str, frame_number=True, fps=15):
    """Render the prediction maps over the frames and save them as ``<name>_prob.mp4``."""
    heatmap_frames = _vis_pred_heatmap(data, pred_prob, frame_number)
    target = save_path / f'{name}_prob.mp4'
    _save_clip(heatmap_frames, target, fps=fps)

In [6]:
class Metrics:
    """Namespace for the SiBaTrAcc tracking-accuracy computations."""

    @staticmethod
    def position_error(label_gt: np.ndarray, label_pr: np.ndarray, step=8, alpha=1.5, e1=5, e2=5):
        """Return the error of one predicted label against its ground truth (capped at 5).

        gt codes:
          0 - the ball is not within the image
          1 - the ball can easily be identified
          2 - the ball is in the frame, but is not easy to identify
          3 - the ball is occluded

        ``e1`` is returned when the ball exists but was not predicted, ``e2``
        when a ball was predicted although there is none. Otherwise the error
        grows with the pixel distance, quantised in units of ``step`` and
        raised to the power ``alpha``.
        """
        gt_has_ball = label_gt[0] != 0
        pr_has_ball = label_pr[0] != 0
        if gt_has_ball and not pr_has_ball:
            return e1
        if pr_has_ball and not gt_has_ball:
            return e2
        dx = label_gt[1] - label_pr[1]
        dy = label_gt[2] - label_pr[2]
        dist = math.sqrt(dx ** 2 + dy ** 2)
        return min(math.floor(dist / step) ** alpha, 5)

    @staticmethod
    def evaluate_predictions(labels_gt, labels_pr) -> Tuple[List[float], float]:
        """Return the running SiBaTrAcc after every frame and the value for the whole clip."""
        errors = [
            Metrics.position_error(labels_gt[i, ...], labels_pr[i, ...])
            for i in range(len(labels_gt))
        ]
        running = [1 - sum(errors[:k]) / (k * 5) for k in range(1, len(errors) + 1)]
        total = 1 - sum(errors) / (len(labels_gt) * 5)
        return running, total

In [8]:
# LBL5 Преобразование данных к YOLO формату
def create_yolo_annotations(data_path, output_path, image_size=(720, 1280), box_size=0.02):
    """
    Convert per-clip ``labels.csv`` files into a flat YOLO-style dataset.

    Every frame listed in a ``labels.csv`` is copied to ``output_path`` as
    ``<game>_<clip>_<file name>`` and gets a label file with the same stem and
    a ``.txt`` extension. Frames without a ball (visibility 0) or without
    coordinates get an empty label file. All other frames get one line
    ``<visibility> <x_center> <y_center> <width> <height>`` with coordinates
    normalised by the image size.

    Args:
        data_path (str): Directory whose entries with 'game' in their name
            hold the clip directories.
        output_path (str): Directory that receives the copied images and the
            label files.
        image_size (tuple): ``(height, width)`` of the source frames in pixels,
            used for normalisation.
        box_size (float): Normalised width and height written for every box.
    """
    os.makedirs(output_path, exist_ok=True)
    image_height, image_width = image_size

    for game in os.listdir(data_path):
        if 'game' not in game:
            continue

        game_path = os.path.join(data_path, game)
        if not os.path.isdir(game_path):
            # A stray file whose name contains 'game' cannot hold clips.
            continue

        for clip in os.listdir(game_path):
            clip_path = os.path.join(game_path, clip)
            labels_path = os.path.join(clip_path, 'labels.csv')

            if not os.path.exists(labels_path):
                continue

            df = pd.read_csv(labels_path)
            for _, row in df.iterrows():
                # Cast explicitly so the class id is always written as an integer.
                visibility = int(row['visibility'])
                filename = row['file name']
                # Use the real stem (not a fixed 4 characters) so the label
                # always matches its image.
                stem = os.path.splitext(filename)[0]

                # Copy the image next to its label
                image_path = os.path.join(clip_path, filename)
                output_image_path = os.path.join(output_path, f"{game}_{clip}_{filename}")
                shutil.copy(image_path, output_image_path)

                label_path = os.path.join(output_path, f"{game}_{clip}_{stem}.txt")
                x, y = row['x-coordinate'], row['y-coordinate']

                # No ball, or no usable position: an empty label file marks the
                # frame as background instead of writing 'nan' coordinates.
                if visibility == 0 or pd.isna(x) or pd.isna(y):
                    open(label_path, 'w').close()
                    continue

                # Normalise the centre to the [0, 1] range
                x_center = float(x) / image_width
                y_center = float(y) / image_height
                width = height = box_size

                with open(label_path, 'w') as label_file:
                    label_file.write(f'{visibility} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n')
                    
# LBL1 Валидация модели на части обучающей выборки
def split_data(input_dir, train_dir, val_dir, val_size=0.2, seed=None):
    """
    Split label/image pairs into a training and a validation set.

    Every ``*.txt`` label in ``input_dir`` is moved, together with the ``.jpg``
    image of the same stem, into ``<target>/labels`` and ``<target>/images``.

    Args:
        input_dir (str): Directory holding the label and image files.
        train_dir (str): Destination root of the training set.
        val_dir (str): Destination root of the validation set.
        val_size (float): Fraction of the pairs that goes to validation (0 to 1).
        seed (int | None): Seed for the shuffle. With ``None`` the global
            ``random`` state is used, so the split differs between runs.

    Raises:
        FileNotFoundError: if a label has no matching image in ``input_dir``.
    """
    for root in (train_dir, val_dir):
        for sub in ('labels', 'images'):
            os.makedirs(os.path.join(root, sub), exist_ok=True)

    # Sort first: os.listdir order is arbitrary, which would defeat the seed.
    all_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.txt'))
    if seed is None:
        random.shuffle(all_files)
    else:
        random.Random(seed).shuffle(all_files)

    split_index = int(len(all_files) * (1 - val_size))
    train_files = all_files[:split_index]
    val_files = all_files[split_index:]

    def move_files(file_list, target_dir):
        for file in file_list:
            # Swap only the extension; str.replace would also rewrite a '.txt'
            # occurring elsewhere in the name.
            image = os.path.splitext(file)[0] + '.jpg'
            image_src = os.path.join(input_dir, image)
            if not os.path.exists(image_src):
                # Fail before moving anything so the pair is never half-moved.
                raise FileNotFoundError(f'image for label {file} not found: {image_src}')
            shutil.move(os.path.join(input_dir, file), os.path.join(target_dir, 'labels', file))
            shutil.move(image_src, os.path.join(target_dir, 'images', image))

    move_files(train_files, train_dir)
    move_files(val_files, val_dir)

def create_yaml_file(yaml_path):
    """
    Write the dataset description YAML used for YOLO training.

    The file lists the relative image directories of the training and
    validation sets, the number of classes and the class names.

    Args:
        yaml_path (str): Where to save the YAML file.
    """
    # One class name per visibility code (0, 1, 2, 3)
    class_names = ['no-ball', 'easy', 'hard', 'occluded']
    data = {
        'train': 'train/images',
        'val': 'val/images',
        'nc': len(class_names),
        'names': class_names,
    }

    with open(yaml_path, 'w') as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False)

# Example usage of the functions: build the YOLO dataset from the raw clips.
data_path = '/kaggle/input/tennistrackingassignment'
output_path = './yolo_annotations/'
train_dir = './yolo_annotations/train'
val_dir = './yolo_annotations/val'
yaml_path = './yolo_annotations/data.yaml'

# Order matters: annotations are written flat into output_path first, then
# moved into the train/val sub-directories, and finally described by the YAML.
create_yolo_annotations(data_path, output_path)
split_data(output_path, train_dir, val_dir)
create_yaml_file(yaml_path)

In [9]:
class SuperTrackingModel:
    """Tennis-ball tracker built on an Ultralytics YOLO detector.

    For every frame the most confident detection is kept; its class is used as
    the visibility code and the centre of its box as the ball position.
    """

    def __init__(self, batch_s, stack_s, out_path, downscale):
        """Store the configuration; no model is created until ``load`` or ``train``.

        Args:
            batch_s: number of frames sent to the detector per call.
            stack_s: stored for interface compatibility; not read by this class.
            out_path: directory that receives visualisations written by ``test``.
            downscale: whether ``test`` feeds half-resolution frames to the model.
        """
        self.batch_s = batch_s
        self.stack_s = stack_s
        self.out_path = out_path
        self.downscale = downscale
        self.original_shape = (720, 1280)  # (height, width) used when upscaling coordinates
        self.model_shape = None            # (height, width) of the frames seen by the model
        self.model = None

    # LBL3 Loading the model from a specific training iteration (if iterative training is used)
    def load(self):
        """Download the trained weights from Google Drive and load them into YOLO."""
        model_path = 'tennis_yolo.pt'
        file_id = "1YARi3dU6aIdSPIv7QO64Kt5WJqyuS1wt"
        gdown.download(f"https://drive.google.com/uc?id={file_id}", model_path, quiet=False)

        self.model = YOLO(model_path)

    @staticmethod
    def _rgb_to_bgr(frame: np.ndarray) -> np.ndarray:
        """Return a contiguous copy of a 3-channel frame with the channel order reversed."""
        if frame.ndim == 3 and frame.shape[-1] == 3:
            return np.ascontiguousarray(frame[..., ::-1])
        return frame

    def predict_on_batch(self, batch: np.ndarray) -> list:
        """Run the detector on a batch of RGB frames.

        Returns:
            One tuple per frame: ``(0, None)`` when nothing was detected,
            otherwise ``(class, xyxy_box)`` of the most confident detection.
        """
        # Frames are decoded with PIL (RGB) but Ultralytics treats ndarray
        # sources as BGR, so the channels are swapped before inference.
        bgr_batch = [self._rgb_to_bgr(frame) for frame in batch]
        results = self.model(bgr_batch, verbose=False)
        predicted_boxes = []
        for result in results:
            # Remember the input resolution even when nothing is detected.
            if self.model_shape is None:
                self.model_shape = result.orig_shape

            boxes = result.boxes
            if len(boxes.cls) == 0:
                predicted_boxes.append((0, None))
                continue

            ball_pred = max(boxes, key=lambda box: float(box.conf))
            predicted_boxes.append((ball_pred.cls.cpu().numpy()[0], ball_pred.xyxy[0].cpu().numpy()))

        return predicted_boxes

    def _predict_prob_on_clip(self, clip: np.ndarray) -> list:
        """Predict on a whole clip in chunks of ``batch_s`` frames.

        Returns exactly one prediction per frame of ``clip``; the last chunk is
        simply shorter when the clip length is not a multiple of ``batch_s``.
        """
        print('doing predictions')
        n_frames = clip.shape[0]

        predictions = []
        for start in range(0, n_frames, self.batch_s):
            stop = min(start + self.batch_s, n_frames)
            batch = [clip[j] for j in range(start, stop)]
            predictions.extend(self.predict_on_batch(batch))

        print('predictions are made')
        return predictions

    def get_labels_from_prediction(self, predictions: List, upscale_coords: bool) -> np.ndarray:
        """Turn per-frame ``(class, box)`` predictions into ``[code, x, y]`` rows.

        Args:
            predictions: output of ``_predict_prob_on_clip``.
            upscale_coords: if True, box centres are rescaled from the model
                input resolution to ``original_shape``.

        Returns:
            A float32 array with one row per prediction; rows without a ball
            are all zeros.
        """
        coords = np.zeros((len(predictions), 3), dtype=np.float32)

        scale_x = scale_y = 1.0
        # model_shape is only known after the model has seen a frame.
        if upscale_coords and self.model_shape is not None:
            scale_x = self.original_shape[1] / self.model_shape[1]
            scale_y = self.original_shape[0] / self.model_shape[0]

        for i, (cls, box) in enumerate(predictions):
            if cls == 0:
                continue  # row is already [0, 0, 0]

            x_center = (float(box[0]) + float(box[2])) / 2
            y_center = (float(box[1]) + float(box[3])) / 2
            coords[i] = [cls, x_center * scale_x, y_center * scale_y]

        return coords

    def predict(self, clip: np.ndarray, upscale_coords=True) -> tuple[np.ndarray, np.ndarray]:
        """Return ``(labels, raw_predictions)`` for a clip."""
        prob_pr = self._predict_prob_on_clip(clip)
        labels_pr = self.get_labels_from_prediction(prob_pr, upscale_coords)
        return labels_pr, prob_pr

    def test(self, data_path: Path, games: List[int], do_visualization=False, test_name='test'):
        """Evaluate the model on every clip of ``games`` and return the mean SiBaTrAcc.

        Raises:
            ValueError: if no clips were found for the requested games.
        """
        game_clip_pairs = get_game_clip_pairs(data_path, games)
        SIBATRACC_vals = []
        for game, clip in game_clip_pairs:
            data = load_clip_data(data_path, game, clip, downscale=self.downscale)
            if do_visualization:
                data_full = load_clip_data(data_path, game, clip, downscale=False) if self.downscale else data
            labels_gt = load_clip_labels(data_path, game, clip, downscale=False)
            labels_pr, prob_pr = self.predict(data)
            SIBATRACC_per_frame, SIBATRACC_total = Metrics.evaluate_predictions(labels_gt, labels_pr)
            SIBATRACC_vals.append(SIBATRACC_total)
            if do_visualization:
                visualize_prediction(data_full, labels_pr, self.out_path, f'{test_name}_g{game}_c{clip}', SIBATRACC_per_frame)
                # The heatmap renderer needs dense per-pixel maps; the detector
                # yields (class, box) tuples, which it cannot index.
                if isinstance(prob_pr, np.ndarray):
                    visualize_prob(data, prob_pr, self.out_path, f'{test_name}_g{game}_c{clip}')
                del data_full
            del data, labels_gt, labels_pr, prob_pr
            gc.collect()
        if not SIBATRACC_vals:
            raise ValueError(f'no clips found for games {games} in {data_path}')
        SIBATRACC_final = sum(SIBATRACC_vals) / len(SIBATRACC_vals)
        return SIBATRACC_final

    def train(self, data='/kaggle/working/yolo_annotations/data.yaml', epochs=10, batch=10, imgsz=640, workers=4):
        """Fine-tune the detector on the YOLO-format dataset described by ``data``.

        Starts from the pretrained ``yolo11l.pt`` weights unless a model is
        already loaded. The arguments are passed straight to the Ultralytics
        trainer.
        """
        if self.model is None:
            self.model = YOLO('yolo11l.pt')

        # LBL2 Automatic saving of the model during training
        # LBL4 Reporting of various metrics during training (e.g. the loss value at every epoch)
        self.model.train(
            data=data,
            epochs=epochs,
            batch=batch,
            imgsz=imgsz,
            workers=workers,
        )

In [2]:
# Train a fresh model. The tracker class defined in this notebook is
# SuperTrackingModel; any other class name raises NameError on a clean
# Restart & Run All.
output_path = prepare_experiment(Path('/kaggle/working'))
model = SuperTrackingModel(10, 10, out_path=output_path, downscale=True)

model.train()

New https://pypi.org/project/ultralytics/8.3.52 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.47 🚀 Python-3.11.5 torch-2.5.1+cu124 CUDA:0 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:1 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:2 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:3 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:4 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:5 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:6 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:7 (NVIDIA A100-SXM4-80GB, 81158MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11l.pt, data=/opt/notebooks/sports-tracking-tystem/yolo_annotations/data.yam

E0000 00:00:1734779623.452766 4037372 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734779623.458076 4037372 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  2    173824  ultralytics.nn.modules.block.C3k2            [128, 256, 2, True, 0.25]     
  3                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
  4                  -1  2    691712  ultralytics.nn.modules.block.C3k2            [256, 512, 2, True, 0.25]     
  5                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
  6                  -1  2   2234368  ultralytics.nn.modules.block.C3k2            [512, 512, 2, True]           
  7                  -1  1   2360320  ultralytics

E0000 00:00:1734779634.526559 4037475 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734779634.533932 4037475 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[34m[1mTensorBoard: [0mStart with 'tensorboard --logdir /opt/notebooks/sports-tracking-tystem/runs/detect/train20', view at http://localhost:6006/
Overriding model.yaml nc=80 with nc=4
Transferred 1009/1015 items from pretrained weights
Freezing layer 'model.23.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks...
[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /opt/notebooks/sports-tracking-tystem/yolo_annotations/train/labels... 11436 images, 564 backgrounds, 0 corrupt: 100%|██████████| 11436/11436 [00:06<00:00, 1655.03it/s]


[34m[1mtrain: [0mNew cache created: /opt/notebooks/sports-tracking-tystem/yolo_annotations/train/labels.cache


[34m[1mval: [0mScanning /opt/notebooks/sports-tracking-tystem/yolo_annotations/val/labels... 4293 images, 216 backgrounds, 0 corrupt: 100%|██████████| 4293/4293 [00:02<00:00, 1568.15it/s]


[34m[1mval: [0mNew cache created: /opt/notebooks/sports-tracking-tystem/yolo_annotations/val/labels.cache
Plotting labels to /opt/notebooks/sports-tracking-tystem/runs/detect/train20/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000714, momentum=0.9) with parameter groups 167 weight(decay=0.0), 174 weight(decay=0.0005625000000000001), 173 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 128 dataloader workers
Logging results to [1m/opt/notebooks/sports-tracking-tystem/runs/detect/train20[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10       6.8G      1.795      4.481      1.187          7        640: 100%|██████████| 159/159 [00:45<00:00,  3.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.46it/s]


                   all       4293       4077      0.336    0.00451   0.000593   9.44e-05

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10       6.7G      1.174      1.119     0.9241          7        640: 100%|██████████| 159/159 [00:43<00:00,  3.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:18<00:00, 13.25it/s]


                   all       4293       4077      0.958      0.261       0.31      0.203

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10       6.7G      1.114      1.062     0.9027          8        640: 100%|██████████| 159/159 [00:43<00:00,  3.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:18<00:00, 13.24it/s]


                   all       4293       4077      0.933      0.265       0.31      0.171

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10       6.7G      1.053     0.9764     0.8978          8        640: 100%|██████████| 159/159 [00:43<00:00,  3.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.33it/s]


                   all       4293       4077      0.641      0.328      0.326      0.134

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10       6.7G     0.9717     0.8283     0.8703          7        640: 100%|██████████| 159/159 [00:43<00:00,  3.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.45it/s]


                   all       4293       4077      0.706      0.357      0.375     0.0565

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10       6.7G     0.9319     0.7665     0.8561          8        640: 100%|██████████| 159/159 [00:43<00:00,  3.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.47it/s]


                   all       4293       4077      0.458      0.488      0.446      0.116

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10       6.7G     0.8707     0.7151     0.8542          8        640: 100%|██████████| 159/159 [00:43<00:00,  3.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.50it/s]


                   all       4293       4077      0.482        0.5      0.444      0.119

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10       6.7G      0.846     0.6745     0.8526          8        640: 100%|██████████| 159/159 [00:43<00:00,  3.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.52it/s]


                   all       4293       4077      0.562      0.543      0.505      0.145

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10       6.7G     0.8389      0.658     0.8415          7        640: 100%|██████████| 159/159 [00:43<00:00,  3.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.49it/s]


                   all       4293       4077      0.621      0.538      0.521      0.197

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10       6.7G     0.7777     0.5911     0.8333          7        640: 100%|██████████| 159/159 [00:43<00:00,  3.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.52it/s]


                   all       4293       4077      0.697      0.565      0.563        0.2

10 epochs completed in 0.177 hours.
Optimizer stripped from /opt/notebooks/sports-tracking-tystem/runs/detect/train20/weights/last.pt, 51.2MB
Optimizer stripped from /opt/notebooks/sports-tracking-tystem/runs/detect/train20/weights/best.pt, 51.2MB

Validating /opt/notebooks/sports-tracking-tystem/runs/detect/train20/weights/best.pt...
Ultralytics 8.3.47 🚀 Python-3.11.5 torch-2.5.1+cu124 CUDA:0 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:1 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:2 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:3 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:4 (NVIDIA A100-SXM4-80GB, 81158MiB)
                                                      CUDA:5 (NVIDIA A100-SXM4-80GB, 81158MiB)
    

                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 239/239 [00:17<00:00, 13.43it/s]


                   all       4293       4077      0.696      0.565      0.563        0.2
                  easy       3695       3695      0.885      0.961      0.977      0.369
                  hard        362        362      0.508      0.384      0.394      0.129
              occluded         20         20      0.696       0.35      0.319      0.102
Speed: 0.1ms preprocess, 1.5ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1m/opt/notebooks/sports-tracking-tystem/runs/detect/train20[0m


In [13]:
# Evaluate the published weights on the test games in a new experiment directory.
output_path = prepare_experiment(Path('/kaggle/working'))
new_model = SuperTrackingModel(10, 10, out_path=output_path, downscale=True)
# load() downloads the trained weights before building the YOLO model
new_model.load()
# Mean SiBaTrAcc over all clips of test games 1 and 2
sibatracc_final = new_model.test(Path('/kaggle/input/tennistrackingassignment/test'), [1,2], do_visualization=False, test_name='test')
print(f'SiBaTrAcc final value: {sibatracc_final}')

Downloading...
From (original): https://drive.google.com/uc?id=1YARi3dU6aIdSPIv7QO64Kt5WJqyuS1wt
From (redirected): https://drive.google.com/uc?id=1YARi3dU6aIdSPIv7QO64Kt5WJqyuS1wt&confirm=t&uuid=71f9abec-688e-47a0-b54a-31331c2d1203
To: /kaggle/working/tennis_yolo.pt
100%|██████████| 51.2M/51.2M [00:00<00:00, 94.2MB/s]


loading clip data (game 1, clip 1) downscaled
loading clip labels (game 1, clip 1)
doing predictions
predictions are made
loading clip data (game 1, clip 2) downscaled
loading clip labels (game 1, clip 2)
doing predictions
predictions are made
loading clip data (game 1, clip 3) downscaled
loading clip labels (game 1, clip 3)
doing predictions
predictions are made
loading clip data (game 1, clip 4) downscaled
loading clip labels (game 1, clip 4)
doing predictions
predictions are made
loading clip data (game 1, clip 5) downscaled
loading clip labels (game 1, clip 5)
doing predictions
predictions are made
loading clip data (game 1, clip 6) downscaled
loading clip labels (game 1, clip 6)
doing predictions
predictions are made
loading clip data (game 1, clip 7) downscaled
loading clip labels (game 1, clip 7)
doing predictions
predictions are made
loading clip data (game 1, clip 8) downscaled
loading clip labels (game 1, clip 8)
doing predictions
predictions are made
loading clip data (game 