# Defining Project Path

In [1]:
import sys
from pathlib import Path
import os
# Path of the project: the parent of the directory the notebook is run from.
PROJECT_PATH = Path(os.getcwd()).resolve().parent

# Put the project root on the import path so that project packages
# (presumably `models` and `utils`, which are imported further down) resolve.
# The membership guard keeps the cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
if str(PROJECT_PATH) not in sys.path:
    sys.path.append(str(PROJECT_PATH))

# Loading the Pretrained Model

In [2]:
import importlib

import torch

import models.def_detr_model
# Reload the module so that edits made to it are picked up without
# restarting the kernel.
importlib.reload(models.def_detr_model)

from models.def_detr_model import DefDetrModel

# Local training checkpoint. It is not consumed by the `from_pretrained` call
# below; the name is kept in case another cell refers to it.
CHECKPOINT_PATH = PROJECT_PATH / 'checkpoints' / 'def_detr_checkpoints' / 'checkpoint_e20_15112025_182047.pth'

# Identifier of the pretrained weights handed to `from_pretrained`.
PRETRAINED_MODEL_ID = "Mikolaj1234/def-detr-football-ai"

# Use the GPU when one is available instead of hard-coding "cuda", which fails
# on CPU-only machines.
# NOTE(review): assumes `from_pretrained` accepts device="cpu" - confirm.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

MODEL = DefDetrModel.from_pretrained(
    PRETRAINED_MODEL_ID,
    device=DEVICE
)

  from .autonotebook import tqdm as notebook_tqdm
Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 3337.82it/s]


# Process Video

In [None]:
from utils.box_ops import cxcywh2xyxy, denormalize_bboxes
import supervision as sv
import torch
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Input / output locations
# ---------------------------------------------------------------------------
VIDEO_PATH = PROJECT_PATH / 'data' / 'origin_videos' / 'football_06.mp4'
video_info = sv.VideoInfo.from_video_path(VIDEO_PATH)

PROCESSED_VIDEO_DIR = PROJECT_PATH / 'processed_video'
PROCESSED_VIDEO_DIR.mkdir(parents=True, exist_ok=True)
TARGET_VIDEO_PATH = PROCESSED_VIDEO_DIR / f'{VIDEO_PATH.stem}_def_detr_processed.mp4'
video_sink = sv.VideoSink(TARGET_VIDEO_PATH, video_info=video_info)

# ---------------------------------------------------------------------------
# Post-processing parameters (previously inline magic numbers)
# ---------------------------------------------------------------------------
CONFIDENCE_THRESHOLD = 0.6  # detections at or below this score are dropped
NMS_IOU_THRESHOLD = 0.5     # threshold passed to class-agnostic NMS
BALL_PADDING_PX = 10        # ball boxes are padded by this many pixels

# ---------------------------------------------------------------------------
# Annotators: both share one palette so box and label colours agree
# ---------------------------------------------------------------------------
CLASS_PALETTE = sv.ColorPalette.from_hex(['#FF8C00', '#00BFFF', '#FF1493', '#FFD700'])
box_annotator = sv.BoxAnnotator(
    color=CLASS_PALETTE,
    thickness=2
)
label_annotator = sv.LabelAnnotator(
    color=CLASS_PALETTE,
    text_color=sv.Color.from_hex('#000000')
)

# Class ids that are kept and drawn. Predictions with any other class id are
# discarded further down because only these four classes are selected.
id2label = {
    1: 'Ball',
    2: 'Goalkeeper',
    3: 'Player',
    4: 'Referee'
}
label2id = {v: k for k, v in id2label.items()}
# Order in which the tracked (non-ball) classes are drawn.
TRACKED_CLASS_NAMES = ('Player', 'Goalkeeper', 'Referee')

tracker = sv.ByteTrack()
tracker.reset()

frame_generator = sv.get_video_frames_generator(VIDEO_PATH)
with video_sink:
    for frame in tqdm(frame_generator, total=video_info.total_frames, desc=f'Processing video: {VIDEO_PATH.stem}'):
        # The frame is indexed as (H, W, C); the model is called with a list
        # holding one (C, H, W) uint8 tensor.
        # NOTE(review): frames from supervision are presumably in OpenCV's BGR
        # channel order - confirm the model expects that order.
        image_tensor = torch.as_tensor(frame, dtype=torch.uint8).permute(2, 0, 1)

        # Pure inference: make sure no autograd graph is built, regardless of
        # whether the model already disables gradients internally.
        with torch.no_grad():
            outputs = MODEL([image_tensor])

        # Per-query class probabilities -> best score and class id per query.
        scores, cls_ids = outputs.logits[0].cpu().softmax(-1).max(-1)

        # Convert boxes from cxcywh to xyxy and scale them with the original
        # image size reported by the model.
        img_height = int(outputs.orig_size[0][0])
        img_width = int(outputs.orig_size[0][1])
        bboxes = cxcywh2xyxy(outputs.pred_boxes[0].cpu())
        bboxes = denormalize_bboxes(bboxes, img_height, img_width)

        detections = sv.Detections(
            xyxy=bboxes.numpy(),
            confidence=scores.numpy(),
            class_id=cls_ids.numpy()
        )
        detections = detections[detections.confidence > CONFIDENCE_THRESHOLD]
        detections = detections.with_nms(threshold=NMS_IOU_THRESHOLD, class_agnostic=True)

        # The ball is not passed to the tracker; its boxes are only padded.
        is_ball = detections.class_id == label2id['Ball']
        ball_detections = detections[is_ball]
        ball_detections.xyxy = sv.pad_boxes(xyxy=ball_detections.xyxy, px=BALL_PADDING_PX)

        # Everything else goes through ByteTrack, then is regrouped per class.
        tracked = tracker.update_with_detections(detections=detections[~is_ball])
        tracked_detections = sv.Detections.merge([
            tracked[tracked.class_id == label2id[class_name]]
            for class_name in TRACKED_CLASS_NAMES
        ])

        # Ball detections carry no tracker_id while the tracked ones do.
        # NOTE(review): Detections.merge appears to require optional fields to
        # be all-set or all-None across its inputs, so merging the two groups
        # may raise once a ball is actually detected - confirm against the
        # installed supervision version. The groups are annotated separately
        # instead, which draws the same boxes and labels in the same order
        # (ball first, then players, goalkeepers, referees).
        annotation_groups = (ball_detections, tracked_detections)

        annotated_img = frame.copy()
        for group in annotation_groups:
            annotated_img = box_annotator.annotate(annotated_img, group)
        for group in annotation_groups:
            labels = [f"#{id2label[cls_id]}" for cls_id in group.class_id]
            annotated_img = label_annotator.annotate(scene=annotated_img, detections=group, labels=labels)

        video_sink.write_frame(annotated_img)

Processing video: football_06: 100%|██████████| 750/750 [04:50<00:00,  2.58it/s]
