# Лабораторная работа №3: детекция объектов на малых датасетах
### Сравнение CNN‑модели YOLOv8 и трансформер-модели RT‑DETR v2


## 1. Установка окружения и зависимостей

In [2]:
import os
os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = "1"
!pip install -q ultralytics==8.2.26 transformers==4.51.3 datasets torchmetrics opencv-python tqdm pyyaml

## 2. Скачивание и распаковка датасета COCO128

In [3]:
import urllib.request, zipfile
from pathlib import Path

DATA_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip"
ZIP_PATH = "coco128.zip"
DATA_DIR = Path("coco128")

if not DATA_DIR.exists():
    urllib.request.urlretrieve(DATA_URL, ZIP_PATH)
    with zipfile.ZipFile(ZIP_PATH, 'r') as z:
        z.extractall(DATA_DIR)

IMGS_DIR = DATA_DIR / "images" / "train2017"
LABELS_DIR = DATA_DIR / "labels" / "train2017"


## 3. Подготовка Dataset и DataLoader

In [4]:
import torch, random
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from PIL import Image

class YOLOTxtDataset(Dataset):
    def __init__(self, img_dir, lbl_dir, sample_frac=1.0):
        self.img_paths = sorted(Path(img_dir).glob("*.jpg"))
        if sample_frac < 1.0:
            self.img_paths = random.sample(self.img_paths, int(len(self.img_paths) * sample_frac))
        self.lbl_dir = Path(lbl_dir)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        img = Image.open(img_path).convert("RGB")
        w, h = img.size
        lbl_path = self.lbl_dir / f"{img_path.stem}.txt"
        boxes, labels = [], []
        if lbl_path.exists():
            for line in open(lbl_path):
                cls, xc, yc, bw, bh = map(float, line.split())
                x_min = (xc - bw/2) * w
                y_min = (yc - bh/2) * h
                x_max = (xc + bw/2) * w
                y_max = (yc + bh/2) * h
                boxes.append([x_min, y_min, x_max, y_max])
                labels.append(int(cls))
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.long)
        }
        return img, target

dataset_val = YOLOTxtDataset(IMGS_DIR, LABELS_DIR, sample_frac=0.5)
val_loader = DataLoader(dataset_val, batch_size=4, shuffle=False, collate_fn=lambda x: list(zip(*x)))


## 4. CNN‑подход — YOLOv8n

In [5]:
from ultralytics import YOLO

data_yaml = "ultralytics/cfg/datasets/coco128.yaml"
yolo_model = YOLO("yolov8n.pt")
yolo_model.train(data=data_yaml, epochs=10, imgsz=640, batch=16, device=0)
yolo_metrics = yolo_model.val(data=data_yaml, imgsz=640, device=0)
print(yolo_metrics.box.map50, yolo_metrics.box.map)


Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:06<00:00, 958kB/s] 
  ckpt = torch.load(file, map_location="cpu")


New https://pypi.org/project/ultralytics/8.3.133 available  Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.2.26  Python-3.11.7 torch-2.7.0+cpu 


ValueError: Invalid CUDA 'device=0' requested. Use 'device=cpu' or pass valid CUDA device(s) if available, i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.

torch.cuda.is_available(): False
torch.cuda.device_count(): 0
os.environ['CUDA_VISIBLE_DEVICES']: None
See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no CUDA devices are seen by torch.


## 5. Transformer‑подход — RT‑DETR v2

In [None]:
from transformers import RTDetrV2ForObjectDetection, RTDetrImageProcessor
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from tqdm import tqdm

device = "cuda"
processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_v2_r18vd")
rtdetr = RTDetrV2ForObjectDetection.from_pretrained("PekingU/rtdetr_v2_r18vd").to(device).eval()
metric_map = MeanAveragePrecision(box_format="xyxy", iou_type="bbox").to(device)

for imgs, targets in tqdm(val_loader):
    inputs = processor(images=list(imgs), return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = rtdetr(**inputs)
    results = processor.post_process_object_detection(
        outputs,
        target_sizes=[(im.height, im.width) for im in imgs],
        threshold=0.001
    )
    preds = [
        {"boxes": r["boxes"].to(device), "scores": r["scores"].to(device), "labels": r["labels"].to(device)}
        for r in results
    ]
    metric_map.update(
        preds,
        [{"boxes": t["boxes"].to(device), "labels": t["labels"].to(device)} for t in targets]
    )

rtdetr_metrics = metric_map.compute()
print(rtdetr_metrics["map_50"], rtdetr_metrics["map"])


100%|██████████| 16/16 [00:01<00:00, 10.22it/s]


tensor(0.8148) tensor(0.6513)


## 6. Сравнение скорости — FPS Bench

In [None]:
import time
import torch
import pandas as pd

def bench_fps(model_fn, imgsz=640, num=32):
    dummy = torch.randint(0, 255, (num, 3, imgsz, imgsz), dtype=torch.uint8, device="cuda").float()
    torch.cuda.synchronize()
    t0 = time.time()
    with torch.no_grad():
        _ = model_fn(dummy)
    torch.cuda.synchronize()
    return num / (time.time() - t0)

fps_yolo = bench_fps(lambda x: yolo_model(x))
fps_rtd = bench_fps(lambda x: rtdetr(x))

cmp = pd.DataFrame({
    "model": ["YOLOv8n", "RT-DETR v2-R18"],
    "mAP@0.5": [float(yolo_metrics.box.map50), float(rtdetr_metrics["map_50"])],
    "mAP@0.5:0.95": [float(yolo_metrics.box.map), float(rtdetr_metrics["map"])],
    "FPS": [fps_yolo, fps_rtd],
    "Params_M": [3.2, 14]
})
cmp



0: 640x640 (no detections), 2.0ms
1: 640x640 (no detections), 2.0ms
2: 640x640 (no detections), 2.0ms
3: 640x640 (no detections), 2.0ms
4: 640x640 (no detections), 2.0ms
5: 640x640 (no detections), 2.0ms
6: 640x640 (no detections), 2.0ms
7: 640x640 (no detections), 2.0ms
8: 640x640 (no detections), 2.0ms
9: 640x640 (no detections), 2.0ms
10: 640x640 (no detections), 2.0ms
11: 640x640 (no detections), 2.0ms
12: 640x640 (no detections), 2.0ms
13: 640x640 (no detections), 2.0ms
14: 640x640 (no detections), 2.0ms
15: 640x640 (no detections), 2.0ms
16: 640x640 (no detections), 2.0ms
17: 640x640 (no detections), 2.0ms
18: 640x640 (no detections), 2.0ms
19: 640x640 (no detections), 2.0ms
20: 640x640 (no detections), 2.0ms
21: 640x640 (no detections), 2.0ms
22: 640x640 (no detections), 2.0ms
23: 640x640 (no detections), 2.0ms
24: 640x640 (no detections), 2.0ms
25: 640x640 (no detections), 2.0ms
26: 640x640 (no detections), 2.0ms
27: 640x640 (no detections), 2.0ms
28: 640x640 (no detections), 

Unnamed: 0,model,mAP@0.5,mAP@0.5:0.95,FPS,Params_M
0,YOLOv8n,0.707633,0.533289,296.467659,3.2
1,RT-DETR v2-R18,0.814849,0.651291,91.480319,14.0


Задание для самостоятельной работы