In [None]:
!pip install torchvision

# Загрузка Pascal VOC 2012
from torchvision import datasets
voc_dataset = datasets.VOCDetection(root='/path/to/pascal_voc', year='2012', image_set='trainval', download=True)

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchvision)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.6.0->torchvision)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86

100%|██████████| 2.00G/2.00G [01:45<00:00, 19.0MB/s]


### YOLOv8 требует аннотации в текстовом формате (.txt), а Pascal VOC предоставляет аннотации в формате XML. Нужно конвертировать XML в YOLO-формат, где каждая строка в .txt файле соответствует объекту в формате: <class_id> <x_center> <y_center> <width> <height> - нормализованных координат.

In [2]:
import os
import xml.etree.ElementTree as ET
from PIL import Image

voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

# Путь к данным
voc_root = '/path/to/pascal_voc/VOCdevkit/VOC2012'
annotations_dir = os.path.join(voc_root, 'Annotations')
images_dir = os.path.join(voc_root, 'JPEGImages')
output_dir = '/path/to/pascal_voc/yolo_labels'  # Папка для YOLO-аннотаций

# Создание папки для YOLO-аннотаций
os.makedirs(output_dir, exist_ok=True)

# Конвертация XML в YOLO
for xml_file in os.listdir(annotations_dir):
    if not xml_file.endswith('.xml'):
        continue

    # Парсинг XML
    xml_path = os.path.join(annotations_dir, xml_file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Получение размеров изображения
    img_name = root.find('filename').text
    img_path = os.path.join(images_dir, img_name)
    img = Image.open(img_path)
    img_width, img_height = img.size

    # Создание YOLO txt файла
    txt_name = xml_file.replace('.xml', '.txt')
    txt_path = os.path.join(output_dir, txt_name)

    with open(txt_path, 'w') as f:
        for obj in root.findall('object'):
            class_name = obj.find('name').text
            if class_name not in voc_classes:
                continue
            class_id = voc_classes.index(class_name)

            # Получение координат bounding box
            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            # Конвертация в YOLO-формат
            x_center = (xmin + xmax) / 2 / img_width
            y_center = (ymin + ymax) / 2 / img_height
            width = (xmax - xmin) / img_width
            height = (ymax - ymin) / img_height

            # Запись в файл
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

### Pascal VOC предоставляет файлы в ImageSets/Main/ для разделения на тренировочную и валидационную выборки (train.txt, val.txt).

In [None]:
import shutil

image_sets_dir = os.path.join(voc_root, 'ImageSets', 'Main')
train_list = os.path.join(image_sets_dir, 'train.txt')
val_list = os.path.join(image_sets_dir, 'val.txt')
images_train_dir = '/path/to/pascal_voc/images/train'
images_val_dir = '/path/to/pascal_voc/images/val'
labels_train_dir = '/path/to/pascal_voc/labels/train'
labels_val_dir = '/path/to/pascal_voc/labels/val'

os.makedirs(images_train_dir, exist_ok=True)
os.makedirs(images_val_dir, exist_ok=True)
os.makedirs(labels_train_dir, exist_ok=True)
os.makedirs(labels_val_dir, exist_ok=True)

with open(train_list, 'r') as f:
    for line in f:
        img_name = line.strip() + '.jpg'
        label_name = line.strip() + '.txt'
        shutil.copy(os.path.join(images_dir, img_name), os.path.join(images_train_dir, img_name))
        shutil.copy(os.path.join(output_dir, label_name), os.path.join(labels_train_dir, label_name))

with open(val_list, 'r') as f:
    for line in f:
        img_name = line.strip() + '.jpg'
        label_name = line.strip() + '.txt'
        shutil.copy(os.path.join(images_dir, img_name), os.path.join(images_val_dir, img_name))
        shutil.copy(os.path.join(output_dir, label_name), os.path.join(labels_val_dir, label_name))

In [4]:
!touch /path/to/pascal_voc/data.yaml

In [None]:
data_yaml = '/path/to/pascal_voc/data.yaml'

# Проверка содержимого data.yaml
with open(data_yaml, 'r') as f:
    print(f.read())




In [6]:
import os

# Проверка наличия файлов
print(f"Train images: {len(os.listdir('/path/to/pascal_voc/images/train'))}")
print(f"Train labels: {len(os.listdir('/path/to/pascal_voc/labels/train'))}")
print(f"Val images: {len(os.listdir('/path/to/pascal_voc/images/val'))}")
print(f"Val labels: {len(os.listdir('/path/to/pascal_voc/labels/val'))}")

Train images: 5717
Train labels: 5717
Val images: 5823
Val labels: 5823


In [None]:
data_yaml = '/path/to/pascal_voc/data.yaml'

### Сначала установим библиотеку Ultralytics и загрузим модель YOLOv8n (nano-версия для экономии ресурсов)

In [None]:
!pip install ultralytics

# Импорт библиотек
import torch
from ultralytics import YOLO

# Проверка доступности GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Используемое устройство: {device}")

# Загрузка модели YOLOv11
model = YOLO('yolov8n.pt')  # Используем nano-версию для быстрого обучения

Collecting ultralytics
  Downloading ultralytics-8.3.134-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.134-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m65.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.134 ultralytics-thop-2.0.14
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Используемое устройство: cuda
Downloading https://github.com/ultralytics/assets/releases/download/v8.

100%|██████████| 6.25M/6.25M [00:00<00:00, 319MB/s]


### Обучим модель YOLOv8n на выбранном наборе данных. Количество эпох поставим значение 20, размер батча 16, размер изображения 640.

In [None]:
# Обучение модели
results = model.train(
    data=data_yaml,
    epochs=20,
    batch=16,
    imgsz=640,
    device=device    # Устройство для обучения
)

Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/path/to/pascal_voc/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0

[34m[1mtrain: [0mScanning /path/to/pascal_voc/labels/train.cache... 5717 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5717/5717 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1353.4±910.9 MB/s, size: 99.9 KB)


[34m[1mval: [0mScanning /path/to/pascal_voc/labels/val.cache... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:00<?, ?it/s]


Plotting labels to runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train4[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.18G      1.013      2.782       1.28         28        640: 100%|██████████| 358/358 [01:47<00:00,  3.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:47<00:00,  3.84it/s]


                   all       5823      15787      0.643      0.556      0.592      0.411

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20      3.31G      1.069      1.964      1.331         31        640: 100%|██████████| 358/358 [01:41<00:00,  3.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:47<00:00,  3.87it/s]


                   all       5823      15787      0.628      0.559      0.586        0.4

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      3.33G      1.099      1.915      1.352         25        640: 100%|██████████| 358/358 [01:42<00:00,  3.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.95it/s]


                   all       5823      15787      0.632      0.512      0.556      0.365

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      3.34G      1.098      1.827      1.347         21        640: 100%|██████████| 358/358 [01:40<00:00,  3.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:48<00:00,  3.76it/s]


                   all       5823      15787      0.621      0.536      0.562      0.372

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      3.34G      1.079       1.75       1.33         28        640: 100%|██████████| 358/358 [01:42<00:00,  3.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.91it/s]


                   all       5823      15787      0.646      0.559      0.598       0.39

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      3.34G      1.063      1.666      1.322         21        640: 100%|██████████| 358/358 [01:40<00:00,  3.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.88it/s]


                   all       5823      15787      0.687      0.556      0.616      0.415

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      3.34G      1.046      1.596      1.308         35        640: 100%|██████████| 358/358 [01:42<00:00,  3.50it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:45<00:00,  3.97it/s]


                   all       5823      15787      0.677      0.572       0.62      0.424

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      3.34G      1.028      1.522      1.293         28        640: 100%|██████████| 358/358 [01:39<00:00,  3.58it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.94it/s]


                   all       5823      15787      0.713      0.584      0.647      0.445

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      3.34G      1.004      1.469      1.281         45        640: 100%|██████████| 358/358 [01:40<00:00,  3.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:47<00:00,  3.81it/s]


                   all       5823      15787      0.724      0.583      0.651      0.453

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      3.34G     0.9909      1.417      1.266         24        640: 100%|██████████| 358/358 [01:39<00:00,  3.60it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:45<00:00,  4.03it/s]


                   all       5823      15787      0.737      0.588      0.661      0.463
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      3.34G     0.9987      1.396      1.281         16        640: 100%|██████████| 358/358 [01:38<00:00,  3.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:44<00:00,  4.05it/s]


                   all       5823      15787      0.758      0.594      0.677      0.474

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      3.34G     0.9666      1.289      1.258         19        640: 100%|██████████| 358/358 [01:36<00:00,  3.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:44<00:00,  4.09it/s]


                   all       5823      15787      0.744      0.613      0.683      0.481

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      3.34G     0.9463      1.216      1.239         11        640: 100%|██████████| 358/358 [01:36<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:44<00:00,  4.14it/s]


                   all       5823      15787      0.738      0.615      0.685      0.487

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      3.34G      0.932      1.156      1.221          9        640: 100%|██████████| 358/358 [01:36<00:00,  3.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.94it/s]


                   all       5823      15787      0.753       0.62      0.695      0.497

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      3.34G     0.9041      1.116      1.203         15        640: 100%|██████████| 358/358 [01:36<00:00,  3.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:45<00:00,  4.00it/s]


                   all       5823      15787      0.754      0.617      0.695      0.496

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      3.34G     0.8836      1.062      1.192         12        640: 100%|██████████| 358/358 [01:34<00:00,  3.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:45<00:00,  4.04it/s]


                   all       5823      15787      0.771      0.636       0.71      0.511

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      3.34G     0.8731      1.029      1.177         14        640: 100%|██████████| 358/358 [01:35<00:00,  3.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:44<00:00,  4.09it/s]


                   all       5823      15787      0.776      0.635      0.711      0.515

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      3.34G     0.8522     0.9916      1.165         11        640: 100%|██████████| 358/358 [01:36<00:00,  3.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:43<00:00,  4.14it/s]


                   all       5823      15787       0.78      0.643       0.72      0.522

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      3.34G     0.8462     0.9568      1.159         10        640: 100%|██████████| 358/358 [01:37<00:00,  3.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:43<00:00,  4.16it/s]


                   all       5823      15787      0.779       0.65      0.723      0.527

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20      3.34G     0.8226     0.9271       1.14          8        640: 100%|██████████| 358/358 [01:33<00:00,  3.81it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.94it/s]


                   all       5823      15787      0.781      0.653      0.727      0.531

20 epochs completed in 0.810 hours.
Optimizer stripped from runs/detect/train4/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train4/weights/best.pt, 6.2MB

Validating runs/detect/train4/weights/best.pt...
Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 182/182 [00:46<00:00,  3.88it/s]


                   all       5823      15787      0.781      0.653      0.727      0.532
             aeroplane        348        484        0.9      0.715      0.797      0.603
               bicycle        290        380      0.844       0.71      0.789      0.588
                  bird        374        629      0.781      0.598      0.689      0.478
                  boat        252        491      0.707      0.462      0.572      0.344
                bottle        369        733      0.776       0.48      0.576      0.388
                   bus        211        320      0.847      0.781      0.816      0.676
                   car        608       1173       0.82      0.663      0.761      0.551
                   cat        544        618      0.836      0.788      0.868      0.685
                 chair        642       1449      0.651      0.509       0.59      0.407
                   cow        154        347      0.737      0.695      0.723      0.523
           diningtabl

### После обучения оцениваем модель на валидационной выборке по значению mAP.

In [None]:
metrics = model.val(data=data_yaml)

print(f"mAP50: {metrics.box.map50}")
print(f"mAP50-95: {metrics.box.map}")

Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2003.1±848.2 MB/s, size: 101.3 KB)


[34m[1mval: [0mScanning /path/to/pascal_voc/labels/val.cache... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 364/364 [00:55<00:00,  6.62it/s]


                   all       5823      15787      0.781      0.654      0.727      0.532
             aeroplane        348        484      0.899      0.717      0.796      0.603
               bicycle        290        380      0.841      0.711       0.79      0.589
                  bird        374        629      0.782      0.596      0.688      0.476
                  boat        252        491      0.711      0.462      0.571      0.343
                bottle        369        733      0.778      0.483      0.577      0.388
                   bus        211        320      0.848      0.781      0.816      0.676
                   car        608       1173      0.821      0.662      0.761      0.551
                   cat        544        618      0.837      0.787      0.868      0.686
                 chair        642       1449      0.648      0.511      0.589      0.407
                   cow        154        347      0.739      0.697      0.723      0.523
           diningtabl

### Проверка гипотезы - использование более крупной модели YOLOv8 должно повысить точность, а увеличение количества эпох и размера входного изображения должны помочь в более тонком изучении мелких деталей.

In [None]:
# Загрузка модели YOLOv8s
model_improved = YOLO('yolov8s.pt')

# Обучение с улучшенными параметрами
results_improved = model_improved.train(
    data='/path/to/pascal_voc/data.yaml',
    epochs=25,
    batch=20,
    imgsz=800,
    device=device
)

Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=20, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/path/to/pascal_voc/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=25, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=800, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train6, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0

[34m[1mtrain: [0mScanning /path/to/pascal_voc/labels/train.cache... 5717 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5717/5717 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 712.4±781.3 MB/s, size: 99.9 KB)


[34m[1mval: [0mScanning /path/to/pascal_voc/labels/val.cache... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:00<?, ?it/s]


Plotting labels to runs/detect/train6/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.00046875), 63 bias(decay=0.0)
Image sizes 800 train, 800 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train6[0m
Starting training for 25 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/25      6.79G       0.96      2.075        1.3        102        800: 100%|██████████| 286/286 [02:53<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:09<00:00,  2.09it/s]


                   all       5823      15787      0.729      0.628      0.701      0.491

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/25       6.9G      1.027      1.461      1.344         87        800: 100%|██████████| 286/286 [02:48<00:00,  1.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.13it/s]


                   all       5823      15787      0.616      0.535      0.568      0.361

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/25      6.97G      1.074      1.525      1.394        117        800: 100%|██████████| 286/286 [02:46<00:00,  1.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.12it/s]


                   all       5823      15787      0.555      0.484      0.498      0.305

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/25      6.85G      1.089      1.518        1.4         98        800: 100%|██████████| 286/286 [02:44<00:00,  1.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:09<00:00,  2.10it/s]


                   all       5823      15787      0.612      0.531      0.564       0.36

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/25      6.92G      1.069      1.459      1.391        121        800: 100%|██████████| 286/286 [02:44<00:00,  1.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.17it/s]


                   all       5823      15787       0.64      0.534      0.573      0.363

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/25      6.91G       1.05      1.387      1.374         51        800: 100%|██████████| 286/286 [02:45<00:00,  1.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:06<00:00,  2.19it/s]


                   all       5823      15787      0.673      0.566      0.611      0.396

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/25      6.88G      1.023      1.318      1.355        107        800: 100%|██████████| 286/286 [02:45<00:00,  1.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.15it/s]


                   all       5823      15787      0.674      0.575      0.627      0.412

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/25      6.87G      1.002      1.281      1.339         77        800: 100%|██████████| 286/286 [02:45<00:00,  1.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.17it/s]


                   all       5823      15787      0.711      0.589      0.661      0.446

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/25      6.86G     0.9763      1.212       1.32         95        800: 100%|██████████| 286/286 [02:47<00:00,  1.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.18it/s]


                   all       5823      15787      0.698      0.608      0.665      0.452

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/25      6.84G     0.9518      1.146      1.303         99        800: 100%|██████████| 286/286 [02:42<00:00,  1.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:09<00:00,  2.11it/s]


                   all       5823      15787      0.706      0.601      0.662      0.442

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/25      6.95G     0.9388      1.115       1.29         95        800: 100%|██████████| 286/286 [02:43<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.12it/s]


                   all       5823      15787      0.727      0.611      0.678      0.469

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/25      6.89G     0.9271      1.077      1.279        104        800: 100%|██████████| 286/286 [02:45<00:00,  1.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.16it/s]


                   all       5823      15787      0.743      0.618      0.695      0.488

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/25      6.89G      0.908      1.036      1.262         62        800: 100%|██████████| 286/286 [02:43<00:00,  1.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.17it/s]


                   all       5823      15787      0.751      0.621      0.691      0.477

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/25      6.86G     0.8867     0.9861      1.248        107        800: 100%|██████████| 286/286 [02:46<00:00,  1.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:06<00:00,  2.19it/s]


                   all       5823      15787       0.75      0.632      0.709      0.499

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/25      6.82G      0.871     0.9583      1.241         92        800: 100%|██████████| 286/286 [02:46<00:00,  1.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:06<00:00,  2.18it/s]


                   all       5823      15787      0.757      0.647       0.72      0.505
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/25      6.81G     0.8825     0.8489      1.253         28        800: 100%|██████████| 286/286 [02:42<00:00,  1.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.14it/s]


                   all       5823      15787      0.754       0.64      0.708      0.501

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/25      6.89G     0.8476     0.7784      1.224         47        800: 100%|██████████| 286/286 [02:39<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:06<00:00,  2.19it/s]


                   all       5823      15787      0.778      0.649      0.728      0.521

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/25      6.86G     0.8245     0.7444      1.204         47        800: 100%|██████████| 286/286 [02:39<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.15it/s]


                   all       5823      15787      0.776      0.652      0.731      0.525

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/25      6.87G     0.8082     0.7086       1.19         36        800: 100%|██████████| 286/286 [02:38<00:00,  1.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:08<00:00,  2.13it/s]


                   all       5823      15787      0.782       0.66      0.739      0.534

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/25      6.89G      0.779     0.6634      1.163         49        800: 100%|██████████| 286/286 [02:39<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.15it/s]


                   all       5823      15787      0.795      0.658      0.743      0.544

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/25      6.88G     0.7637     0.6321      1.155         59        800: 100%|██████████| 286/286 [02:39<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:05<00:00,  2.22it/s]


                   all       5823      15787      0.804      0.659      0.742      0.539

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/25      6.83G     0.7383     0.5994      1.136         47        800: 100%|██████████| 286/286 [02:39<00:00,  1.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.16it/s]


                   all       5823      15787        0.8       0.67      0.752       0.55

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/25      6.87G     0.7197     0.5724      1.115         41        800: 100%|██████████| 286/286 [02:40<00:00,  1.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:06<00:00,  2.20it/s]

                   all       5823      15787      0.813       0.66      0.756      0.556






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/25      6.85G     0.7072     0.5543       1.11         56        800: 100%|██████████| 286/286 [02:40<00:00,  1.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:05<00:00,  2.22it/s]

                   all       5823      15787      0.788      0.686      0.757      0.559






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/25      6.85G     0.6893     0.5247      1.096         49        800: 100%|██████████| 286/286 [02:39<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:07<00:00,  2.15it/s]

                   all       5823      15787      0.804       0.68      0.762      0.564






25 epochs completed in 1.617 hours.
Optimizer stripped from runs/detect/train6/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train6/weights/best.pt, 22.5MB

Validating runs/detect/train6/weights/best.pt...
Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,133,324 parameters, 0 gradients, 28.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 146/146 [01:09<00:00,  2.09it/s]


                   all       5823      15787      0.804       0.68      0.762      0.564
             aeroplane        348        484      0.889      0.723      0.819      0.617
               bicycle        290        380      0.871      0.708      0.808      0.599
                  bird        374        629      0.817      0.633      0.739      0.522
                  boat        252        491      0.723      0.519      0.615      0.373
                bottle        369        733      0.775      0.539      0.636      0.439
                   bus        211        320       0.88      0.778      0.859      0.715
                   car        608       1173      0.831      0.716      0.809      0.596
                   cat        544        618      0.828       0.85      0.888      0.696
                 chair        642       1449      0.696      0.521      0.612      0.434
                   cow        154        347      0.797        0.7      0.775      0.588
           diningtabl

In [None]:
# Оценка модели на валидационной выборке
metrics = model_improved.val(data=data_yaml)


print(f"mAP50: {metrics.box.map50}") 
print(f"mAP50-95: {metrics.box.map}")  

Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,133,324 parameters, 0 gradients, 28.5 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1247.7±657.2 MB/s, size: 120.0 KB)


[34m[1mval: [0mScanning /path/to/pascal_voc/labels/val.cache... 5823 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5823/5823 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 292/292 [01:35<00:00,  3.07it/s]


                   all       5823      15787      0.803      0.681      0.762      0.564
             aeroplane        348        484      0.892      0.725       0.82      0.615
               bicycle        290        380      0.877      0.711      0.811      0.601
                  bird        374        629      0.814      0.635       0.74      0.525
                  boat        252        491      0.727      0.521      0.616      0.373
                bottle        369        733      0.776      0.539      0.637       0.44
                   bus        211        320      0.876      0.781      0.859      0.717
                   car        608       1173      0.832      0.715      0.809      0.597
                   cat        544        618      0.827      0.846      0.886      0.695
                 chair        642       1449      0.693      0.521      0.613      0.434
                   cow        154        347      0.793      0.697      0.775      0.589
           diningtabl

*Стандартный бейзлайн: mAP50: 0.7268463727488651, mAP50-95: 0.5317381328246815*

*Улучшенный бейзлайн: mAP50: 0.7621538588538207, mAP50-95: 0.5644732262758688*

Вывод: гипотеза оказалась верна.

## Имплементация собственной модели

### Полная реализация сложных моделей слишком трудоёмка, поэтому пробую упрощённую модель на основе (CNN), которая будет предсказывать один bounding box и класс объекта на изображение. 

In [14]:
import torch
import torch.nn as nn

class SimpleDetector(nn.Module):
    def __init__(self, num_classes=20):
        super(SimpleDetector, self).__init__()
        # Свёрточные слои
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 224 -> 112
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),  # 112 -> 56
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)   # 56 -> 28
        )
        # Полносвязные слои
        self.classifier = nn.Linear(64 * 28 * 28, num_classes)  # Предсказание класса
        self.bbox_regressor = nn.Linear(64 * 28 * 28, 4)        # Предсказание bounding box

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Вытягиваем в вектор
        class_pred = self.classifier(x)
        bbox_pred = self.bbox_regressor(x)
        return class_pred, bbox_pred

In [None]:
import torch
import os
from PIL import Image
import xml.etree.ElementTree as ET
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Список классов Pascal VOC
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

class VOCDataset(Dataset):
    def __init__(self, data_dir, image_set='train', transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_set = image_set  # 'train' или 'val'


        self.images_dir = os.path.join(data_dir, 'VOCdevkit', 'VOC2012', 'JPEGImages')
        self.annotations_dir = os.path.join(data_dir, 'VOCdevkit', 'VOC2012', 'Annotations')
        self.image_set_file = os.path.join(data_dir, 'VOCdevkit', 'VOC2012', 'ImageSets', 'Main', f'{image_set}.txt')


        self.image_ids = []
        with open(self.image_set_file, 'r') as f:
            self.image_ids = [line.strip() for line in f if line.strip()]

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]


        img_path = os.path.join(self.images_dir, f'{img_id}.jpg')
        image = Image.open(img_path).convert('RGB')
        img_width, img_height = image.size


        ann_path = os.path.join(self.annotations_dir, f'{img_id}.xml')
        tree = ET.parse(ann_path)
        root = tree.getroot()

        # Для упрощения берём первый объект 
        obj = root.find('object')
        if obj is None:
            # Если объектов нет, возвращаем пустые данные
            class_id = 0
            bbox = torch.zeros(4)
        else:
            class_name = obj.find('name').text
            class_id = voc_classes.index(class_name) if class_name in voc_classes else 0

            # Координаты bounding box
            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            # Нормализация координат [x_center, y_center, width, height]
            x_center = (xmin + xmax) / 2 / img_width
            y_center = (ymin + ymax) / 2 / img_height
            width = (xmax - xmin) / img_width
            height = (ymax - ymin) / img_height
            bbox = torch.tensor([x_center, y_center, width, height])

        # Применение трансформаций
        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(class_id, dtype=torch.long), bbox

# Трансформации
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Датасет и загрузчик
dataset = VOCDataset(data_dir='/path/to/pascal_voc/', image_set='train', transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Проверка
print(f"Количество изображений в датасете: {len(dataset)}")

# Модель, потери и оптимизатор
model = SimpleDetector(num_classes=20)
criterion_class = nn.CrossEntropyLoss()
criterion_bbox = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Цикл обучения
for epoch in range(10):
    for images, labels, bboxes in dataloader:
        optimizer.zero_grad()
        class_pred, bbox_pred = model(images)
        loss_class = criterion_class(class_pred, labels)
        loss_bbox = criterion_bbox(bbox_pred, bboxes)
        loss = loss_class + loss_bbox
        loss.backward()
        optimizer.step()
    print(f"Эпоха {epoch+1}, Loss: {loss.item()}")

Количество изображений в датасете: 5717
Эпоха 1, Loss: 2.77070951461792
Эпоха 2, Loss: 2.440721035003662
Эпоха 3, Loss: 1.8826191425323486
Эпоха 4, Loss: 1.4468070268630981
Эпоха 5, Loss: 1.30607008934021
Эпоха 6, Loss: 1.1855098009109497
Эпоха 7, Loss: 0.617194652557373
Эпоха 8, Loss: 0.3829125165939331
Эпоха 9, Loss: 0.3474341928958893
Эпоха 10, Loss: 0.11439548432826996


In [20]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Downloading torchmetrics-1.7.1-py3-none-any.whl (961 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m961.5/961.5 kB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.14.3-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.14.3 torchmetrics-1.7.1


In [24]:
from torchmetrics.detection import MeanAveragePrecision

# Валидационный датасет и загрузчик
val_dataset = VOCDataset(data_dir='/path/to/pascal_voc/', image_set='val', transform=transform)
test_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Оценка
model.eval()
map_metric = MeanAveragePrecision()
with torch.no_grad():
    for images, labels, bboxes in test_dataloader:
        class_pred, bbox_pred = model(images)
        preds = [{'boxes': bbox_pred, 'scores': torch.softmax(class_pred, dim=1).max(dim=1)[0], 'labels': class_pred.argmax(dim=1)}]
        targets = [{'boxes': bboxes, 'labels': labels}]
        map_metric.update(preds, targets)
mAP = map_metric.compute()['map'].item()
print(f"mAP модели: {mAP}")

mAP модели: 0.00014734633441548795


### Выводы: имплементированная модель показала себя хуже YOLAv8n. Ограничение числа эпох, абсолютная разная сложность моделей, а также, возможно, неправильная настройка параметров сыграли свою роль.