In [22]:
import os
import tarfile

# Функция распаковки tar
def extract_tar(path_to_tar, extract_path):
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)
    with tarfile.open(path_to_tar) as tar:
        tar.extractall(path=extract_path)
    print(f"Распаковано {path_to_tar}")

# Пути к архивам
tar_2007 = '/content/VOC2007/VOCtrainval_06-Nov-2007.tar'
tar_2012 = '/content/VOC2012/VOCtrainval_11-May-2012.tar'

# Распаковываем
extract_tar(tar_2007, '/content/VOC2007/')
extract_tar(tar_2012, '/content/VOC2012/')

# Пути к распакованным датасетам
VOC2007_DIR = '/content/VOC2007/VOCdevkit/VOC2007'
VOC2012_DIR = '/content/VOC2012/VOCdevkit/VOC2012'

Распаковано /content/VOC2007/VOCtrainval_06-Nov-2007.tar
Распаковано /content/VOC2012/VOCtrainval_11-May-2012.tar


In [26]:
import xml.etree.ElementTree as ET
from pathlib import Path
import shutil
import random

# Классы VOC (20 классов)
classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
]

def list_xml_files(path):
    return list(Path(path).glob('*.xml'))

def filter_existing_images(annotations, img_dir):
    existing_images = set(os.listdir(img_dir))
    filtered = []
    for ann in annotations:
        img_name = ann.stem + '.jpg'
        if img_name in existing_images:
            filtered.append(ann)
        else:
            print(f"Отсутствует файл изображения: {os.path.join(img_dir, img_name)}")
    return filtered

def convert_box(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x = (box[0] + box[2]) / 2.0
    y = (box[1] + box[3]) / 2.0
    w = box[2] - box[0]
    h = box[3] - box[1]
    return (x * dw, y * dh, w * dw, h * dh)

def xml_to_yolo_label(xml_file, classes):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    size = root.find('size')
    width = int(float(size.find('width').text))
    height = int(float(size.find('height').text))
    labels = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in classes:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('ymin').text),
             float(xmlbox.find('xmax').text), float(xmlbox.find('ymax').text))
        bb = convert_box((width, height), b)
        labels.append(f"{cls_id} {' '.join(map(str, bb))}")
    return labels

def prepare_dataset_yolo(annotations, img_dir, save_dir):
    img_save_train = os.path.join(save_dir, 'images/train')
    img_save_val = os.path.join(save_dir, 'images/val')
    labels_save_train = os.path.join(save_dir, 'labels/train')
    labels_save_val = os.path.join(save_dir, 'labels/val')
    os.makedirs(img_save_train, exist_ok=True)
    os.makedirs(img_save_val, exist_ok=True)
    os.makedirs(labels_save_train, exist_ok=True)
    os.makedirs(labels_save_val, exist_ok=True)

    random.shuffle(annotations)
    split_idx = int(len(annotations) * 0.8)
    train_anns = annotations[:split_idx]
    val_anns = annotations[split_idx:]

    def copy_and_label(anns, img_dest, label_dest):
        count = 0
        for ann in anns:
            img_name = ann.stem + '.jpg'
            img_path = os.path.join(img_dir, img_name)
            if not os.path.exists(img_path):
                print(f"Отсутствует файл изображения: {img_path}")
                continue
            labels = xml_to_yolo_label(ann, classes)
            if not labels:
                continue
            # Сохраняем label
            label_path = os.path.join(label_dest, ann.stem + '.txt')
            with open(label_path, 'w') as f:
                f.write('\n'.join(labels))
            # Копируем изображение
            shutil.copy(img_path, img_dest)
            count += 1
        return count

    train_count = copy_and_label(train_anns, img_save_train, labels_save_train)
    val_count = copy_and_label(val_anns, img_save_val, labels_save_val)
    print(f"Скопировано изображений в train: {train_count}")
    print(f"Скопировано изображений в val: {val_count}")


In [29]:
import os

# Указываем правильные пути к директориям VOC2007 и VOC2012
VOC2007_DIR = '/content/VOC2007/VOCdevkit/VOC2007'
VOC2012_DIR = '/content/VOC2012/VOCdevkit/VOC2012'

def list_xml_files(annotations_dir):
    return [os.path.join(annotations_dir, f) for f in os.listdir(annotations_dir) if f.endswith('.xml')]

def filter_existing_images(annotations, img_dir):
    existing_images = set(os.listdir(img_dir))
    filtered = []
    for ann in annotations:
        img_filename = os.path.splitext(os.path.basename(ann))[0] + '.jpg'
        if img_filename in existing_images:
            filtered.append(ann)
        else:
            print(f"Отсутствует файл изображения: {os.path.join(img_dir, img_filename)}")
    return filtered

# Получаем списки аннотаций
voc2007_ann = list_xml_files(os.path.join(VOC2007_DIR, 'Annotations'))
voc2012_ann = list_xml_files(os.path.join(VOC2012_DIR, 'Annotations'))

# Фильтруем аннотации, удаляя те, для которых отсутствуют изображения
voc2007_ann = filter_existing_images(voc2007_ann, os.path.join(VOC2007_DIR, 'JPEGImages'))
voc2012_ann = filter_existing_images(voc2012_ann, os.path.join(VOC2012_DIR, 'JPEGImages'))

print(f"VOC2007: {len(voc2007_ann)} аннотаций с существующими изображениями")
print(f"VOC2012: {len(voc2012_ann)} аннотаций с существующими изображениями")

# Дальше вызывай свою функцию подготовки датасета, например:
# prepare_dataset_yolo(voc2007_ann + voc2012_ann)


VOC2007: 5011 аннотаций с существующими изображениями
VOC2012: 17125 аннотаций с существующими изображениями


In [30]:
# Ячейка 4: Обучение базовой модели YOLOv8n (5 эпох)

model_n = YOLO('yolov8n.pt')

results_n = model_n.train(data='voc_data.yaml', epochs=5, imgsz=320, batch=8)

print("Обучение базовой модели YOLOv8n завершено.")


Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=voc_data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=5, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=320, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=Tr

[34m[1mtrain: [0mScanning /content/voc_yolo/labels/train... 3973 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3973/3973 [00:01<00:00, 2180.43it/s]


[34m[1mtrain: [0mNew cache created: /content/voc_yolo/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 619.1±166.2 MB/s, size: 83.8 KB)


[34m[1mval: [0mScanning /content/voc_yolo/labels/val... 1038 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1038/1038 [00:00<00:00, 1781.21it/s]

[34m[1mval: [0mNew cache created: /content/voc_yolo/labels/val.cache





Plotting labels to runs/detect/train4/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 320 train, 320 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train4[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5     0.354G      1.296      3.231      1.281         44        320: 100%|██████████| 497/497 [00:55<00:00,  8.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:09<00:00,  7.17it/s]

                   all       1038       3353      0.601      0.399      0.449      0.303






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5     0.408G      1.328      2.161       1.31         21        320: 100%|██████████| 497/497 [00:51<00:00,  9.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:06<00:00,  9.36it/s]


                   all       1038       3353      0.604      0.496      0.519      0.346

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5     0.426G      1.287      1.955      1.298         20        320: 100%|██████████| 497/497 [00:51<00:00,  9.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:06<00:00, 10.31it/s]


                   all       1038       3353      0.606      0.518      0.534      0.355

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5     0.434G      1.248      1.821      1.274         24        320: 100%|██████████| 497/497 [00:50<00:00,  9.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:07<00:00,  8.99it/s]

                   all       1038       3353      0.628      0.537      0.576      0.395






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5     0.461G      1.215      1.705      1.255         26        320: 100%|██████████| 497/497 [00:50<00:00,  9.91it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:06<00:00, 10.36it/s]

                   all       1038       3353      0.678      0.561      0.616      0.419






5 epochs completed in 0.082 hours.
Optimizer stripped from runs/detect/train4/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train4/weights/best.pt, 6.2MB

Validating runs/detect/train4/weights/best.pt...
Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,009,548 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:07<00:00,  8.51it/s]


                   all       1038       3353      0.679      0.561      0.616       0.42
             aeroplane         51         87      0.701      0.747      0.765      0.525
               bicycle         60         95      0.785      0.616      0.761      0.483
                  bird         67        153      0.713      0.431      0.528      0.321
                  boat         43         87      0.422      0.494      0.461      0.267
                bottle         49        104       0.58      0.305      0.363      0.204
                   bus         37         57      0.733      0.596      0.636      0.513
                   car        154        293      0.794      0.706      0.774      0.588
                   cat         65         73      0.752      0.781      0.833      0.605
                 chair        119        312      0.556      0.308      0.369      0.216
                   cow         36        106      0.608      0.472      0.558      0.353
           diningtabl

In [32]:
# Ячейка 5: Обучение улучшенной модели YOLOv8s (10 эпох, аугментации)

model_s = YOLO('yolov8s.pt')

results_s = model_s.train(
    data='voc_data.yaml',
    epochs=10,
    imgsz=320,
    batch=8,
    augment=True,
    lr0=0.001,
    patience=3
)

print("Обучение улучшенной модели YOLOv8s завершено.")


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 228MB/s]


Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=voc_data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=320, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train5, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=3, perspective=0.0, plots=True, pose=12.0, pretrained=Tru

[34m[1mtrain: [0mScanning /content/voc_yolo/labels/train.cache... 3973 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3973/3973 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 535.1±95.0 MB/s, size: 83.8 KB)


[34m[1mval: [0mScanning /content/voc_yolo/labels/val.cache... 1038 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1038/1038 [00:00<?, ?it/s]


Plotting labels to runs/detect/train5/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000417, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 320 train, 320 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train5[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10     0.787G      1.091      1.935      1.147          7        320: 100%|██████████| 497/497 [00:54<00:00,  9.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:07<00:00,  8.47it/s]


                   all       1038       3353      0.634      0.597      0.619      0.426

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      1.03G      1.154      1.499       1.19         12        320: 100%|██████████| 497/497 [00:50<00:00,  9.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:06<00:00, 10.66it/s]


                   all       1038       3353      0.602      0.537      0.564      0.361

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      1.03G      1.196      1.503       1.23         30        320: 100%|██████████| 497/497 [00:51<00:00,  9.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:07<00:00,  9.07it/s]

                   all       1038       3353      0.587      0.519      0.553      0.354






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      1.05G      1.196      1.427      1.236         21        320: 100%|██████████| 497/497 [00:51<00:00,  9.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:07<00:00,  9.01it/s]

                   all       1038       3353       0.66      0.538      0.584      0.385
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 3 epochs. Best results observed at epoch 1, best model saved as best.pt.
To update EarlyStopping(patience=3) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






4 epochs completed in 0.066 hours.
Optimizer stripped from runs/detect/train5/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train5/weights/best.pt, 22.5MB

Validating runs/detect/train5/weights/best.pt...
Ultralytics 8.3.134 🚀 Python-3.11.12 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,133,324 parameters, 0 gradients, 28.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 65/65 [00:10<00:00,  6.04it/s]


                   all       1038       3353      0.602      0.593      0.611      0.421
             aeroplane         51         87      0.577      0.564      0.626       0.45
               bicycle         60         95      0.887      0.526       0.69      0.449
                  bird         67        153      0.412      0.477       0.44      0.303
                  boat         43         87      0.256      0.657      0.441      0.228
                bottle         49        104      0.473      0.615      0.515      0.316
                   bus         37         57      0.628      0.653      0.662      0.532
                   car        154        293      0.733      0.761      0.805      0.617
                   cat         65         73      0.788      0.612      0.768      0.575
                 chair        119        312      0.513      0.506      0.478      0.284
                   cow         36        106       0.88      0.104      0.496      0.315
           diningtabl

In [39]:
import pandas as pd

def compare_models_metrics_from_csv(run_dirs, model_names):
    data = []
    for run_dir, model_name in zip(run_dirs, model_names):
        results_path = f"{run_dir}/results.csv"
        df = pd.read_csv(results_path)

        # Берём последнюю строку (последнюю эпоху)
        last_epoch = df.iloc[-1]

        precision = last_epoch['metrics/precision(B)']
        recall = last_epoch['metrics/recall(B)']
        mAP50 = last_epoch['metrics/mAP50(B)']
        mAP50_95 = last_epoch['metrics/mAP50-95(B)']

        data.append({
            'Model': model_name,
            'Precision': precision,
            'Recall': recall,
            'mAP50': mAP50,
            'mAP50-95': mAP50_95
        })

    compare_df = pd.DataFrame(data)
    print(compare_df)

# Пути к папкам с результатами
run_dirs = ['/content/runs/detect/train4', '/content/runs/detect/train5']
model_names = ['YOLOv8n', 'YOLOv8s']

compare_models_metrics_from_csv(run_dirs, model_names)


     Model  Precision   Recall   mAP50  mAP50-95
0  YOLOv8n    0.67750  0.56131  0.6158   0.41901
1  YOLOv8s    0.65967  0.53773  0.5844   0.38486


In [73]:
import os
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import numpy as np
from tqdm import tqdm

# Функция для получения совпадающих пар файлов (image, mask)
def get_matched_file_pairs(images_dir, masks_dir):
    images = os.listdir(images_dir)
    masks = os.listdir(masks_dir)

    images_set = set(os.path.splitext(f)[0] for f in images)
    masks_set = set(os.path.splitext(f)[0] for f in masks)

    matched_names = images_set.intersection(masks_set)

    matched_images = [os.path.join(images_dir, f"{name}.jpg") for name in matched_names]
    matched_masks = [os.path.join(masks_dir, f"{name}.png") for name in matched_names]

    return matched_images, matched_masks


class SegmentationDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None):
        self.transform = transform
        self.images, self.masks = get_matched_file_pairs(images_dir, masks_dir)
        assert len(self.images) == len(self.masks), "После фильтрации количество изображений и масок не совпадает!"

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = np.array(Image.open(self.images[idx]).convert("RGB"))
        mask = np.array(Image.open(self.masks[idx]).convert("L"))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Маска должна быть FloatTensor с одним каналом
        if len(mask.shape) == 2:
            mask = np.expand_dims(mask, axis=0)
        mask = torch.tensor(mask).float() / 255.0  # Нормализуем до [0,1]

        return image, mask


# Трансформации с размером 128x128
transform = A.Compose([
    A.Resize(128, 128),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
])

# Пути к папкам
images_dir = '/content/voc_yolo/images/train'
masks_dir = '/content/voc_yolo/masks/train'

# Создаём датасет и загрузчик с batch_size=2
train_dataset = SegmentationDataset(images_dir, masks_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=2)

print(f"Длина датасета после фильтрации: {len(train_dataset)}")


# Пример простой модели сегментации
class SimpleSegmentationModel(nn.Module):
    def __init__(self):
        super(SimpleSegmentationModel, self).__init__()

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        self.deconv1 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.deconv2 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.deconv3 = nn.ConvTranspose2d(64, 1, kernel_size=2, stride=2)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.relu(self.conv3(x))

        x = self.deconv1(x)
        x = self.deconv2(x)
        x = self.deconv3(x)

        return self.sigmoid(x)


# Устройство
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Инициализация модели, оптимизатора, функции потерь
model = SimpleSegmentationModel().to(device)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Обучение с tqdm для прогрессбара
epochs = 4  # меньше эпох для ускорения обучения
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

    for batch_idx, (images, masks) in enumerate(loop):
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        # Интерполируем выход под размер масок, если нужно
        if outputs.shape != masks.shape:
            outputs = nn.functional.interpolate(outputs, size=masks.shape[-2:], mode='bilinear', align_corners=False)

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        loop.set_postfix(loss=running_loss / (batch_idx + 1))


Длина датасета после фильтрации: 3973


Epoch 1/4: 100%|██████████| 1987/1987 [loss=0.652]
Epoch 2/4: 100%|██████████| 1987/1987 [loss=0.626]
Epoch 3/4: 100%|██████████| 1987/1987 [loss=0.619]
Epoch 4/4: 100%|██████████| 1987/1987 [loss=0.614]


In [77]:
import torch
import torch.nn.functional as F
from sklearn.metrics import precision_score, recall_score, jaccard_score
import numpy as np
import pandas as pd

def evaluate_segmentation_model(model, dataloader, device, max_batches=20, threshold=0.5):
    model.eval()

    all_preds = []
    all_targets = []

    count_batches = 0

    with torch.no_grad():
        for images, masks in dataloader:
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            if outputs.shape != masks.shape:
                outputs = F.interpolate(outputs, size=masks.shape[-2:], mode='bilinear', align_corners=False)

            preds = (outputs > threshold).float()

            # Собираем все в numpy для подсчета метрик
            all_preds.append(preds.cpu().numpy().reshape(-1))
            all_targets.append(masks.cpu().numpy().reshape(-1))

            count_batches += 1
            if count_batches >= max_batches:
                break

    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)

    precision = precision_score(all_targets, all_preds, zero_division=0)
    recall = recall_score(all_targets, all_preds, zero_division=0)
    iou = jaccard_score(all_targets, all_preds, average='binary')


    return precision, recall, mAP50, mAP50_95, iou

# Запуск оценки
precision, recall, mAP50, mAP50_95, iou = evaluate_segmentation_model(model, train_loader, device)

# Формируем таблицу с результатами
results_df = pd.DataFrame({
    'Model': ['SimpleSegmentationModel'],
    'Precision': [precision],
    'Recall': [recall],
    'mAP50': [mAP50],
    'mAP50-95': [mAP50_95],
})

print(results_df)


                     Model  Precision    Recall     mAP50  mAP50-95
0  SimpleSegmentationModel   0.736508  0.687846  0.663216  0.454251


После проведенной работы можно сделать выводы что рекол у собственной имплементации выше, что говорит о том, что она лучше находит объекты, однако по какой-то причине пресижен меньше, чем в предыдущих результатах, что говорит о том, что она делает больше некорректных срабатываний. 