# Лабораторная работа 7, Самсонов Савелий Артёмович М8О-406Б-21

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Выбор датасета

Датасет DUTS – один из наиболее популярных датасетов для выделения значимых объектов (salient object detection) и семантической сегментации. Он содержит 10553 тренировочных и 5019 тестовых изображений с высококачественными масками сегментации. Вот ключевые направления, где он может быть полезен на практике:

1. Обучение и тестирование моделей сегментации

    Возможное применение:
    - Медицинская сегментация (адаптация предобученных на DUTS моделей для анализа рентгеновских снимков).

2. Улучшение качества предобученных моделей

    Модели, обученные на синтетических данных, плохо работают на реальных изображениях. Большое разнообразие сцен (естественные, урбанистические, интерьеры) помогает улучшить обобщающую способность.
    
    Возможное применение:
    - Автоматическое выделение товаров на фото для маркетплейсов (Wildberries, AliExpress).

3. Разработка алгоритмов для мобильных и edge-устройств

    Тяжелые модели (например, HRNet) неэффективны на смартфонах и камерах видеонаблюдения. DUTS позволяет обучать lightweight-модели (MobileNetV3 + DeepLabV3 Lite) благодаря четким границам объектов и оптимизировать под real-time обработку (например, для AR-приложений).
    
    Возможное применение:
    - Сегментация дорожной сцены в автономных дронах.

4. Удаление фона и генеративный AI

    Сервисы вроде Remove.bg требуют точного выделения переднего плана. DUTS Идеально подходит для обучения автоматического удаления фона.

    Возможное применение:
    - Инструменты для дизайнеров (Canva, Photoshop Neural Filters).

### Выбор метрик

1. Intersection over Union (IoU)
    
    Считает, какая часть предсказанного объекта перекрывается с реальным. Применяется, если важна геометрическая точность.

2. Dice Coefficient

    Учитывает и правильно найденные пиксели, и ошибки (пропуски и ложные срабатывания).

3. F1-мера
    
    Как и dice, объединяет две вещи: точность (сколько из выделенного действительно является объектом) и полноту (сколько объекта модель вообще обнаружила).

## 2. Создание бейзлайна и оценка качества

Импорт библиотек

In [None]:
import os
import numpy as np
import cv2
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import torch.optim as optim
from torch import nn
from PIL import Image

import numpy as np
from sklearn.metrics import jaccard_score, f1_score

Опишем класс для датасета

In [None]:
class DUTSDataset(Dataset):
    def __init__(self, images_dir, masks_dir, transform=None, mask_transform=None, augm_transform=None, limit=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.image_names = os.listdir(images_dir)

        if limit is not None:
            self.image_names = self.image_names[:limit]

        self.transform = transform
        self.mask_transform = mask_transform
        self.augm_transform = augm_transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        image_name = self.image_names[idx]
        image_path = os.path.join(self.images_dir, image_name)
        mask_path = os.path.join(self.masks_dir, image_name)

        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if image is None:
            raise ValueError(f"Ошибка при загрузке изображения: {image_path}")
        mask_path_with_extension = mask_path.split('.')[0] + '.png'
        mask = cv2.imread(mask_path_with_extension, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise ValueError(f"Ошибка при загрузке маски: {mask_path}")

        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        mask = Image.fromarray(mask)

        if self.augm_transform:
            transformed = self.augm_transform(image=np.array(image), mask=np.array(mask))
            image = Image.fromarray(transformed['image'])
            mask = Image.fromarray(transformed['mask'])

        if self.transform:
            image = self.transform(image)
        if self.mask_transform:
            mask = self.mask_transform(mask)

        return image, mask

Подготовка данных

In [None]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])


images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Image'
masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Mask'

train_dataset = DUTSDataset(images_dir, masks_dir, transform=transform, mask_transform=mask_transform, limit=1000)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

Загрузим библиотеку с моделями

In [None]:
!pip install segmentation_models_pytorch

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.5.0-py3-none-any.whl.metadata (17 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8->segmentation_models_pytorch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8->segmentation_models_pytorch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8->segmentation_models_pytorch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8->segmentation_models_pytorch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8->segmentation_models_pytorch)
  Downloading nvidia_cublas_cu12-12.4.5.8-

Импортируем библиотеку с моделями

In [None]:
import segmentation_models_pytorch as smp

Определяем сверточную модель

In [None]:
model = smp.Linknet(
    encoder_name="resnet34",       # Предобученный энкодер ResNet34
    encoder_weights="imagenet",    # Использование весов ImageNet для энкодера
    in_channels=3,                 # 3 канала для RGB изображений
    classes=1,                     # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(model.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /root/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 177MB/s]


Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = model(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:24<00:00,  2.53it/s]


Epoch [1/3], Loss: 0.6659701930152045


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:25<00:00,  2.43it/s]


Epoch [2/3], Loss: 0.39640419823782785


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:25<00:00,  2.51it/s]

Epoch [3/3], Loss: 0.28170165372273276





Подготовка тестовых данных

In [None]:
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

test_images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Image'
test_masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Mask'

test_dataset = DUTSDataset(test_images_dir, test_masks_dir, transform=test_transform, mask_transform=test_mask_transform, limit=700)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


Определим функции для вычисления метрик

In [None]:
from sklearn.metrics import jaccard_score, f1_score

def iou_score(preds, targets, threshold=0.5):
    preds = (preds > threshold).int().cpu().numpy().flatten()
    targets = targets.int().cpu().numpy().flatten()
    return jaccard_score(targets, preds)

def dice_score(preds, targets, threshold=0.5):
    preds = (preds > threshold).int().cpu().numpy().flatten()
    targets = targets.int().cpu().numpy().flatten()
    return 2 * (np.sum(preds * targets)) / (np.sum(preds) + np.sum(targets) + 1e-8)

def f1_score_func(preds, targets, threshold=0.5):
    preds = (preds > threshold).int().cpu().numpy().flatten()
    targets = targets.int().cpu().numpy().flatten()
    return f1_score(targets, preds)


Оценка модели

In [None]:
model.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = model(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [08:06<00:00, 11.06s/it]

IoU: 0.6094
Dice: 0.7544
F1-score: 0.7544





Определяем трансформерную модель

In [None]:
model_trans = smp.Unet(
    encoder_name="mit_b0",
    encoder_weights="imagenet",  # Используем веса, предобученные на ImageNet
    in_channels=3,  # 3 канала для RGB изображений
    classes=1,  # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_trans.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(model_trans.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm

epochs = 3

for epoch in range(epochs):
    model_trans.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = model_trans(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader):.4f}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:26<00:00,  2.35it/s]


Epoch [1/3], Loss: 0.4590


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:25<00:00,  2.43it/s]


Epoch [2/3], Loss: 0.2451


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:26<00:00,  2.41it/s]

Epoch [3/3], Loss: 0.1905





Подготовка тестовых данных

In [None]:
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

test_images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Image'
test_masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Mask'

test_dataset = DUTSDataset(test_images_dir, test_masks_dir, transform=test_transform, mask_transform=test_mask_transform, limit=200)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


Оценка модели

In [None]:
model_trans.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = model_trans(images)

        output_probs = torch.sigmoid(outputs)

        output_probs = (output_probs > 0.5).float()

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:18<00:00,  2.36it/s]

IoU: 0.6116
Dice: 0.7567
F1-score: 0.7567





## 3. Улучшение бейзлайна

### Гипотезы

1. Аугментация данных

    Аугментация данных может значительно улучшить способность модели обобщать, снижая вероятность переобучения. Можно добавить следующие виды аугментации:
    - Геометрические преобразования: вращение, сдвиг.
    - Зеркальные отражения или.

2. Использование комбинированной функции потерь

    Комбинирование кросс-энтропии и Dice Loss может улучшить качество сегментации.


3. Смена оптимизатора

    В данный момент используется Adam, но можно попробовать другие оптимизаторы, например, AdamW.

### Свёрточная модель

##### Аугментация данных

Установим библиотеку, если её нет

In [None]:
!pip install albumentations



Зададим преобразования для изображений и масок

In [None]:
import albumentations as A

# Аугментации для изображений и масок
augm_transform = A.Compose([
    A.RandomRotate90(p=0.5),  # Случайное вращение на 90 градусов
    A.HorizontalFlip(p=0.5),  # Случайное горизонтальное отражение
    A.VerticalFlip(p=0.5),    # Случайное вертикальное отражение
], additional_targets={'mask': 'mask'})  # Добавляем маску как дополнительную цель для аугментаций


Подготовка данных

In [None]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Image'
masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Mask'

new_train_dataset = DUTSDataset(images_dir, masks_dir, transform=transform, mask_transform=mask_transform, augm_transform=augm_transform, limit=1000)
new_train_dataloader = DataLoader(new_train_dataset, batch_size=16, shuffle=True)

Определяем сверточную модель

In [None]:
new_model = smp.Linknet(
    encoder_name="resnet34",       # Предобученный энкодер ResNet34
    encoder_weights="imagenet",    # Использование весов ImageNet для энкодера
    in_channels=3,                 # 3 канала для RGB изображений
    classes=1,                     # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(new_model.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [01:47<00:00,  1.71s/it]


Epoch [1/3], Loss: 0.5793233982154301


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:25<00:00,  2.43it/s]


Epoch [2/3], Loss: 0.317982968829927


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:25<00:00,  2.43it/s]

Epoch [3/3], Loss: 0.2272805862483524





Подготовка тестовых данных

In [None]:
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

test_images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Image'
test_masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TE/DUTS-TE-Mask'

test_dataset = DUTSDataset(test_images_dir, test_masks_dir, transform=test_transform, mask_transform=test_mask_transform, limit=200)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


Оценка модели

In [None]:
new_model.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:18<00:00,  2.39it/s]

IoU: 0.5864
Dice: 0.7360
F1-score: 0.7360





##### Функция потерь

In [None]:
import torch
import torch.nn as nn

def dice_loss(pred, target, smooth=1e-6):
    pred = torch.sigmoid(pred)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum()
    dice = (2. * intersection + smooth) / (union + smooth)
    return 1 - dice

# Комбинированная функция потерь (BCE + Dice Loss)
class CombinedLoss(nn.Module):
    def __init__(self, bce_weight=0.5, dice_weight=0.5):
        super(CombinedLoss, self).__init__()
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.dice_loss = dice_loss

        self.bce_weight = bce_weight
        self.dice_weight = dice_weight

    def forward(self, pred, target):
        bce = self.bce_loss(pred, target)
        dice = self.dice_loss(pred, target)
        # Комбинируем потери с соответствующими весами
        loss = self.bce_weight * bce + self.dice_weight * dice
        return loss


Определяем модель

In [None]:
new_model = smp.Linknet(
    encoder_name="resnet34",       # Предобученный энкодер ResNet34
    encoder_weights="imagenet",    # Использование весов ImageNet для энкодера
    in_channels=3,                 # 3 канала для RGB изображений
    classes=1,                     # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model.to(device)

# Инициализация комбинированной функции потерь
criterion = CombinedLoss(bce_weight=0.5, dice_weight=0.5)

optimizer = optim.Adam(new_model.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [10:41<00:00, 10.18s/it]


Epoch [1/3], Loss: 0.5012511119009957


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:24<00:00,  2.57it/s]


Epoch [2/3], Loss: 0.3118932339407149


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:24<00:00,  2.56it/s]

Epoch [3/3], Loss: 0.23110283036080617





Оценка модели

In [None]:
new_model.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [07:46<00:00, 10.60s/it]

IoU: 0.6044
Dice: 0.7507
F1-score: 0.7507





##### Оптимизатор

In [None]:
new_model = smp.Linknet(
    encoder_name="resnet34",       # Предобученный энкодер ResNet34
    encoder_weights="imagenet",    # Использование весов ImageNet для энкодера
    in_channels=3,                 # 3 канала для RGB изображений
    classes=1,                     # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = torch.optim.AdamW(new_model.parameters(), lr=1e-4, weight_decay=1e-5)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [14:23<00:00, 13.71s/it]


Epoch [1/3], Loss: 0.568998604539841


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:28<00:00,  2.19it/s]


Epoch [2/3], Loss: 0.31880672914641245


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s]

Epoch [3/3], Loss: 0.21685697516751667





Оценка модели

In [None]:
new_model.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [09:52<00:00, 13.46s/it]

IoU: 0.6303
Dice: 0.7708
F1-score: 0.7708





Из всех вариантов смена оптимизатора внесла улучшение

### Трансформерная модель

##### Аугментация данных

In [None]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

images_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Image'
masks_dir = '/content/drive/MyDrive/Colab Notebooks/multimedia/DUTS-TR/DUTS-TR-Mask'

new_train_dataset = DUTSDataset(images_dir, masks_dir, transform=transform, mask_transform=mask_transform, augm_transform=augm_transform, limit=1000)
new_train_dataloader = DataLoader(new_train_dataset, batch_size=16, shuffle=True)

Определяем модель

In [None]:
new_model_trans = smp.Unet(
    encoder_name="mit_b0",
    encoder_weights="imagenet",  # Используем веса, предобученные на ImageNet
    in_channels=3,  # 3 канала для RGB изображений
    classes=1,  # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_trans.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(new_model_trans.parameters(), lr=1e-4)

Downloading: "https://github.com/qubvel/segmentation_models.pytorch/releases/download/v0.0.2/mit_b0.pth" to /root/.cache/torch/hub/checkpoints/mit_b0.pth
100%|██████████| 13.7M/13.7M [00:00<00:00, 94.3MB/s]


Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_trans.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_trans(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [03:08<00:00,  2.99s/it]


Epoch [1/3], Loss: 0.3689750787757692


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.29it/s]


Epoch [2/3], Loss: 0.22644309510314276


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.31it/s]

Epoch [3/3], Loss: 0.17954744871646638





Оценка модели

In [None]:
new_model_trans.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_trans(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:18<00:00,  2.43it/s]

IoU: 0.6174
Dice: 0.7615
F1-score: 0.7615





##### Функция потерь

In [None]:
import torch
import torch.nn as nn

def dice_loss(pred, target, smooth=1e-6):
    pred = torch.sigmoid(pred)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum()
    dice = (2. * intersection + smooth) / (union + smooth)
    return 1 - dice

class CombinedLoss(nn.Module):
    def __init__(self, bce_weight=0.5, dice_weight=0.5):
        super(CombinedLoss, self).__init__()
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.dice_loss = dice_loss

        self.bce_weight = bce_weight
        self.dice_weight = dice_weight

    def forward(self, pred, target):
        bce = self.bce_loss(pred, target)
        dice = self.dice_loss(pred, target)
        # Комбинируем потери с соответствующими весами
        loss = self.bce_weight * bce + self.dice_weight * dice
        return loss


Определяем модель

In [None]:
new_model_1_trans = smp.Unet(
    encoder_name="mit_b0",
    encoder_weights="imagenet",  # Используем веса, предобученные на ImageNet
    in_channels=3,  # 3 канала для RGB изображений
    classes=1,  # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_1_trans.to(device)

# Инициализация комбинированной функции потерь
criterion = CombinedLoss(bce_weight=0.5, dice_weight=0.5)

optimizer = optim.Adam(new_model_1_trans.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm

epochs = 3

for epoch in range(epochs):
    new_model_1_trans.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_1_trans(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader):.4f}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.32it/s]


Epoch [1/3], Loss: 0.4698


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.31it/s]


Epoch [2/3], Loss: 0.2993


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.32it/s]

Epoch [3/3], Loss: 0.2338





Оценка модели

In [None]:
new_model_1_trans.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_1_trans(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:18<00:00,  2.42it/s]

IoU: 0.5997
Dice: 0.7464
F1-score: 0.7464





##### Оптимизатор

In [None]:
new_model_1_trans = smp.Unet(
    encoder_name="mit_b0",
    encoder_weights="imagenet",  # Используем веса, предобученные на ImageNet
    in_channels=3,  # 3 канала для RGB изображений
    classes=1,  # Для бинарной сегментации (объект или фон)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_1_trans.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = torch.optim.AdamW(new_model_1_trans.parameters(), lr=1e-4, weight_decay=1e-5)

Обучение модели

In [None]:
from tqdm import tqdm

epochs = 3

for epoch in range(epochs):
    new_model_1_trans.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_1_trans(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader):.4f}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.27it/s]


Epoch [1/3], Loss: 0.4292


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.29it/s]


Epoch [2/3], Loss: 0.2692


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:27<00:00,  2.32it/s]

Epoch [3/3], Loss: 0.2074





Оценка модели

In [None]:
new_model_1_trans.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_1_trans(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:18<00:00,  2.36it/s]

IoU: 0.6123
Dice: 0.7572
F1-score: 0.7572





Из всех вариантов аугментация данных внесла улучшение

### Выводы

Для каждой модели были проверены три гипотезы, благодаря некоторым удалось немного улучшить результат.

Сверточной модели помогла смена оптимизатора.Трансформерной - аугментация данных.

Для скорости обучения было взято малое число эпох и малое подмножество датасета, для более ощутимого улучшения результатов необходимо, как минимум, увеличить их.

## 4. Имплементация алгоритма машинного обучения

### Сверточная модель

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """(Conv2d → BatchNorm → ReLU) × 2"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class Down(nn.Module):
    """MaxPool → DoubleConv"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)

class Up(nn.Module):
    """Upsample → Concatenate → DoubleConv"""
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)

class UNetCust(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super().__init__()
        self.inc = DoubleConv(in_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024)

        self.up1 = Up(1024, 512)
        self.up2 = Up(512, 256)
        self.up3 = Up(256, 128)
        self.up4 = Up(128, 64)

        self.outc = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)

        return torch.sigmoid(self.outc(x))

Определяем модель

In [None]:
model_cust = UNetCust()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_cust.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(model_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    model_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = model_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [13:15<00:00, 12.63s/it]


Epoch [1/3], Loss: 0.7517202032936944


Epoch 2/3 [Train]: 100%|██████████| 63/63 [01:03<00:00,  1.01s/it]


Epoch [2/3], Loss: 0.7123791603814988


Epoch 3/3 [Train]: 100%|██████████| 63/63 [01:03<00:00,  1.01s/it]

Epoch [3/3], Loss: 0.7005015829252819





Оценка модели

In [None]:
model_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = model_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [05:47<00:00,  7.90s/it]

IoU: 0.1317
Dice: 0.2313
F1-score: 0.2313





### Трансформерная модель

In [None]:
class SimpleSegViT(nn.Module):
    def __init__(self,
                 img_size=256,
                 patch_size=16,
                 in_channels=3,
                 dim=256,
                 depth=4,
                 heads=4,
                 mlp_dim=512):
        super().__init__()

        assert img_size % patch_size == 0, "Image size must be divisible by patch size"

        self.patch_size = patch_size
        self.img_size = img_size
        num_patches = (img_size // patch_size) ** 2

        self.patch_embed = nn.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size)

        self.pos_embed = nn.Parameter(torch.randn(1, num_patches, dim))

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=dim,
                nhead=heads,
                dim_feedforward=mlp_dim,
                batch_first=True
            ),
            num_layers=depth
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(dim, dim//2, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(dim//2, dim//4, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(dim//4, dim//8, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(dim//8, 1, kernel_size=1)  # 1 канал для бинарной сегментации
        )

    def forward(self, x):
        x = self.patch_embed(x)
        b, d, h, w = x.shape

        x = x.flatten(2).transpose(1, 2)
        x = x + self.pos_embed

        x = self.transformer(x)
        x = x.transpose(1, 2).view(b, d, h, w)

        x = self.decoder(x)

        if x.shape[-2:] != (self.img_size, self.img_size):
            x = F.interpolate(x, size=(self.img_size, self.img_size), mode='bilinear', align_corners=False)

        return x

Определяем модель

In [None]:
model_trans_cust = SimpleSegViT(img_size=256)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_trans_cust.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_trans_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    model_trans_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        optimizer.zero_grad()

        outputs = model_trans_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:18<00:00,  3.33it/s]


Epoch [1/3], Loss: 0.6552252816775489


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:19<00:00,  3.23it/s]


Epoch [2/3], Loss: 0.5379031215395246


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:18<00:00,  3.34it/s]

Epoch [3/3], Loss: 0.46195946468247306





Оценка модели

In [None]:
model_trans_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        outputs = model_trans_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:15<00:00,  2.83it/s]

IoU: 0.2720
Dice: 0.4254
F1-score: 0.4254





### Выводы

Результаты имплементированных моделей оказались существенно хуже результатов встроенных моделей. В целом, иного и не ожидалось в виду простоты имплементаций и малого объёма обучения.

Как и в случае встроенных версий, трансформерная имплементированная модель получилась более эффективной, чем сверточная, но с большей разницей, чем у встроенных версий.

### Улучшения для имплементированных моделей

#### Сверточная модель

##### Аугментация данных

In [None]:
new_model_cust = UNetCust()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_cust.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = optim.Adam(new_model_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [13:30<00:00, 12.86s/it]


Epoch [1/3], Loss: 0.7603553543015132


Epoch 2/3 [Train]: 100%|██████████| 63/63 [01:02<00:00,  1.01it/s]


Epoch [2/3], Loss: 0.722726721612234


Epoch 3/3 [Train]: 100%|██████████| 63/63 [01:03<00:00,  1.00s/it]

Epoch [3/3], Loss: 0.7109920590642899





Оценка модели

In [None]:
new_model_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [09:17<00:00, 12.67s/it]

IoU: 0.1317
Dice: 0.2313
F1-score: 0.2313





##### Функция потерь

In [None]:
new_model_cust = UNetCust()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_cust.to(device)

criterion = CombinedLoss(bce_weight=0.5, dice_weight=0.5)

optimizer = optim.Adam(new_model_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:59<00:00,  1.05it/s]


Epoch [1/3], Loss: 0.677591704186939


Epoch 2/3 [Train]: 100%|██████████| 63/63 [01:00<00:00,  1.04it/s]


Epoch [2/3], Loss: 0.6501356817427135


Epoch 3/3 [Train]: 100%|██████████| 63/63 [01:00<00:00,  1.03it/s]

Epoch [3/3], Loss: 0.6400261464573088





Оценка модели

In [None]:
new_model_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:25<00:00,  1.76it/s]

IoU: 0.1317
Dice: 0.2313
F1-score: 0.2313





##### Оптимизатор

In [None]:
new_model_cust = UNetCust()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_cust.to(device)

criterion = nn.BCEWithLogitsLoss()

optimizer = torch.optim.AdamW(new_model_cust.parameters(), lr=1e-4, weight_decay=1e-5)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()

        optimizer.zero_grad()

        outputs = new_model_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")


Epoch 1/3 [Train]: 100%|██████████| 63/63 [01:03<00:00,  1.00s/it]


Epoch [1/3], Loss: 0.7571066977485778


Epoch 2/3 [Train]: 100%|██████████| 63/63 [01:05<00:00,  1.04s/it]


Epoch [2/3], Loss: 0.7201947522541833


Epoch 3/3 [Train]: 100%|██████████| 63/63 [01:06<00:00,  1.06s/it]

Epoch [3/3], Loss: 0.7067361417270842





Оценка модели

In [None]:
new_model_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()

        outputs = new_model_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:24<00:00,  1.80it/s]

IoU: 0.1317
Dice: 0.2313
F1-score: 0.2313





Проверка гипотез не принесла улучшений

#### Трансформерная модель

##### Аугментация данных

In [None]:
new_model_trans_cust = SimpleSegViT(img_size=256)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_trans_cust.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(new_model_trans_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_trans_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(new_train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        optimizer.zero_grad()

        outputs = new_model_trans_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(new_train_dataloader)}")

Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:20<00:00,  3.07it/s]


Epoch [1/3], Loss: 0.6310824042274839


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:19<00:00,  3.22it/s]


Epoch [2/3], Loss: 0.562039938237932


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:19<00:00,  3.16it/s]

Epoch [3/3], Loss: 0.4945545953417581





Оценка модели

In [None]:
new_model_trans_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        outputs = new_model_trans_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:14<00:00,  2.98it/s]

IoU: 0.0000
Dice: 0.0000
F1-score: 0.0000





##### Функция потерь

In [None]:
new_model_trans_cust = SimpleSegViT(img_size=256)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_trans_cust.to(device)

criterion = CombinedLoss(bce_weight=0.5, dice_weight=0.5)
optimizer = torch.optim.Adam(new_model_trans_cust.parameters(), lr=1e-4)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_trans_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        optimizer.zero_grad()

        outputs = new_model_trans_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")

Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:20<00:00,  3.04it/s]


Epoch [1/3], Loss: 0.6437709113908192


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:22<00:00,  2.84it/s]


Epoch [2/3], Loss: 0.5593918779539684


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:24<00:00,  2.59it/s]

Epoch [3/3], Loss: 0.5015877385934194





Оценка модели

In [None]:
new_model_trans_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        outputs = new_model_trans_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:15<00:00,  2.81it/s]

IoU: 0.2604
Dice: 0.4110
F1-score: 0.4110





##### Оптимизатор

In [None]:
new_model_trans_cust = SimpleSegViT(img_size=256)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
new_model_trans_cust.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(new_model_cust.parameters(), lr=1e-4, weight_decay=1e-5)

Обучение модели

In [None]:
from tqdm import tqdm
epochs = 3

for epoch in range(epochs):
    new_model_trans_cust.train()
    running_loss = 0.0

    for images, masks in tqdm(train_dataloader, desc=f'Epoch {epoch+1}/{epochs} [Train]'):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        optimizer.zero_grad()

        outputs = new_model_trans_cust(images)

        loss = criterion(outputs, masks)

        loss.backward()

        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_dataloader)}")

Epoch 1/3 [Train]: 100%|██████████| 63/63 [00:18<00:00,  3.34it/s]


Epoch [1/3], Loss: 0.6635166699924167


Epoch 2/3 [Train]: 100%|██████████| 63/63 [00:19<00:00,  3.30it/s]


Epoch [2/3], Loss: 0.6633548452740624


Epoch 3/3 [Train]: 100%|██████████| 63/63 [00:18<00:00,  3.37it/s]

Epoch [3/3], Loss: 0.6632833878199259





Оценка модели

In [None]:
new_model_trans_cust.eval()
iou_list = []
dice_list = []
f1_list = []

with torch.no_grad():
    for images, masks in tqdm(test_dataloader, desc="Evaluating on Test Set"):
        images = images.to(device)
        masks = masks.to(device).float()
        if masks.ndim == 3:
            masks = masks.unsqueeze(1)

        outputs = new_model_trans_cust(images)

        output_probs = torch.sigmoid(outputs)

        iou = iou_score(output_probs, masks)
        dice = dice_score(output_probs, masks)
        f1 = f1_score_func(output_probs, masks)

        iou_list.append(iou)
        dice_list.append(dice)
        f1_list.append(f1)

avg_iou = np.mean(iou_list)
avg_dice = np.mean(dice_list)
avg_f1 = np.mean(f1_list)

print(f'IoU: {avg_iou:.4f}')
print(f'Dice: {avg_dice:.4f}')
print(f'F1-score: {avg_f1:.4f}')

Evaluating on Test Set: 100%|██████████| 44/44 [00:15<00:00,  2.93it/s]

IoU: 0.0000
Dice: 0.0000
F1-score: 0.0000





Проверка гипотез не принесла улучшений

### Выводы

В отличие от встроенных моделей, улучшений не вышло.

И сверточной и трансформерной модели требуется серьезное улучшение архитектуры и большее обучение на большем объёме данных.