# Лабораторная работа 7

In [None]:
!pip install segmentation-models-pytorch --quiet
!pip install albumentations --quiet
!pip install --upgrade scikit-learn --quiet
!pip install kaggle --quiet

In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!pip install -q kaggle

!curl -L -o qw.zip https://www.kaggle.com/api/v1/datasets/download/perke986/face-mask-segmentation-dataset
!unzip qw.zip

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 78.9M  100 78.9M    0     0  71.0M      0  0:00:01  0:00:01 --:--:--  140M
Archive:  qw.zip
replace Info.txt? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# NOTE(review): segmentation-models-pytorch is already installed by the first cell of this
# notebook; this repeated install looks redundant.
!pip install segmentation-models-pytorch --quiet

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from segmentation_models_pytorch import Unet, Segformer
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
import cv2


# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 4  # batch size passed to both DataLoaders
EPOCHS = 5  # number of training epochs per model
NUM_CLASSES = 2  # number of output classes requested from every segmentation model
LEARNING_RATE = 1e-4  # learning rate passed to Adam
IMAGE_SIZE = (224, 224)  # size unpacked into A.Resize in the baseline transforms


class FaceMaskDataset(Dataset):
    """Image / mask pairs for face-mask segmentation.

    Every file in ``images_dir`` is paired with the PNG of the same base name in
    ``masks_dir``; images without such a mask are skipped.

    Args:
        images_dir: Directory containing the input images.
        masks_dir: Directory containing the RGB mask PNGs.
        transform: Optional callable invoked as ``transform(image=..., mask=...)`` that
            returns a mapping with ``'image'`` and ``'mask'`` entries.
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping stable between runs and between dataset instances.
        # Entries that are not regular files (e.g. sub-directories) are ignored.
        self.image_names = sorted(
            name
            for name in os.listdir(images_dir)
            if os.path.isfile(os.path.join(images_dir, name))
            and os.path.isfile(os.path.join(masks_dir, os.path.splitext(name)[0] + ".png"))
        )

    def __len__(self):
        """Return the number of images that have a matching mask."""
        return len(self.image_names)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)`` with the mask as an int64 tensor."""
        img_name = self.image_names[idx]
        base_name = os.path.splitext(img_name)[0]

        img_path = os.path.join(self.images_dir, img_name)
        mask_path = os.path.join(self.masks_dir, base_name + ".png")

        # Context managers close the underlying files right after decoding.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = mask_rgb_to_class(mask_file.convert("RGB"))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # torch.as_tensor accepts both the ndarray produced when no transform is set and
        # the tensor produced by a tensor-converting transform, so .long() always works.
        return image, torch.as_tensor(mask).long()

In [None]:
# RGB colours that mark labelled mask pixels. All three listed shades map to class 1;
# every colour that is not listed keeps the default class 0.
MASK_COLORS = {
    (255, 255, 255): 1,
    (253, 237, 237): 1,
    (252, 219, 219): 1,
}


def mask_rgb_to_class(mask):
    """Convert an RGB mask into a 2-D ``uint8`` array of class indices.

    Args:
        mask: Anything ``np.array`` turns into an ``(H, W, 3)`` array of RGB values.

    Returns:
        An ``(H, W)`` ``uint8`` array holding the ``MASK_COLORS`` class of each pixel,
        and 0 wherever the pixel colour is not a key of ``MASK_COLORS``.
    """
    pixels = np.array(mask)
    labels = np.zeros(pixels.shape[:2], dtype=np.uint8)

    for color, label in MASK_COLORS.items():
        # A pixel matches only when all of its channels equal the reference colour.
        labels[(pixels == color).all(axis=-1)] = label

    return labels


# Создание бейзлайна и оценка качества

Для создания бейзлайна были использованы следующие модели:

1. Сверточная модель: UNet с энкодером ResNet-34, предобученным на ImageNet (библиотека segmentation_models_pytorch)

2. Трансформерная модель: SegFormer с энкодером MiT-B0, предобученным на ImageNet (библиотека segmentation_models_pytorch)

In [None]:
# Normalisation statistics shared by the training and validation pipelines.
_NORMALIZE_KWARGS = dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

# Training pipeline: resize, horizontal flip, normalise, convert to a tensor.
train_transform = A.Compose(
    [
        A.Resize(*IMAGE_SIZE),
        A.HorizontalFlip(),
        A.Normalize(**_NORMALIZE_KWARGS),
        ToTensorV2(),
    ]
)

# Validation pipeline: the same preprocessing without the flip.
val_transform = A.Compose(
    [
        A.Resize(*IMAGE_SIZE),
        A.Normalize(**_NORMALIZE_KWARGS),
        ToTensorV2(),
    ]
)

In [None]:
# Two views over the same files: `dataset` applies the training transform, `val_view`
# applies the validation transform. Assigning `val_dataset.dataset.transform` after
# `random_split` would also change the training subset, because both subsets wrap the
# very same dataset object.
dataset = FaceMaskDataset("images", "masks", transform=train_transform)
val_view = FaceMaskDataset("images", "masks", transform=val_transform)
val_view.image_names = dataset.image_names  # identical index -> file mapping in both views

val_size = int(0.2 * len(dataset))
train_size = len(dataset) - val_size

# A seeded permutation makes the train/validation split reproducible across runs.
SPLIT_SEED = 42
shuffled_indices = torch.randperm(
    len(dataset), generator=torch.Generator().manual_seed(SPLIT_SEED)
).tolist()
train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[:train_size])
val_dataset = torch.utils.data.Subset(val_view, shuffled_indices[train_size:])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Convolutional baseline: UNet with a ResNet-34 encoder; activation=None keeps raw logits.
unet_model = Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    classes=NUM_CLASSES,
    activation=None
).to(DEVICE)

# Transformer baseline: SegFormer with a MiT-B0 encoder; activation=None keeps raw logits.
segformer_model = Segformer(
    encoder_name="mit_b0",
    encoder_weights="imagenet",
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None
).to(DEVICE)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [23]:
def run_training_epoch(model, dataloader, criterion, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(predictions, labels)``.
        optimizer: Optimizer that updates the model parameters.

    Returns:
        The sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []

    for imgs, lbls in tqdm(dataloader, desc="Training"):
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)

        loss = criterion(model(imgs), lbls)

        # Standard step: clear old gradients, back-propagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)


def evaluate_model(model, dataloader):
    """Compute the mean per-image IoU and Dice of class 1 over ``dataloader``.

    Args:
        model: Network returning per-class logits; it is switched to eval mode.
        dataloader: Iterable of ``(images, labels)`` batches.

    Returns:
        ``(mean_iou, mean_dice)`` averaged over every image seen.
    """
    model.eval()
    per_image_iou = []
    per_image_dice = []

    with torch.no_grad():
        for imgs, lbls in tqdm(dataloader, desc="Evaluation"):
            imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)

            predicted = torch.argmax(model(imgs), dim=1)

            # Binary foreground maps for class 1.
            pred_fg = (predicted == 1).float()
            true_fg = (lbls == 1).float()
            overlap = pred_fg * true_fg

            # Reduce over the two spatial axes so each image gets its own score.
            intersect = overlap.sum(dim=(1, 2))
            union = (pred_fg + true_fg - overlap).sum(dim=(1, 2))
            total_area = pred_fg.sum(dim=(1, 2)) + true_fg.sum(dim=(1, 2))

            # The 1e-6 terms keep both ratios defined when the denominators are zero.
            per_image_iou.append(((intersect + 1e-6) / (union + 1e-6)).cpu().numpy())
            per_image_dice.append(((2 * intersect + 1e-6) / (total_area + 1e-6)).cpu().numpy())

    mean_iou = np.mean(np.concatenate(per_image_iou))
    mean_dice = np.mean(np.concatenate(per_image_dice))
    return mean_iou, mean_dice

In [None]:
loss_fn = CrossEntropyLoss()

baseline_models = [("UNet", unet_model), ("SegFormer", segformer_model)]

# Train every baseline model with its own Adam optimizer and validate after each epoch.
# The helpers defined above are run_training_epoch / evaluate_model; the previously used
# names train_epoch / evaluate are not defined anywhere in this notebook.
for name, model in baseline_models:
    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
    for epoch in range(EPOCHS):
        print(f"\n[{name}] Epoch {epoch + 1} of {EPOCHS}")
        loss = run_training_epoch(model, train_loader, loss_fn, optimizer)
        iou_score, dice_score = evaluate_model(model, val_loader)
        print(f"  → Training loss: {loss:.4f}")
        print(f"  → Validation IoU: {iou_score:.4f} | Dice: {dice_score:.4f}")

# Final comparison of the trained models
print("\n--- Final Evaluation ---")
for name, model in baseline_models:
    iou_score, dice_score = evaluate_model(model, val_loader)
    print(f"{name:<9} | IoU: {iou_score:.4f} | Dice: {dice_score:.4f}")



[UNet] Epoch 1 of 5


Training: 100%|██████████| 45/45 [01:03<00:00,  1.41s/it]
Evaluation: 100%|██████████| 11/11 [00:15<00:00,  1.43s/it]


  → Training loss: 0.4038
  → Validation IoU: 0.2743 | Dice: 0.3786

[UNet] Epoch 2 of 5


Training: 100%|██████████| 45/45 [00:59<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


  → Training loss: 0.2025
  → Validation IoU: 0.3961 | Dice: 0.4979

[UNet] Epoch 3 of 5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.29s/it]
Evaluation: 100%|██████████| 11/11 [00:14<00:00,  1.28s/it]


  → Training loss: 0.1304
  → Validation IoU: 0.5046 | Dice: 0.6093

[UNet] Epoch 4 of 5


Training: 100%|██████████| 45/45 [00:59<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


  → Training loss: 0.0963
  → Validation IoU: 0.6251 | Dice: 0.7398

[UNet] Epoch 5 of 5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


  → Training loss: 0.0757
  → Validation IoU: 0.6672 | Dice: 0.7756

[SegFormer] Epoch 1 of 5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.25s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


  → Training loss: 0.2140
  → Validation IoU: 0.2927 | Dice: 0.3650

[SegFormer] Epoch 2 of 5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.25s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.22s/it]


  → Training loss: 0.0944
  → Validation IoU: 0.3554 | Dice: 0.4328

[SegFormer] Epoch 3 of 5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.26s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.21s/it]


  → Training loss: 0.0580
  → Validation IoU: 0.4520 | Dice: 0.5370

[SegFormer] Epoch 4 of 5


Training: 100%|██████████| 45/45 [00:57<00:00,  1.28s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.22s/it]


  → Training loss: 0.0421
  → Validation IoU: 0.5690 | Dice: 0.6844

[SegFormer] Epoch 5 of 5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.27s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


  → Training loss: 0.0321
  → Validation IoU: 0.5646 | Dice: 0.6649

--- Final Evaluation ---


Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


UNet      | IoU: 0.6672 | Dice: 0.7756


Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.22s/it]

SegFormer | IoU: 0.5646 | Dice: 0.6649





## Оценка качества

При данных условиях и гиперпараметрах лучше всего показала себя модель UNet.

# Улучшенный бейзлайн

Гипотезы:

1. Горизонтальное отражение увеличивает разнообразие тренировочных данных, что способствует лучшей обобщающей способности модели;

2. Аффинные преобразования помогают модели стать устойчивой к изменениям положения и ориентации объектов на изображениях;

3. Изменение яркости и контраста моделирует различные условия освещения, что повышает устойчивость модели к вариациям освещения в данных;

4. Эластичные деформации добавляют разнообразие формы объектов, что полезно для задач сегментации с вариативными контурами.

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torch.nn import CrossEntropyLoss

IMG_SIZE = 256

# Transform pipelines
# The same resize step is used by the training and the validation pipeline.
common_resize = A.Resize(IMG_SIZE, IMG_SIZE)

# Augmentations applied to training samples only.
_train_only_steps = [
    A.HorizontalFlip(p=0.5),
    A.Affine(scale=(0.9, 1.1), translate_percent=(0.1, 0.1), rotate=(-30, 30), p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.ElasticTransform(p=0.2),
]

train_transform = A.Compose([common_resize, *_train_only_steps, A.Normalize(), ToTensorV2()])

val_transform = A.Compose([common_resize, A.Normalize(), ToTensorV2()])


In [None]:
# Helper that trains a model and validates it after every epoch
def train_and_evaluate(model, name):
    """Train ``model`` for ``EPOCHS`` epochs and print loss / IoU / Dice per epoch.

    Uses the notebook-level ``train_loader``, ``val_loader`` and ``loss_fn``.

    Args:
        model: Network to train with a fresh Adam optimizer.
        name: Label used in the printed progress lines.
    """
    adam = Adam(model.parameters(), lr=LEARNING_RATE)
    print(f"\n--- Training {name} ---")

    for epoch_idx in range(EPOCHS):
        print(f"[{name}] Epoch {epoch_idx + 1}/{EPOCHS}")
        epoch_loss = run_training_epoch(model, train_loader, loss_fn, adam)
        val_iou, val_dice = evaluate_model(model, val_loader)
        print(f"  Loss: {epoch_loss:.4f} | IoU: {val_iou:.4f} | Dice: {val_dice:.4f}")

In [None]:
# Main block
loss_fn = CrossEntropyLoss()


def _rewrap(subset, transform):
    """Return a Subset over the same files and indices as ``subset``, using ``transform``."""
    source = subset.dataset
    view = FaceMaskDataset(source.images_dir, source.masks_dir, transform=transform)
    view.image_names = source.image_names  # keep the index -> file mapping identical
    return torch.utils.data.Subset(view, list(subset.indices))


# The loaders created for the baseline still hold the baseline transforms, so the
# augmentations defined above would never be applied. Rebuild both loaders on the same
# train/validation split with the new transforms.
# Note: train_loader / val_loader are rebound, so later cells that read them also use
# these transforms.
train_loader = DataLoader(_rewrap(train_dataset, train_transform), batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(_rewrap(val_dataset, val_transform), batch_size=BATCH_SIZE)

# The baseline cell has already trained unet_model / segformer_model, so start again
# from freshly initialised models to compare against the baseline.
unet_model = Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    classes=NUM_CLASSES,
    activation=None
).to(DEVICE)

segformer_model = Segformer(
    encoder_name="mit_b0",
    encoder_weights="imagenet",
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None
).to(DEVICE)

train_and_evaluate(unet_model, "UNet")
train_and_evaluate(segformer_model, "SegFormer")

# Final comparison
print("\n=== Final Evaluation ===")
for model_name, model in [("UNet", unet_model), ("SegFormer", segformer_model)]:
    iou, dice = evaluate_model(model, val_loader)
    print(f"{model_name:<10} IoU: {iou:.4f} | Dice: {dice:.4f}\n")


--- Training UNet ---
[UNet] Epoch 1/5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.29s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.22s/it]


  Loss: 0.3644 | IoU: 0.2612 | Dice: 0.3645
[UNet] Epoch 2/5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.30s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


  Loss: 0.1803 | IoU: 0.4630 | Dice: 0.5415
[UNet] Epoch 3/5


Training: 100%|██████████| 45/45 [00:57<00:00,  1.28s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


  Loss: 0.1099 | IoU: 0.5197 | Dice: 0.6083
[UNet] Epoch 4/5


Training: 100%|██████████| 45/45 [00:57<00:00,  1.27s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


  Loss: 0.0836 | IoU: 0.5542 | Dice: 0.6453
[UNet] Epoch 5/5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.25s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.27s/it]


  Loss: 0.0646 | IoU: 0.6154 | Dice: 0.7078

--- Training SegFormer ---
[SegFormer] Epoch 1/5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.26s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.20s/it]


  Loss: 0.2653 | IoU: 0.2421 | Dice: 0.3168
[SegFormer] Epoch 2/5


Training: 100%|██████████| 45/45 [00:56<00:00,  1.26s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


  Loss: 0.1193 | IoU: 0.3831 | Dice: 0.4727
[SegFormer] Epoch 3/5


Training: 100%|██████████| 45/45 [00:55<00:00,  1.24s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.20s/it]


  Loss: 0.0721 | IoU: 0.4792 | Dice: 0.5689
[SegFormer] Epoch 4/5


Training: 100%|██████████| 45/45 [00:55<00:00,  1.23s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


  Loss: 0.0506 | IoU: 0.4786 | Dice: 0.5645
[SegFormer] Epoch 5/5


Training: 100%|██████████| 45/45 [00:55<00:00,  1.23s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.18s/it]


  Loss: 0.0403 | IoU: 0.5373 | Dice: 0.6323

=== Final Evaluation ===


Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.19s/it]


UNet       IoU: 0.6154 | Dice: 0.7078



Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.18s/it]

SegFormer  IoU: 0.5373 | Dice: 0.6323






## Сравнение с бейзлайном

1. UNet: ухудшение по IoU и Dice (~0.05 и ~0.07)

2. SegFormer: также снижение по метрикам (IoU 0.5646 → 0.5373, Dice 0.6649 → 0.6323)

## Выводы

Улучшения, проведённые для бейзлайна, не привели к улучшению качества моделей на валидации, возможно из-за переобучения или неподходящих гиперпараметров.

# Имплементация алгоритма машинного обучения

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

class BlockUnit(nn.Module):
    """Two consecutive 3x3 convolution -> batch norm -> ReLU stages.

    Args:
        ch_in: Channels of the input feature map.
        ch_out: Channels produced by both convolutions.
    """

    def __init__(self, ch_in, ch_out):
        super().__init__()
        stages = []
        # The first stage maps ch_in -> ch_out, the second one ch_out -> ch_out.
        for source_channels in (ch_in, ch_out):
            stages += [
                nn.Conv2d(source_channels, ch_out, 3, padding=1),
                nn.BatchNorm2d(ch_out),
                nn.ReLU(inplace=True),
            ]
        self.layer = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both stages to ``x``."""
        return self.layer(x)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BlockUnit(nn.Module):
    """Double convolution block: (3x3 conv -> batch norm -> ReLU) applied twice.

    Args:
        in_ch: Channels of the input feature map.
        out_ch: Channels produced by both convolutions.
    """

    def __init__(self, in_ch, out_ch):
        super().__init__()
        first_stage = self._conv_bn_relu(in_ch, out_ch)
        second_stage = self._conv_bn_relu(out_ch, out_ch)
        self.block = nn.Sequential(*first_stage, *second_stage)

    @staticmethod
    def _conv_bn_relu(src_ch, dst_ch):
        """Build one padded 3x3 convolution followed by batch norm and ReLU."""
        return (
            nn.Conv2d(src_ch, dst_ch, kernel_size=3, padding=1),
            nn.BatchNorm2d(dst_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Apply both stages to ``x``."""
        return self.block(x)

class CustomUNet(nn.Module):
    """Hand-written U-Net built from ``BlockUnit`` stages.

    Args:
        input_ch: Channels of the input image.
        n_classes: Number of output channels (one logit map per class).
        layer_sizes: Channel widths of the encoder stages, from the first to the last.
    """

    def __init__(self, input_ch=3, n_classes=2, layer_sizes=(64, 128, 256, 512)):
        super().__init__()
        self.encoder_stages = nn.ModuleList()
        self.decoder_stages = nn.ModuleList()
        self.pooling = nn.MaxPool2d(2)

        widths = list(layer_sizes)
        # Each encoder stage consumes the width produced by the previous one.
        stage_inputs = [input_ch] + widths[:-1]
        self.encoder_stages.extend(
            BlockUnit(src, dst) for src, dst in zip(stage_inputs, widths)
        )

        # Bottleneck doubles the channel count of the deepest encoder stage.
        self.bridge_unit = BlockUnit(widths[-1], widths[-1] * 2)

        # Decoder stages are stored as (upsampling, BlockUnit) pairs, deepest first.
        for width in widths[::-1]:
            self.decoder_stages.extend([
                nn.ConvTranspose2d(width * 2, width, 2, stride=2),
                BlockUnit(width * 2, width),
            ])

        self.output_layer = nn.Conv2d(widths[0], n_classes, 1)

    def forward(self, x):
        """Run the encoder, the bottleneck and the decoder with skip connections."""
        skips = []
        for stage in self.encoder_stages:
            x = stage(x)
            skips.append(x)
            x = self.pooling(x)

        x = self.bridge_unit(x)

        decoder = list(self.decoder_stages)
        # Pair every (upsample, refine) couple with the matching skip, deepest first.
        for skip, upsample, refine in zip(reversed(skips), decoder[0::2], decoder[1::2]):
            x = upsample(x)
            # Resize when the upsampled map does not match the skip's spatial size.
            if x.shape[2:] != skip.shape[2:]:
                x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=False)
            x = refine(torch.cat((skip, x), dim=1))

        return self.output_layer(x)


In [None]:
class SplitPatch(nn.Module):
    """Patch embedding: a convolution whose kernel size and stride both equal ``psize``.

    Args:
        ch_in: Channels of the input image.
        dim: Embedding channels produced per patch.
        psize: Patch side length, used as both kernel size and stride.
    """

    def __init__(self, ch_in=3, dim=64, psize=4):
        super().__init__()
        self.projection = nn.Conv2d(
            in_channels=ch_in,
            out_channels=dim,
            kernel_size=psize,
            stride=psize,
        )

    def forward(self, x):
        """Project ``x`` into patch embeddings."""
        embedded = self.projection(x)
        return embedded

class LinearHead(nn.Module):
    """Prediction head: 3x3 convolution, ReLU, then a 1x1 convolution to class logits.

    Args:
        dim: Channels of the incoming feature map.
        out_classes: Number of output channels.
    """

    def __init__(self, dim, out_classes):
        super().__init__()
        head_layers = [
            nn.Conv2d(dim, dim, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(dim, out_classes, kernel_size=1),
        ]
        self.mlp = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map the features ``x`` to per-class logits."""
        logits = self.mlp(x)
        return logits

class CustomSegFormer(nn.Module):
    """Simplified SegFormer-style segmentation network.

    Pipeline: 4x4 patch embedding -> two conv/batch-norm/ReLU stages -> prediction head ->
    two stride-2 transposed convolutions that upsample the logits by a factor of 4.
    Note that this implementation contains convolutional layers only.

    Args:
        ch_in: Channels of the input image.
        cls_out: Number of output classes.
        emb_size: Channels of the patch embedding and of the backbone.
    """

    def __init__(self, ch_in=3, cls_out=2, emb_size=64):
        super().__init__()
        self.tokenizer = SplitPatch(ch_in, dim=emb_size, psize=4)

        self.backbone = nn.Sequential(
            nn.Conv2d(emb_size, emb_size, kernel_size=3, padding=1),
            nn.BatchNorm2d(emb_size),
            nn.ReLU(inplace=True),
            nn.Conv2d(emb_size, emb_size, kernel_size=3, padding=1),
            nn.BatchNorm2d(emb_size),
            nn.ReLU(inplace=True)
        )

        self.head = LinearHead(emb_size, cls_out)

        # Two stride-2 stages undo the 4x downsampling of the patch embedding.
        self.upscale = nn.Sequential(
            nn.ConvTranspose2d(cls_out, cls_out, 2, stride=2),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(cls_out, cls_out, 2, stride=2)
        )

    def forward(self, x):
        """Return logits with the same spatial size as the input ``x``."""
        target_size = x.shape[2:]

        x = self.tokenizer(x)
        x = self.backbone(x)
        x = self.head(x)
        x = self.upscale(x)

        # The stride-4 embedding floors sizes that are not multiples of 4, so the 4x
        # upsampling can come out smaller than the input; resize so that the logits
        # always line up with the label mask. No-op when the sizes already match.
        if x.shape[2:] != target_size:
            x = F.interpolate(x, size=target_size, mode='bilinear', align_corners=False)
        return x


In [None]:
# Instantiate the hand-written models; each one gets its own Adam optimizer.
unet_model = CustomUNet(input_ch=3, n_classes=NUM_CLASSES).to(DEVICE)
segformer_model = CustomSegFormer(ch_in=3, cls_out=NUM_CLASSES).to(DEVICE)

unet_optimizer = Adam(unet_model.parameters(), lr=LEARNING_RATE)
segformer_optimizer = Adam(segformer_model.parameters(), lr=LEARNING_RATE)

loss_fn = nn.CrossEntropyLoss()

custom_runs = [
    ("CustomUNet", unet_model, unet_optimizer),
    ("CustomSegFormer", segformer_model, segformer_optimizer),
]

# Train the models one after another, validating after every epoch.
for name, model, model_optimizer in custom_runs:
    for epoch in range(EPOCHS):
        print(f"{name} Epoch {epoch+1}/{EPOCHS}")
        loss = run_training_epoch(model, train_loader, loss_fn, model_optimizer)
        iou_score, dice_score = evaluate_model(model, val_loader)
        print(f"Loss: {loss:.4f}, IoU: {iou_score:.4f}, Dice: {dice_score:.4f}")

print("Final Evaluation:")
for name, model, _model_optimizer in custom_runs:
    iou_score, dice_score = evaluate_model(model, val_loader)
    print(f"{name}: IoU={iou_score:.4f}, Dice={dice_score:.4f}\n")


CustomUNet Epoch 1/5


Training: 100%|██████████| 45/45 [00:59<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.21s/it]


Loss: 0.5096, IoU: 0.1210, Dice: 0.1988
CustomUNet Epoch 2/5


Training: 100%|██████████| 45/45 [00:59<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.20s/it]


Loss: 0.3329, IoU: 0.1875, Dice: 0.2872
CustomUNet Epoch 3/5


Training: 100%|██████████| 45/45 [00:59<00:00,  1.32s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.21s/it]


Loss: 0.2437, IoU: 0.2756, Dice: 0.3861
CustomUNet Epoch 4/5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.23s/it]


Loss: 0.2437, IoU: 0.0382, Dice: 0.0633
CustomUNet Epoch 5/5


Training: 100%|██████████| 45/45 [00:58<00:00,  1.31s/it]
Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.25s/it]


Loss: 0.2181, IoU: 0.3124, Dice: 0.4217
CustomSegFormer Epoch 1/5


Training: 100%|██████████| 45/45 [00:50<00:00,  1.12s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.12s/it]


Loss: 0.4882, IoU: 0.0984, Dice: 0.1659
CustomSegFormer Epoch 2/5


Training: 100%|██████████| 45/45 [00:50<00:00,  1.12s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.11s/it]


Loss: 0.4476, IoU: 0.1521, Dice: 0.2328
CustomSegFormer Epoch 3/5


Training: 100%|██████████| 45/45 [00:49<00:00,  1.11s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.11s/it]


Loss: 0.4123, IoU: 0.2079, Dice: 0.2983
CustomSegFormer Epoch 4/5


Training: 100%|██████████| 45/45 [00:49<00:00,  1.11s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.11s/it]


Loss: 0.3867, IoU: 0.2447, Dice: 0.3412
CustomSegFormer Epoch 5/5


Training: 100%|██████████| 45/45 [00:50<00:00,  1.11s/it]
Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.11s/it]


Loss: 0.3691, IoU: 0.2695, Dice: 0.3696
Final Evaluation:


Evaluation: 100%|██████████| 11/11 [00:13<00:00,  1.22s/it]


CustomUNet: IoU=0.3124, Dice=0.4217



Evaluation: 100%|██████████| 11/11 [00:12<00:00,  1.11s/it]

CustomSegFormer: IoU=0.2695, Dice=0.3696






## Сравнение с бейзлайном

Реализованные модели значительно уступают по метрикам как исходному, так и улучшенному бейзлайну.

## Выводы

Собственная реализация моделей требует доработки и оптимизации — текущая версия показывает намного худшие результаты.

# Итоговые выводы
1. Исходный бейзлайн (UNet и SegFormer из библиотеки segmentation_models_pytorch) показывает наилучшее качество.

2. Улучшение бейзлайна пока не дало улучшения, требует дополнительной настройки.

3. Собственная реализация моделей сильно уступает по качеству, нужна доработка.