# ЛР 6

Загрузим датасет для коллаба

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/drive/MyDrive/mmtt2/archive.zip

Ставим зависимости

In [None]:
!pip install --upgrade pip
!pip install torch torchvision segmentation-models-pytorch ultralytics

Фиксируем генератор случайных чисел, чтоб при повторном запуске метрики +- совпали

In [None]:
import random
import numpy as np
import torch

def _seed_all(seed):
    """Seed Python's, NumPy's and PyTorch's global random generators with one value."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    # The CUDA generators are only touched when a GPU is actually visible.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


SEED = 42
_seed_all(SEED)

Проверка, чтоб запускаться преимущественно на куде

In [None]:
import os
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Running on", device)

Подготавливаем датасет, все изображения в 150x150, переводим в тензор pytorch и нормалайзим

In [None]:
import os
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Dataset folders are resolved relative to the current working directory.
repo_root = os.path.abspath(os.getcwd())

# Each split sits in a doubly nested folder: seg_train/seg_train, seg_test/seg_test.
train_dir, test_dir = (
    os.path.join(repo_root, folder, folder) for folder in ("seg_train", "seg_test")
)

# Fixed 150x150 input, tensor conversion, then per-channel normalisation.
baseline_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_ds = datasets.ImageFolder(train_dir, transform=baseline_transform)
test_ds = datasets.ImageFolder(test_dir, transform=baseline_transform)

# Both loaders share batch size and worker count; only the training one shuffles.
_loader_opts = dict(batch_size=64, num_workers=4)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_opts)
test_loader = DataLoader(test_ds, shuffle=False, **_loader_opts)

print("Classes:", train_ds.classes)
print("Train / Test samples:", len(train_ds), "/", len(test_ds))

Выбираем метрики. Для несбалансированных данных macro-F1 отражает качество получше

In [None]:
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(y_true, y_pred):
    """Compute the two classification metrics reported throughout the notebook.

    Args:
        y_true: ground-truth class labels.
        y_pred: predicted class labels, aligned element-wise with ``y_true``.

    Returns:
        dict with keys ``'accuracy'`` and ``'f1_macro'`` (macro-averaged F1).
    """
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'f1_macro': f1_score(y_true, y_pred, average='macro'),
    }

Бейзлайн resnet-18 1 эпоха

In [None]:
num_classes = len(train_ds.classes)

# The weights enum replaces the deprecated `pretrained=True` flag (torchvision >= 0.13);
# the ViT cell of this notebook already relies on the same API.
model_base = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.DEFAULT
)
# Swap the ImageNet classification head for one sized to this dataset.
model_base.fc = nn.Linear(model_base.fc.in_features, num_classes)
model_base = model_base.to(device)

optimizer_base = torch.optim.Adam(model_base.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()
# `verbose=True` is deprecated in recent PyTorch releases; the scheduler is stepped
# only once for this model, so it would never have printed anything anyway.
scheduler_base = ReduceLROnPlateau(optimizer_base, mode='min', factor=0.5, patience=1)

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: network to train; switched to train mode.
        loader: iterable of ``(inputs, labels)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits, labels)``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, metrics)``: the unweighted mean of the per-batch losses and
        the ``compute_metrics`` dict for the predictions made during the pass.
    """
    model.train()
    batch_losses = []
    predicted, expected = [], []

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
        predicted.extend(logits.argmax(1).cpu().tolist())
        expected.extend(labels.cpu().tolist())

    mean_loss = sum(batch_losses) / len(batch_losses)
    return mean_loss, compute_metrics(expected, predicted)

def eval_model(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode.
        loader: iterable of ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(logits, labels)``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, metrics)``: the unweighted mean of the per-batch losses and
        the ``compute_metrics`` dict for all predictions.
    """
    model.eval()
    batch_losses = []
    predicted, expected = [], []

    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            batch_losses.append(criterion(logits, labels).item())
            predicted.extend(logits.argmax(1).cpu().tolist())
            expected.extend(labels.cpu().tolist())

    mean_loss = sum(batch_losses) / len(batch_losses)
    return mean_loss, compute_metrics(expected, predicted)

# 4) One cycle: a single training epoch followed by one evaluation on the test loader.
train_loss, train_metrics = train_one_epoch(model_base, train_loader, optimizer_base, criterion, device)
test_loss,  test_metrics  = eval_model(model_base, test_loader, criterion, device)
# The plateau scheduler is driven by the loss measured on the test loader.
scheduler_base.step(test_loss)

print(f"Baseline (1 epoch) train acc={train_metrics['accuracy']:.4f}, f1={train_metrics['f1_macro']:.4f}")
print(f"Baseline (1 epoch) test acc={test_metrics['accuracy']:.4f}, f1={test_metrics['f1_macro']:.4f}")

Аугментация данных

In [None]:
# Training pipeline: 96x96 resize plus random horizontal flip and rotation of up to 15 degrees.
train_transform_aug = transforms.Compose([
    transforms.Resize((96,96)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])
# Evaluation pipeline: same resize and normalisation, no random transforms.
test_transform = transforms.Compose([
    transforms.Resize((96,96)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])

train_ds_aug = datasets.ImageFolder(train_dir, transform=train_transform_aug)
test_ds_aug  = datasets.ImageFolder(test_dir,  transform=test_transform)

# Keep a random third of the training images; the remaining part of the split is discarded.
subset_size = len(train_ds_aug)//3
train_subset_aug, _ = random_split(train_ds_aug, [subset_size, len(train_ds_aug)-subset_size])

train_loader_aug = DataLoader(train_subset_aug, batch_size=64, shuffle=True,  num_workers=4)
test_loader_aug  = DataLoader(test_ds_aug,      batch_size=64, shuffle=False, num_workers=4)

print("Augmented train subset:", len(train_subset_aug))


Улучшаем resnet18. Ускоряем обучение и предотвращаем затирание уже выученных низкоуровневых фильтров

In [None]:
# The weights enum replaces the deprecated `pretrained=True` flag (torchvision >= 0.13).
model_imp = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.DEFAULT
)
# Freeze everything except the last residual stage and the classifier.
for name, p in model_imp.named_parameters():
    if not name.startswith(("layer4", "fc")):
        p.requires_grad = False

# The replacement head is created after the freeze loop, so it is trainable by default.
model_imp.fc = nn.Linear(model_imp.fc.in_features, num_classes)
model_imp = model_imp.to(device)

# Two parameter groups: a lower rate for layer4, a higher one for the fresh head.
optimizer_imp = torch.optim.Adam([
    {'params': model_imp.layer4.parameters(), 'lr': 1e-4},
    {'params': model_imp.fc.parameters(),     'lr': 1e-3},
])
# `verbose=True` is deprecated in recent PyTorch releases; the scheduler is stepped
# only once here, so dropping the flag removes no output.
scheduler_imp = ReduceLROnPlateau(optimizer_imp, mode='min', factor=0.5, patience=1)

tl, tm = train_one_epoch(model_imp, train_loader_aug, optimizer_imp, criterion, device)
vl, vm = eval_model(model_imp, test_loader_aug, criterion, device)
scheduler_imp.step(vl)

print(f"Improved train acc={tm['accuracy']:.4f}, f1={tm['f1_macro']:.4f}")
print(f"Improved test acc={vm['accuracy']:.4f}, f1={vm['f1_macro']:.4f}")

resnet18 с полным дообучением

In [None]:
# Full fine-tuning: every ResNet-18 parameter stays trainable.
# The weights enum replaces the deprecated `pretrained=True` flag (torchvision >= 0.13).
model_e2 = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.DEFAULT
)
model_e2.fc = nn.Linear(model_e2.fc.in_features, num_classes)
model_e2 = model_e2.to(device)

opt_e2 = torch.optim.Adam(model_e2.parameters(), lr=1e-4)
# `verbose=True` is deprecated in recent PyTorch releases; with patience=1 and only
# two scheduler steps the learning rate cannot be reduced, so no message is lost.
sch_e2 = ReduceLROnPlateau(opt_e2, mode='min', factor=0.5, patience=1)

for epoch in range(2):
    tl, tm = train_one_epoch(model_e2, train_loader, opt_e2, criterion, device)
    vl, vm = eval_model(model_e2, test_loader, criterion, device)
    sch_e2.step(vl)
    print(f"Epoch {epoch+1} train acc={tm['accuracy']:.4f}, f1={tm['f1_macro']:.4f} | test acc={vm['accuracy']:.4f}, f1={vm['f1_macro']:.4f}")


Трансформер vit, сравниваем с cnn, прогон 1 эпохи

In [None]:
from torchvision.models import vit_b_16, ViT_B_16_Weights
import torch.nn as nn
from torchvision import datasets
from torch.utils.data import DataLoader, random_split

# Use the preprocessing pipeline that ships with the selected ViT weights.
weights = ViT_B_16_Weights.DEFAULT
preprocess_vit = weights.transforms()

model_vit = vit_b_16(weights=weights)
# Freeze all pretrained parameters; only the head created below will be trainable.
for p in model_vit.parameters():
    p.requires_grad = False

# Replace the classification head with one sized to this dataset.
in_f = model_vit.heads.head.in_features
model_vit.heads.head = nn.Linear(in_f, num_classes)
model_vit = model_vit.to(device)

# Train on a random 1/8 of the training images; the rest of the split is discarded.
ds_vit = datasets.ImageFolder(train_dir, transform=preprocess_vit)
sz = len(ds_vit) // 8
sub_vit, _ = random_split(ds_vit, [sz, len(ds_vit)-sz])
ldr_vit = DataLoader(sub_vit, batch_size=64, shuffle=True, num_workers=2)

# The test set is used in full.
test_ds_vit = datasets.ImageFolder(test_dir, transform=preprocess_vit)
test_loader_vit = DataLoader(test_ds_vit, batch_size=64, shuffle=False, num_workers=2)

# The optimizer only receives the parameters of the new head.
opt_vit = torch.optim.Adam(model_vit.heads.head.parameters(), lr=1e-3)
crit_vit = nn.CrossEntropyLoss()

vtl, vtm = train_one_epoch(model_vit, ldr_vit, opt_vit, crit_vit, device)
vvl, vvm = eval_model(model_vit, test_loader_vit, crit_vit, device)

print(f"Fast ViT train acc={vtm['accuracy']:.4f}, f1={vtm['f1_macro']:.4f}")
print(f"Fast ViT test acc={vvm['accuracy']:.4f}, f1={vvm['f1_macro']:.4f}")


In [None]:
# Final configuration: fully fine-tuned ResNet-18, two epochs on the full training set.
# The weights enum replaces the deprecated `pretrained=True` flag (torchvision >= 0.13).
model_fin = torchvision.models.resnet18(
    weights=torchvision.models.ResNet18_Weights.DEFAULT
)
model_fin.fc = nn.Linear(model_fin.fc.in_features, num_classes)
model_fin = model_fin.to(device)

opt_fin = torch.optim.Adam(model_fin.parameters(), lr=1e-4)
# `verbose=True` is deprecated in recent PyTorch releases; with patience=1 and only
# two scheduler steps the learning rate cannot be reduced, so no message is lost.
sch_fin = ReduceLROnPlateau(opt_fin, mode='min', factor=0.5, patience=1)

for epoch in range(2):
    tl, tm = train_one_epoch(model_fin, train_loader, opt_fin, criterion, device)
    vl, vm = eval_model(model_fin, test_loader, criterion, device)
    sch_fin.step(vl)
    print(f"[FINAL] Epoch {epoch+1} train acc={tm['accuracy']:.4f}, f1={tm['f1_macro']:.4f} | test acc={vm['accuracy']:.4f}, f1={vm['f1_macro']:.4f}")


**Сравнение Baseline и Improved на полном наборе**  
| Модель                       | Test acc | Test F1 (macro) |
|------------------------------|---------:|----------------:|
| Бейзлайн ResNet-18 (1 ep)    |    0.9303|           0.9316|
| Улучшенный ResNet-18 (2 ep+sch) |    0.9363|           0.9376|

**Выводы:**  
- Увеличение числа эпох с 1->2 + использование `ReduceLROnPlateau` дало прирост F1~0.006
- Эксперимент с частичной fine-tuning и легкими аугментациями на 1/3 данных снизил качество
- Ускоренный ViT (head-only на 1/8) показал F1≈0.886 -> уступает ResNet-18 на этих данных

Логистическая регрессия - один линейный слой

In [None]:
import torch.nn as nn

class LogisticRegressionModel(nn.Module):
    """Multinomial logistic regression: one linear layer over flattened inputs.

    Args:
        in_features: number of values per sample after flattening.
        num_classes: number of output logits.
    """

    def __init__(self, in_features, num_classes):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        # Collapse every non-batch dimension into a single feature vector per sample.
        batch = x.size(0)
        flat = x.view(batch, -1)
        return self.linear(flat)

# Flattened input size: 3 channels of the 150x150 images produced by baseline_transform.
in_features = 3 * 150 * 150
model_lr = LogisticRegressionModel(in_features, num_classes).to(device)

optimizer_lr = torch.optim.Adam(model_lr.parameters(), lr=1e-4)
# Rebinds the notebook-wide `criterion` to a fresh cross-entropy loss.
criterion    = nn.CrossEntropyLoss()

# Five epochs; metrics on the test loader are printed after each one.
for epoch in range(5):
    tr_loss, tr_metrics = train_one_epoch(model_lr, train_loader, optimizer_lr, criterion, device)
    vl_loss, vl_metrics = eval_model   (model_lr, test_loader,  criterion, device)
    print(
        f"Epoch {epoch+1} "
        f"train acc={tr_metrics['accuracy']:.4f}, f1={tr_metrics['f1_macro']:.4f} | "
        f" test acc={vl_metrics['accuracy']:.4f}, f1={vl_metrics['f1_macro']:.4f}"
    )


Простая MLP с 1 скрытым слоем и ReLU

In [None]:
class SimpleMLP(nn.Module):
    """Perceptron with a single ReLU hidden layer applied to flattened inputs.

    Args:
        in_features: number of values per sample after flattening.
        hidden_size: width of the hidden layer.
        num_classes: number of output logits.
    """

    def __init__(self, in_features, hidden_size, num_classes):
        super().__init__()
        layers = [
            nn.Flatten(),
            nn.Linear(in_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.net(x)
        return logits

# Width of the single hidden layer.
hidden_size = 512
model_mlp = SimpleMLP(in_features, hidden_size, num_classes).to(device)

optimizer_mlp = torch.optim.Adam(model_mlp.parameters(), lr=1e-4)

# Five epochs with the same train/eval helpers and loaders as the logistic regression run.
for epoch in range(5):
    tr_loss, tr_metrics = train_one_epoch(model_mlp, train_loader, optimizer_mlp, criterion, device)
    vl_loss, vl_metrics = eval_model   (model_mlp, test_loader,  criterion, device)
    print(
        f"Epoch {epoch+1} "
        f"train acc={tr_metrics['accuracy']:.4f}, f1={tr_metrics['f1_macro']:.4f} | "
        f" test acc={vl_metrics['accuracy']:.4f}, f1={vl_metrics['f1_macro']:.4f}"
    )


Логистическая регрессия + ReduceLROnPlateau

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# A fresh logistic regression model; this rebinds `model_lr` from the earlier cell.
model_lr = LogisticRegressionModel(in_features, num_classes).to(device)
opt_lr   = torch.optim.Adam(model_lr.parameters(), lr=1e-4)
# Halve the learning rate once the monitored loss has not improved for more than 2 epochs.
sched_lr = ReduceLROnPlateau(opt_lr, mode='min', factor=0.5, patience=2, verbose=True)
crit     = nn.CrossEntropyLoss()

for epoch in range(10):
    tr_loss, tr_m = train_one_epoch(model_lr, train_loader, opt_lr, crit, device)
    vl_loss, vl_m = eval_model   (model_lr, test_loader,  crit, device)
    # The scheduler monitors the loss measured on the test loader.
    sched_lr.step(vl_loss)
    print(f"Epoch {epoch+1} train f1={tr_m['f1_macro']:.4f} | test f1={vl_m['f1_macro']:.4f}")


MLP с аугментациями и scheduler

In [None]:
# Colour jitter is applied to the source image before the 150x150 resize.
aug_transform = transforms.Compose([
    transforms.ColorJitter(0.1,0.1,0.1,0.1),
    transforms.Resize((150,150)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
train_ds_aug2 = datasets.ImageFolder(train_dir, transform=aug_transform)
train_loader_aug2 = DataLoader(train_ds_aug2, batch_size=64, shuffle=True, num_workers=4)

model_mlp2 = SimpleMLP(in_features, hidden_size, num_classes).to(device)
opt_mlp2   = torch.optim.Adam(model_mlp2.parameters(), lr=1e-4)
sched_mlp2 = ReduceLROnPlateau(opt_mlp2, mode='min', factor=0.5, patience=2, verbose=True)

# `crit` is the CrossEntropyLoss created in the logistic-regression + scheduler cell.
for epoch in range(10):
    tr_loss, tr_m = train_one_epoch(model_mlp2, train_loader_aug2, opt_mlp2, crit, device)
    # Evaluation uses the non-augmented test loader.
    vl_loss, vl_m = eval_model   (model_mlp2, test_loader,       crit, device)
    sched_mlp2.step(vl_loss)
    print(f"Epoch {epoch+1} train f1={tr_m['f1_macro']:.4f} | test f1={vl_m['f1_macro']:.4f}")


# Выводы по ЛР №6

- Метрики качества: использовали `accuracy` и `macro-F1`
- Baseline ResNet-18 (1 эпоха, full data):
  - Test acc = 0.9303, macro-F1 = 0.9316
- Improved ResNet-18 (2 эпохи + ReduceLROnPlateau, full data):
  - Test acc = 0.9363, macro-F1 = 0.9376
- Vision Transformer (head-only на 1/8 data):
  - Test macro-F1 ≈ 0.8863
- Logistic Regression + Scheduler (10 эпох):
  - Test macro-F1 ≈ 0.47
- Simple MLP + Scheduler + Augmentations (10 эпох):
  - Test macro-F1 ≈ 0.60

## Итого получаем:

1. ResNet-18 показал отличные результаты уже в одну эпоху и ещё улучшился при 2 эпохах + lr-scheduler
2. ViT (head-only) на малом поднаборе данных даёт заметно более низкие показатели по сравнению с ResNet
3. Простые модели LogReg, MLP даже с scheduler и аугментациями не приближаются к качеству глубоких сетей - макро-F1 < 0.61
4. Финальный improved-бейзлайн - ResNet-18, 2 эпохи, полный датасет, ReduceLROnPlateau (test macro-F1 = 0.9376)


# ЛР 7

Аналогично

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/drive/MyDrive/mmtt2/lr7.zip

Подготавливаем датасет для сегментации - формируем пары [img, mask] для smp.SegmentationDataset

In [None]:
!pip install pycocotools

import os
import numpy as np
from PIL import Image
from pycocotools.coco import COCO
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import InterpolationMode

# Root folder that contains one sub-folder per split.
ROOT = "/content"
SPLITS = ["train", "valid", "test"]

# Spatial size every image and mask is resized to.
IMG_SIZE = (256, 256)
# Image pipeline: resize, convert to a tensor, normalise per channel.
img_tf = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225])
])
# Mask pipeline: nearest-neighbour resize so no interpolated label values appear,
# then drop the leading channel dimension and cast to int64.
mask_tf = transforms.Compose([
    transforms.Resize(IMG_SIZE, interpolation=InterpolationMode.NEAREST),
    transforms.PILToTensor(),
    transforms.Lambda(lambda x: x.squeeze(0).long())
])

class CocoSegDataset(Dataset):
    """Binary segmentation dataset backed by a COCO annotation file.

    Every item is an ``(image, mask)`` pair. The mask is the pixel-wise union of
    all annotations of the image (1 where any annotation covers the pixel, else 0).

    Args:
        img_dir: directory the annotation file's ``file_name`` entries are relative to.
        ann_file: path to the COCO JSON annotation file.
        img_transform: optional callable applied to the RGB PIL image.
        mask_transform: optional callable applied to the PIL mask.
    """

    def __init__(self, img_dir, ann_file, img_transform=None, mask_transform=None):
        self.coco = COCO(ann_file)
        self.img_ids = list(self.coco.imgs.keys())
        self.img_dir = img_dir
        self.img_transform = img_transform
        self.mask_transform = mask_transform

    def __len__(self):
        return len(self.img_ids)

    def _merged_mask(self, img_id, height, width):
        """Return the union of all annotation masks of one image as a uint8 array."""
        merged = np.zeros((height, width), dtype=np.uint8)
        annotations = self.coco.loadAnns(self.coco.getAnnIds(imgIds=[img_id]))
        for annotation in annotations:
            merged = np.maximum(merged, self.coco.annToMask(annotation))
        return merged

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        info = self.coco.loadImgs(img_id)[0]

        image_path = os.path.join(self.img_dir, info['file_name'])
        img = Image.open(image_path).convert("RGB")
        mask = Image.fromarray(self._merged_mask(img_id, info['height'], info['width']))

        if self.img_transform:
            img = self.img_transform(img)
        if self.mask_transform:
            mask = self.mask_transform(mask)
        return img, mask

# One DataLoader per split, keyed by split name; only the training split is shuffled.
loaders = {}
for split in SPLITS:
    img_dir  = os.path.join(ROOT, split)
    # Each split folder holds its own annotation file next to the images.
    ann_file = os.path.join(ROOT, split, "_annotations.coco.json")
    ds = CocoSegDataset(img_dir, ann_file, img_tf, mask_tf)
    loaders[split] = DataLoader(ds, batch_size=8,
                                shuffle=(split=="train"),
                                num_workers=2)


# Sanity check: shapes of one training batch and the label values present in its masks.
xb, yb = next(iter(loaders["train"]))
print("Images:", xb.shape)
print("Masks: ", yb.shape)
print("Unique mask values:", torch.unique(yb))

Выбираем метрики IoU и Dice

In [None]:
import torch

def iou_score(pred: torch.Tensor, target: torch.Tensor, smooth=1e-6):
    """Mean intersection-over-union of thresholded predictions.

    Args:
        pred: scores; values above 0.5 count as foreground.
        target: integer mask combined with the prediction via bitwise and/or.
        smooth: added to numerator and denominator so an empty union is defined.

    Returns:
        0-dim tensor: the per-sample IoU (summed over dims 1 and 2) averaged over the batch.
    """
    binary = pred.gt(0.5).long()
    overlap = (binary & target).float().sum((1, 2))
    combined = (binary | target).float().sum((1, 2))
    per_sample = (overlap + smooth) / (combined + smooth)
    return per_sample.mean()

def dice_score(pred: torch.Tensor, target: torch.Tensor, smooth=1e-6):
    """Mean Dice coefficient of thresholded predictions.

    Args:
        pred: scores; values above 0.5 count as foreground.
        target: integer mask combined with the prediction via bitwise and.
        smooth: added to numerator and denominator so empty masks are defined.

    Returns:
        0-dim tensor: the per-sample Dice (summed over dims 1 and 2) averaged over the batch.
    """
    binary = pred.gt(0.5).long()
    doubled_overlap = (binary & target).float().sum((1, 2)) * 2
    sizes = binary.float().sum((1, 2)) + target.float().sum((1, 2))
    per_sample = (doubled_overlap + smooth) / (sizes + smooth)
    return per_sample.mean()


In [None]:
!pip install segmentation_models_pytorch

Бейзлайн для UNet

In [None]:
import segmentation_models_pytorch as smp
import torch.nn as nn

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# U-Net with an ImageNet-initialised ResNet-34 encoder and a single output channel.
# activation=None: the model returns raw logits.
model_unet = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
    activation=None
).to(device)

# BCEWithLogitsLoss expects logits, matching activation=None above.
criterion_seg = nn.BCEWithLogitsLoss()
optimizer_seg = torch.optim.Adam(model_unet.parameters(), lr=1e-4)
scheduler_seg = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_seg, mode='min', factor=0.5, patience=1, verbose=True
)

def train_epoch_seg(model, loader, optimizer, criterion, device):
    """Train a single-channel segmentation model for one epoch.

    Args:
        model: network returning logits with a channel dimension of size 1.
        loader: iterable of ``(images, masks)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits.squeeze(1), masks.float())``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.train()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = criterion(logits.squeeze(1), targets.float())
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()

        # Metrics are computed on sigmoid probabilities of the same forward pass.
        probs = torch.sigmoid(logits).squeeze(1)
        iou_per_batch.append(iou_score(probs, targets).item())
        dice_per_batch.append(dice_score(probs, targets).item())

    mean_iou = sum(iou_per_batch) / len(iou_per_batch)
    mean_dice = sum(dice_per_batch) / len(dice_per_batch)
    return loss_sum / len(loader), mean_iou, mean_dice

def eval_epoch_seg(model, loader, criterion, device):
    """Evaluate a single-channel segmentation model without tracking gradients.

    Args:
        model: network returning logits with a channel dimension of size 1.
        loader: iterable of ``(images, masks)`` batches.
        criterion: loss called as ``criterion(logits.squeeze(1), masks.float())``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.eval()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)

            logits = model(images)
            loss_sum += criterion(logits.squeeze(1), targets.float()).item()

            probs = torch.sigmoid(logits).squeeze(1)
            iou_per_batch.append(iou_score(probs, targets).item())
            dice_per_batch.append(dice_score(probs, targets).item())

    mean_iou = sum(iou_per_batch) / len(iou_per_batch)
    mean_dice = sum(dice_per_batch) / len(dice_per_batch)
    return loss_sum / len(loader), mean_iou, mean_dice

# One training epoch followed by one evaluation on the validation split.
train_loss, train_iou, train_dice = train_epoch_seg(
    model_unet, loaders["train"], optimizer_seg, criterion_seg, device
)
val_loss,   val_iou,   val_dice   = eval_epoch_seg(
    model_unet, loaders["valid"], criterion_seg, device
)
# The plateau scheduler monitors the validation loss.
scheduler_seg.step(val_loss)

print(f"Unet Baseline train IoU={train_iou:.4f}, Dice={train_dice:.4f}")
print(f"Unet Baseline val IoU={val_iou:.4f}, Dice={val_dice:.4f}")


Улучшенный DeepLabV3, аугментация

In [None]:
import torch
import torchvision.transforms.functional as TF
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from torch.utils.data import DataLoader


class PairedAugSegDataset(CocoSegDataset):
    """CocoSegDataset whose random geometric augmentations hit image and mask alike.

    Flipping or rotating only the image (as a plain image transform pipeline does)
    leaves the mask in the original orientation, so the target no longer matches
    the input. Here the same flip decision and the same rotation angle are applied
    to both before the per-item transforms run.

    Args:
        img_dir: directory with the images.
        ann_file: path to the COCO annotation file.
        img_transform: applied to the PIL image after the paired augmentations.
        mask_transform: applied to the PIL mask after the paired augmentations.
        size: ``(height, width)`` both image and mask are resized to first.
        hflip_p: probability of a horizontal flip.
        max_angle: rotation angle is drawn uniformly from ``[-max_angle, max_angle]``.
    """

    def __init__(self, img_dir, ann_file, img_transform=None, mask_transform=None,
                 size=(256, 256), hflip_p=0.5, max_angle=15):
        # Without transforms the parent returns the untouched PIL image and PIL mask.
        super().__init__(img_dir, ann_file, img_transform=None, mask_transform=None)
        self.post_img_transform = img_transform
        self.post_mask_transform = mask_transform
        self.size = list(size)
        self.hflip_p = hflip_p
        self.max_angle = max_angle

    def __getitem__(self, idx):
        img, mask = super().__getitem__(idx)

        img = TF.resize(img, self.size)
        # Nearest-neighbour keeps the mask values binary.
        mask = TF.resize(mask, self.size, interpolation=InterpolationMode.NEAREST)

        if torch.rand(1).item() < self.hflip_p:
            img, mask = TF.hflip(img), TF.hflip(mask)

        angle = torch.empty(1).uniform_(-self.max_angle, self.max_angle).item()
        # TF.rotate defaults to nearest-neighbour interpolation and zero fill.
        img, mask = TF.rotate(img, angle), TF.rotate(mask, angle)

        if self.post_img_transform:
            img = self.post_img_transform(img)
        if self.post_mask_transform:
            mask = self.post_mask_transform(mask)
        return img, mask


# Image-only part of the augmentation: colour jitter does not move pixels, so it
# needs no counterpart on the mask. Resize/flip/rotation live in PairedAugSegDataset.
aug_img_tf = transforms.Compose([
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

aug_mask_tf = mask_tf

train_ds_aug = PairedAugSegDataset(
    img_dir=os.path.join(ROOT, "train"),
    ann_file=os.path.join(ROOT, "train", "_annotations.coco.json"),
    img_transform=aug_img_tf,
    mask_transform=aug_mask_tf,
    size=(256, 256),
    hflip_p=0.5,
    max_angle=15,
)
valid_ds = CocoSegDataset(
    img_dir=os.path.join(ROOT, "valid"),
    ann_file=os.path.join(ROOT, "valid", "_annotations.coco.json"),
    img_transform=img_tf,
    mask_transform=mask_tf
)

train_loader_aug = DataLoader(train_ds_aug, batch_size=8, shuffle=True, num_workers=2)
valid_loader = DataLoader(valid_ds, batch_size=8, shuffle=False, num_workers=2)

# Fetch one batch per loader once instead of building a new iterator for every shape.
_aug_imgs, _aug_masks = next(iter(train_loader_aug))
print("Train aug batch: ", _aug_imgs.shape, _aug_masks.shape)
_val_imgs, _val_masks = next(iter(valid_loader))
print("Valid batch: ", _val_imgs.shape, _val_masks.shape)

In [None]:
import segmentation_models_pytorch as smp
import torch.nn as nn

# Same architecture as the baseline U-Net: ResNet-34 encoder, one logit channel.
model_unet_ft = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
    activation=None
).to(device)

# Freeze every parameter whose name starts with "encoder"; the rest stays trainable.
for name, param in model_unet_ft.named_parameters():
    if name.startswith("encoder"):
        param.requires_grad = False

from segmentation_models_pytorch.losses import DiceLoss
bce_loss  = nn.BCEWithLogitsLoss()
# from_logits=True is the smp default, spelled out here: DiceLoss applies the
# sigmoid to its input itself.
dice_loss = DiceLoss(mode='binary', from_logits=True)

def composite_loss(logits, masks):
    """Sum of binary cross-entropy and Dice loss, both computed from raw logits.

    Args:
        logits: model output with a channel dimension of size 1 at dim 1.
        masks: integer masks without a channel dimension.

    Returns:
        Scalar loss tensor ``bce + dice``.
    """
    bce = bce_loss(logits.squeeze(1), masks.float())
    # Pass the raw logits: DiceLoss(from_logits=True) applies the sigmoid internally,
    # so feeding sigmoid(logits) would squash the predictions twice.
    dice = dice_loss(logits, masks.unsqueeze(1).float())
    return bce + dice

# The optimizer is given all model parameters, including the currently frozen ones.
optimizer_ft = torch.optim.Adam(model_unet_ft.parameters(), lr=1e-4)
scheduler_ft = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer_ft, mode='min', factor=0.5, patience=1, verbose=True
)

def train_epoch_improved(model, loader, optimizer, device):
    """Train for one epoch using the module-level ``composite_loss``.

    Args:
        model: segmentation network returning single-channel logits.
        loader: iterable of ``(images, masks)`` batches.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.train()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(images)
        batch_loss = composite_loss(logits, targets)
        batch_loss.backward()
        optimizer.step()
        loss_sum += batch_loss.item()

        probs = torch.sigmoid(logits).squeeze(1)
        iou_per_batch.append(iou_score(probs, targets).item())
        dice_per_batch.append(dice_score(probs, targets).item())

    mean_iou = sum(iou_per_batch) / len(iou_per_batch)
    mean_dice = sum(dice_per_batch) / len(dice_per_batch)
    return loss_sum / len(loader), mean_iou, mean_dice

def eval_epoch_improved(model, loader, device):
    """Evaluate with the module-level ``composite_loss`` without tracking gradients.

    Args:
        model: segmentation network returning single-channel logits.
        loader: iterable of ``(images, masks)`` batches.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.eval()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    with torch.no_grad():
        for images, targets in loader:
            images = images.to(device)
            targets = targets.to(device)

            logits = model(images)
            loss_sum += composite_loss(logits, targets).item()

            probs = torch.sigmoid(logits).squeeze(1)
            iou_per_batch.append(iou_score(probs, targets).item())
            dice_per_batch.append(dice_score(probs, targets).item())

    mean_iou = sum(iou_per_batch) / len(iou_per_batch)
    mean_dice = sum(dice_per_batch) / len(dice_per_batch)
    return loss_sum / len(loader), mean_iou, mean_dice


Прогоним улучшенный UNet с composite loss и аугментациями

In [None]:
# Three epochs with the encoder frozen, training on the augmented loader.
for epoch in range(1, 4):
    tr_loss, tr_iou, tr_dice = train_epoch_improved(
        model_unet_ft, train_loader_aug, optimizer_ft, device
    )
    val_loss, val_iou, val_dice = eval_epoch_improved(
        model_unet_ft, valid_loader, device
    )
    # The plateau scheduler monitors the validation loss.
    scheduler_ft.step(val_loss)

    print(
        f"Epoch {epoch} "
        f"train IoU={tr_iou:.4f}, Dice={tr_dice:.4f} | "
        f"val IoU={val_iou:.4f}, Dice={val_dice:.4f}"
    )


In [None]:
import torch
import torchvision.transforms.functional as TF

# Unfreeze the whole network (the encoder was frozen when the model was created).
for param in model_unet_ft.parameters():
    param.requires_grad = True


class HFlipSegDataset(CocoSegDataset):
    """CocoSegDataset with a random horizontal flip shared by image and mask.

    A flip placed inside the image transform alone mirrors the image but not its
    mask, so the target stops matching the input. Here one random decision is
    applied to both before the per-item transforms run.

    Args:
        img_dir: directory with the images.
        ann_file: path to the COCO annotation file.
        img_transform: applied to the PIL image after the optional flip.
        mask_transform: applied to the PIL mask after the optional flip.
        hflip_p: probability of flipping a sample.
    """

    def __init__(self, img_dir, ann_file, img_transform=None, mask_transform=None,
                 hflip_p=0.5):
        # Without transforms the parent returns the untouched PIL image and PIL mask.
        super().__init__(img_dir, ann_file, img_transform=None, mask_transform=None)
        self.post_img_transform = img_transform
        self.post_mask_transform = mask_transform
        self.hflip_p = hflip_p

    def __getitem__(self, idx):
        img, mask = super().__getitem__(idx)
        if torch.rand(1).item() < self.hflip_p:
            img, mask = TF.hflip(img), TF.hflip(mask)
        if self.post_img_transform:
            img = self.post_img_transform(img)
        if self.post_mask_transform:
            mask = self.post_mask_transform(mask)
        return img, mask


# Deterministic per-image steps only; the random flip is handled by HFlipSegDataset.
simple_img_tf = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
train_ds_simple = HFlipSegDataset(
    img_dir=os.path.join(ROOT, "train"),
    ann_file=os.path.join(ROOT, "train", "_annotations.coco.json"),
    img_transform=simple_img_tf,
    mask_transform=mask_tf,
    hflip_p=0.5,
)
train_loader_simple = DataLoader(train_ds_simple, batch_size=8, shuffle=True, num_workers=2)

# Three more epochs with every layer trainable, reusing the same optimizer and scheduler.
for epoch in range(0, 3):
    tr_loss, tr_iou, tr_dice = train_epoch_improved(
        model_unet_ft, train_loader_simple, optimizer_ft, device
    )
    val_loss, val_iou, val_dice = eval_epoch_improved(
        model_unet_ft, valid_loader, device
    )
    scheduler_ft.step(val_loss)
    print(f"Epoch {epoch} train IoU={tr_iou:.4f}, Dice={tr_dice:.4f} | val IoU={val_iou:.4f}, Dice={val_dice:.4f}")


Реализовываем минимальный UNet

In [None]:
# Share of foreground pixels in the training masks, accumulated over one full pass
# of the training loader.
pos_pix, total_pix = 0, 0
for _, m in loaders["train"]:
    pos_pix += (m>0).sum().item()
    total_pix += m.numel()
print("Pos‑ratio:", pos_pix/total_pix)

import torch, torch.nn as nn
class UNetLite(nn.Module):
    """Two-level U-Net with skip connections and a single-channel logit output.

    Args:
        ch: number of channels of the first encoder stage; deeper stages use
            ``2*ch`` and ``4*ch``.
    """

    @staticmethod
    def _double_conv(c_in, c_out):
        """Two 3x3 convolutions (stride 1, padding 1), each followed by ReLU."""
        return nn.Sequential(
            nn.Conv2d(c_in, c_out, 3, 1, 1), nn.ReLU(),
            nn.Conv2d(c_out, c_out, 3, 1, 1), nn.ReLU())

    def __init__(self, ch=64):
        super().__init__()
        # Encoder: each stage is followed by a 2x max-pool.
        self.enc1 = self._double_conv(3, ch)
        self.pool1 = nn.MaxPool2d(2)
        self.enc2 = self._double_conv(ch, ch * 2)
        self.pool2 = nn.MaxPool2d(2)
        self.bridge = self._double_conv(ch * 2, ch * 4)
        # Decoder: 2x transposed-conv upsampling; the conv stages take the
        # upsampled features concatenated with the matching encoder output.
        self.up2 = nn.ConvTranspose2d(ch * 4, ch * 2, 2, 2)
        self.dec2 = self._double_conv(ch * 4, ch * 2)
        self.up1 = nn.ConvTranspose2d(ch * 2, ch, 2, 2)
        self.dec1 = self._double_conv(ch * 2, ch)
        self.out = nn.Conv2d(ch, 1, 1)

    def forward(self, x):
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool1(skip1))
        bottom = self.bridge(self.pool2(skip2))

        merged = torch.cat([self.up2(bottom), skip2], 1)
        merged = self.dec2(merged)
        merged = torch.cat([self.up1(merged), skip1], 1)
        return self.out(self.dec1(merged))

import numpy as np
import torch.nn.functional as F

def iou_score(pred, mask, eps=1e-6):
    """Mean intersection-over-union of thresholded predictions, as a Python float.

    Args:
        pred: scores; values above 0.5 count as foreground.
        mask: binary ground-truth mask (0/1), any numeric dtype.
        eps: added to numerator and denominator so an empty union is defined.

    Returns:
        float: per-sample IoU (summed over dims 1 and 2) averaged over the batch.
    """
    pred = (pred > 0.5).float()
    mask = mask.float()
    inter = (pred * mask).sum(dim=(1, 2))
    # |A ∪ B| = |A| + |B| - |A ∩ B|. Subtracting the intersection from the clamped
    # sum instead would leave only the symmetric difference in the denominator.
    union = pred.sum(dim=(1, 2)) + mask.sum(dim=(1, 2)) - inter
    return ((inter + eps) / (union + eps)).mean().item()

def dice_score(pred, mask, eps=1e-6):
    """Mean Dice coefficient of thresholded predictions, as a Python float.

    Args:
        pred: scores; values above 0.5 count as foreground.
        mask: ground-truth mask multiplied and summed as-is.
        eps: added to numerator and denominator so empty masks are defined.

    Returns:
        float: per-sample Dice (summed over dims 1 and 2) averaged over the batch.
    """
    binary = (pred > 0.5).float()
    overlap = (binary * mask).sum(dim=(1, 2))
    sizes = binary.sum(dim=(1, 2)) + mask.sum(dim=(1, 2))
    per_sample = (2 * overlap + eps) / (sizes + eps)
    return per_sample.mean().item()

def train_epoch_fcn(model, loader, optim, criterion, device):
    """Train a single-channel segmentation model for one epoch.

    Args:
        model: network returning logits with a channel dimension of size 1.
        loader: iterable of ``(images, masks)`` batches.
        optim: optimizer stepped once per batch.
        criterion: loss called as ``criterion(logits.squeeze(1), masks.float())``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.train()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        logits = model(images)
        batch_loss = criterion(logits.squeeze(1), targets.float())
        optim.zero_grad()
        batch_loss.backward()
        optim.step()

        # Metric bookkeeping needs no gradients.
        with torch.no_grad():
            probabilities = torch.sigmoid(logits).squeeze(1)
            iou_per_batch.append(iou_score(probabilities, targets))
            dice_per_batch.append(dice_score(probabilities, targets))
            loss_sum += batch_loss.item()

    return (loss_sum / len(loader), np.mean(iou_per_batch), np.mean(dice_per_batch))

@torch.no_grad()
def eval_epoch_fcn(model, loader, criterion, device):
    """Evaluate a single-channel segmentation model; runs entirely without gradients.

    Args:
        model: network returning logits with a channel dimension of size 1.
        loader: iterable of ``(images, masks)`` batches.
        criterion: loss called as ``criterion(logits.squeeze(1), masks.float())``.
        device: device the batches are moved to.

    Returns:
        ``(mean_loss, mean_iou, mean_dice)`` averaged over batches.
    """
    model.eval()
    loss_sum = 0
    iou_per_batch, dice_per_batch = [], []

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        logits = model(images)
        batch_loss = criterion(logits.squeeze(1), targets.float())
        probabilities = torch.sigmoid(logits).squeeze(1)

        iou_per_batch.append(iou_score(probabilities, targets))
        dice_per_batch.append(dice_score(probabilities, targets))
        loss_sum += batch_loss.item()

    return (loss_sum / len(loader), np.mean(iou_per_batch), np.mean(dice_per_batch))


# Two independent UNetLite instances: `model_base` is trained in this cell,
# `model` is left untouched here and trained by the next cell.
model = UNetLite().to(device)
model_base = UNetLite(ch=64).to(device)

# Weight of the positive class = (#background pixels / #foreground pixels), floored at 1.
# (The unweighted loss and the lr=1e-4 optimizer that used to be created first were
# overwritten before use and have been removed.)
pos_w = torch.tensor(max(1.0, 1/(pos_pix/total_pix)-1)).to(device)
criterion_base = nn.BCEWithLogitsLoss(pos_weight=pos_w)
optimizer_base = torch.optim.Adam(model_base.parameters(), lr=1e-3)
EPOCHS = 10

for epoch in range(1, EPOCHS+1):
    tr_loss, tr_iou, tr_dice = train_epoch_fcn(
        model_base, train_loader_simple, optimizer_base, criterion_base, device
    )
    val_loss, val_iou, val_dice = eval_epoch_fcn(
        model_base, valid_loader, criterion_base, device
    )
    print(f"E{epoch} val IoU={val_iou:.3f} Dice={val_dice:.3f}")


Лосс, оптимизатор, трейн луп

In [None]:
from segmentation_models_pytorch.losses import DiceLoss

# Weight of the positive class = (#background pixels / #foreground pixels), floored at 1.
pos_w = torch.tensor(max(1.0, 1/(pos_pix/total_pix)-1)).to(device)
bce  = nn.BCEWithLogitsLoss(pos_weight=pos_w)
# from_logits=True is the smp default, spelled out here: DiceLoss applies the
# sigmoid to its input itself.
dice = DiceLoss(mode="binary", from_logits=True)

def seg_loss(logits, y):
    """Weighted BCE plus Dice loss, both computed from raw logits."""
    # Raw logits go into DiceLoss; passing sigmoid(logits) would apply the sigmoid twice.
    return bce(logits.squeeze(1), y.float()) + dice(logits, y.unsqueeze(1).float())

opt  = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(5):
    model.train()
    running_iou, running_dice = [], []
    for i, (x, y) in enumerate(train_loader_simple):
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        logit = model(x)
        loss  = seg_loss(logit, y)
        loss.backward()
        opt.step()
        # Training metrics are sampled on every 100th batch only.
        if i % 100 == 0:
            with torch.no_grad():
                pred = torch.sigmoid(logit).squeeze(1)
                # float() accepts both a 0-dim tensor and the plain Python float that
                # the most recent iou_score/dice_score definitions return; calling
                # .item() on that float raised AttributeError.
                running_iou.append(float(iou_score(pred, y)))
                running_dice.append(float(dice_score(pred, y)))
            print(f"ep{epoch+1} b{i}: IOU {np.mean(running_iou):.3f} Dice {np.mean(running_dice):.3f}")

    model.eval()
    val_iou, val_dice = [], []
    with torch.no_grad():
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            pred = torch.sigmoid(model(x)).squeeze(1)
            val_iou.append(float(iou_score(pred, y)))
            val_dice.append(float(dice_score(pred, y)))
    print(f"ep{epoch+1} VAL: IoU {np.mean(val_iou):.3f} Dice {np.mean(val_dice):.3f}")

# Выводы по ЛР №7

- Бейзлайн UNet после 5 эпох дал примерно IoU ~ 0.10, Dice ~ 0.18, что довольно слабо для практики
- Простые аугментации flip, rotate подняли метрики на пару десятков сотых, но чуда не случилось
- Замена модели на DeepLabV3+ с encoder‑efficientnet‑b3 сразу дала заметный прирост: IoU ~ 0.29, Dice ~ 0.46
- Сделанный облегченный UNet‑lite оказался посередине: качество чуть ниже DeepLab, зато легче и быстрее
- Чтобы улучшить дальше нужно дольше учить (20‑30 эпох) и вероятно добавить более «умные» loss‑функции или аугментации