In [65]:
import os
import sys
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from tqdm import tqdm
from sklearn.metrics import jaccard_score
from typing import List

sys.path.append(os.path.join(os.getcwd(), ".."))
from models.dataset import PascalPartDataset
from models.model import UNet
from utils.transforms import Compose, Resize, Normalize, RandomHorizontalFlip

# Параметры

In [40]:
batch_size: int = 4
learning_rate: float = 1e-3
num_epochs: int = 3
num_workers: int = 2
device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Преобразования

In [41]:
transform = Compose(
    [
        Resize((256, 256)),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        RandomHorizontalFlip(),
    ]
)

# Датасет

In [42]:
train_dataset: PascalPartDataset = PascalPartDataset(
    images_path="../data/JPEGImages",
    masks_path="../data/gt_masks",
    transform=transform,
    mode="train",
)
val_dataset: PascalPartDataset = PascalPartDataset(
    images_path="../data/JPEGImages",
    masks_path="../data/gt_masks",
    transform=transform,
    mode="val",
)

# DataLoader

In [43]:
train_loader: DataLoader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
)
val_loader: DataLoader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
)

# Модель

In [44]:
model: UNet = UNet(n_classes=7).to(device)

# Функция потерь

In [45]:
criterion: nn.CrossEntropyLoss = nn.CrossEntropyLoss()

# Оптимизатор

In [46]:
optimizer: optim.Adam = optim.Adam(model.parameters(), lr=learning_rate)

# Обучение

In [47]:
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for i, (images, masks) in enumerate(tqdm(train_loader)):
        images = images.to(device).float().permute(0, 3, 1, 2)
        masks = masks.long().to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, masks)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)
    print(f"Epoch: {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}")

100%|███████████████████████████████████████| 707/707 [2:10:01<00:00, 11.03s/it]


Epoch: 1/3, Train Loss: 0.8446


100%|███████████████████████████████████████| 707/707 [2:02:08<00:00, 10.36s/it]


Epoch: 2/3, Train Loss: 0.7195


100%|███████████████████████████████████████| 707/707 [1:36:00<00:00,  8.15s/it]

Epoch: 3/3, Train Loss: 0.6972





# Оценка

In [66]:
    model.eval()
    val_loss = 0.0
    iou_scores = []
    with torch.no_grad():
        for images, masks in val_loader:
            images = images.to(device).float()
            masks = masks.long().to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)

            val_loss += loss.item()

            _, predictions = torch.max(outputs, 1)
            iou_scores.append(jaccard_score(masks.cpu().numpy(), predictions.cpu().numpy(), average='weighted'))

    val_loss /= len(val_loader)
    iou_score = np.mean(iou_scores)
    print(f"Epoch: {epoch+1}/{num_epochs}, Val Loss: {val_loss:.4f}, IoU Score: {iou_score:.4f}")

RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[4, 256, 256, 3] to have 3 channels, but got 256 channels instead

# Сохранение модели

In [18]:
torch.save(model.state_dict(), "../models/unet_model.pth")