# Seed initialization (to make results reproducible)

In [None]:
import torch
import random
import numpy as np

# Seed every random number generator used in the notebook (PyTorch, Python's
# `random`, NumPy) with the same fixed value.
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)
# Request deterministic kernels; with warn_only=True an operation without a
# deterministic implementation produces a warning instead of an error.
torch.use_deterministic_algorithms(True, warn_only=True)
# Deterministic cuDNN algorithms and no cuDNN auto-tuning.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

def seed_worker(worker_id):
    """Seed NumPy and Python's ``random`` from the current PyTorch seed.

    Passed to the DataLoaders in this notebook as ``worker_init_fn``.

    Args:
        worker_id: Index supplied by the caller; not used.
    """
    # Both libraries receive the same seed, reduced to 32 bits.
    derived_seed = torch.initial_seed() % (1 << 32)
    for apply_seed in (np.random.seed, random.seed):
        apply_seed(derived_seed)

# Dedicated, fixed-seed generator; handed to the DataLoaders via `generator=g`.
g = torch.Generator()
g.manual_seed(0)

# Environment setup (execute for any Step)

## Package install

In [None]:
!pip install wget
!pip install requests gdown
!pip install fvcore
!pip install torchmetrics

## Import

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.patches as mpatches
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
from pathlib import Path
import wget
import gdown
from torchvision import transforms
from torchvision.datasets import VisionDataset
from torch.utils.data import DataLoader
from enum import Enum
from google.colab import drive
import os
import torch.nn.functional as F
import time
from fvcore.nn import FlopCountAnalysis, flop_count_table
import torch.optim.lr_scheduler as lr_scheduler
import cv2
from torch.nn.utils import clip_grad
import albumentations as A
import shutil

from models.deeplab_v2 import get_deeplab_v2
from models.pidnet import PIDNet
from losses.bondary import BondaryLoss
from losses.cross_entropy import CrossEntropy
from losses.focal import FocalLoss
from losses.ohem import OhemCrossEntropy
from models.discriminator import FCDiscriminator
from models.bisenet import BiSeNet
from models.stdc import STDC

## Variables

In [None]:
# Dataset locations (relative to the working directory).
DATA_DIR = 'loveDA_dataset'
TRAIN_ZIP = f'{DATA_DIR}/train.zip'
VAL_ZIP = f'{DATA_DIR}/validation.zip'
TEST_ZIP = f'{DATA_DIR}/test.zip'
TRAIN_DIR = f'{DATA_DIR}/train'
VAL_DIR = f'{DATA_DIR}/validation'
TEST_DIR = f'{DATA_DIR}/test'
# Sub-directory names inside each split: <split>/<Rural|Urban>/<images_png|masks_png>.
RURAL_PATH = "Rural"
URBAN_PATH = "Urban"
IMG_PATH = "images_png"
MASK_PATH = "masks_png"
# Pre-trained weight files.
PRETRAINED_WEIGHTS_DIR = 'pretrained_weights'
DEEPLAB_V2_WEIGHTS = f'{PRETRAINED_WEIGHTS_DIR}/DeepLab_resnet_pretrained_imagenet.pth'
STDC1_WEIGHTS = f"{PRETRAINED_WEIGHTS_DIR}/STDC1_pretrained_weights.pth"

# Label excluded from the losses. LoveDA.__getitem__ subtracts 1 from every
# mask value, so a raw mask value of 0 becomes this index.
IGNORE_INDEX=-1

# PIL conversion modes used by pil_loader for images and masks respectively.
RGB = 'RGB'
grayscale = 'L'

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class Domain(Enum):
    """Enumeration of the two dataset domains (rural and urban)."""
    RURAL = 0
    URBAN = 1

class ModelType(Enum):
    """Segmentation architectures distinguished by the benchmarking helpers.

    ``calculate_latency_fps`` and ``calculate_flops_params`` branch on this
    value to decide which inputs to feed to the model.
    """
    DEEPLAB = 0
    PIDNET = 1
    BISENET = 2
    STDC = 3

# Class name -> (mask value, RGB colour used when plotting).
# The mask values look like k/255 for k = 1..7, i.e. the raw label scaled to
# [0, 1] — TODO confirm.
# NOTE(review): the LoveDA documentation lists background as label 1 and barren
# as label 5; here BARREN carries 1/255 and BG carries 5/255 — confirm that the
# two names are not swapped.
categories = {
    'BARREN': (0.003921568859368563, (159, 129, 183)),       # Lilac
    'AGRICULTURE': (0.027450980618596077, (255, 195, 128)),  # Orange
    'BUILDING': (0.007843137718737125, (255, 0, 0)),         # Red
    'WATER': (0.01568627543747425, (0, 0, 255)),             # Blue
    'ROAD': (0.0117647061124444, (255, 255, 0)),             # Yellow
    'BG': (0.019607843831181526, (255, 255, 255)),           # White
    'FOREST': (0.0235294122248888, (0, 255, 0))              # Green
}

# Re-order by mask value so that the position of a class in the dict matches
# the class index used by plot_tensor_mask.
categories = dict(sorted(categories.items(), key=lambda item: item[1][0]))

# Allocator setting for PyTorch's CUDA caching allocator.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Number of segmentation classes = number of entries in `categories`.
num_classes = len(categories.keys())

## Dataset: LoveDA

### Download dataset

#### Without Google Drive

In [None]:
"""
download_directory = Path(DATA_DIR)
if not download_directory.exists():
    download_directory.mkdir(exist_ok=True)

# Zip download

train_zip = Path(TRAIN_ZIP)
if not train_zip.exists():
    !wget -O {TRAIN_ZIP} 'https://zenodo.org/record/5706578/files/Train.zip?download=1'

val_zip = Path(VAL_ZIP)
if not val_zip.exists():
    !wget -O {VAL_ZIP} 'https://zenodo.org/records/5706578/files/Val.zip?download=1'

test_zip = Path(TEST_ZIP)
if not test_zip.exists():
    !wget -O {TEST_ZIP} 'https://zenodo.org/records/5706578/files/Test.zip?download=1'

# Zip extraction

## I suppose to not cancel the original zip since who knows

train_dir = Path(TRAIN_DIR)
if not train_dir.exists():
    !unzip -q {TRAIN_ZIP} -d {DATA_DIR}
    !mv {DATA_DIR}/Train {TRAIN_DIR}

val_dir = Path(VAL_DIR)
if not val_dir.exists():
    !unzip -q {VAL_ZIP} -d {DATA_DIR}
    !mv {DATA_DIR}/Val {VAL_DIR}

test_dir = Path(TEST_DIR)
if not test_dir.exists():
    !unzip -q {TEST_ZIP} -d {DATA_DIR}
    !mv {DATA_DIR}/Test {TEST_DIR}
"""

#### With Google Drive

In [None]:
def download_to_gdrive():
    """Mount Google Drive and download the LoveDA zip archives into it.

    Each archive (train, validation, test) is fetched with ``wget`` only when
    the corresponding zip file is not already present under
    ``/content/drive/MyDrive/<DATA_DIR>``. Requires the Colab ``drive`` module
    and an IPython kernel (the ``!`` lines are shell escapes that interpolate
    the local variables in braces).
    """
    drive_path_dir = '/content/drive'
    mydrive_path_dir = f'{drive_path_dir}/MyDrive'
    data_path_dir = f'{mydrive_path_dir}/{DATA_DIR}'
    train_path_zip = f'{mydrive_path_dir}/{TRAIN_ZIP}'
    val_path_zip = f'{mydrive_path_dir}/{VAL_ZIP}'
    test_path_zip = f'{mydrive_path_dir}/{TEST_ZIP}'
    # NOTE(review): the three *_path_dir values below are never read in this function.
    train_path_dir = f'{mydrive_path_dir}/{TRAIN_DIR}'
    val_path_dir = f'{mydrive_path_dir}/{VAL_DIR}'
    test_path_dir = f'{mydrive_path_dir}/{TEST_DIR}'

    drive.mount(drive_path_dir)

    # Create the dataset folder on Drive on first use.
    download_directory = Path(data_path_dir)
    if not download_directory.exists():
        download_directory.mkdir(exist_ok=True)

    # NOTE(review): this URL uses `/record/` while the other two use `/records/`.
    train_zip = Path(train_path_zip)
    if not train_zip.exists():
        !wget -O {train_path_zip} 'https://zenodo.org/record/5706578/files/Train.zip?download=1'

    val_zip = Path(val_path_zip)
    if not val_zip.exists():
        !wget -O {val_path_zip} 'https://zenodo.org/records/5706578/files/Val.zip?download=1'

    test_zip = Path(test_path_zip)
    if not test_zip.exists():
        !wget -O {test_path_zip} 'https://zenodo.org/records/5706578/files/Test.zip?download=1'

def extract_from_gdrive():
    """Mount Google Drive and unpack the train/validation archives locally.

    The zips are read from ``/content/drive/MyDrive/<DATA_DIR>`` and extracted
    into the local ``DATA_DIR``; the extracted ``Train``/``Val`` folders are
    then renamed to ``TRAIN_DIR``/``VAL_DIR``. A split is skipped when its
    local directory already exists. Extraction of the test split is disabled.
    Requires the Colab ``drive`` module and an IPython kernel.
    """

    drive_path_dir = '/content/drive'
    mydrive_path_dir = f'{drive_path_dir}/MyDrive'
    # NOTE(review): data_path_dir, test_path_zip and the three *_path_dir
    # values are never read by the active code in this function.
    data_path_dir = f'{mydrive_path_dir}/{DATA_DIR}'
    train_path_zip = f'{mydrive_path_dir}/{TRAIN_ZIP}'
    val_path_zip = f'{mydrive_path_dir}/{VAL_ZIP}'
    test_path_zip = f'{mydrive_path_dir}/{TEST_ZIP}'
    train_path_dir = f'{mydrive_path_dir}/{TRAIN_DIR}'
    val_path_dir = f'{mydrive_path_dir}/{VAL_DIR}'
    test_path_dir = f'{mydrive_path_dir}/{TEST_DIR}'

    drive.mount(drive_path_dir)

    # The existence checks look at the LOCAL directories, not at Drive.
    train_dir = Path(TRAIN_DIR)
    if not train_dir.exists():
        !unzip -q {train_path_zip} -d {DATA_DIR}
        !mv {DATA_DIR}/Train {TRAIN_DIR}

    val_dir = Path(VAL_DIR)
    if not val_dir.exists():
        !unzip -q {val_path_zip} -d {DATA_DIR}
        !mv {DATA_DIR}/Val {VAL_DIR}

    # Test split extraction intentionally left disabled.
    #test_dir = Path(TEST_DIR)
    #if not test_dir.exists():
    #    !unzip -q {test_path_zip} -d {DATA_DIR}
    #    !mv {DATA_DIR}/Test {TEST_DIR}

In [None]:
def copy_to_gdrive():
    """Mount Google Drive and copy the local dataset zips onto it.

    Every archive (train, validation, test) is copied from its local path to
    ``/content/drive/MyDrive/<same relative path>`` unless a file already
    exists at the destination. Requires the Colab ``drive`` module.
    """
    drive_path_dir = '/content/drive'
    mydrive_path_dir = f'{drive_path_dir}/MyDrive'

    drive.mount(drive_path_dir)

    # Create the directory if it doesn't exist
    os.makedirs(f'{mydrive_path_dir}/{DATA_DIR}', exist_ok=True)

    # Same treatment for the three archives, in train / validation / test order.
    for local_zip in (TRAIN_ZIP, VAL_ZIP, TEST_ZIP):
        drive_zip = f'{mydrive_path_dir}/{local_zip}'
        if os.path.exists(drive_zip):
            continue
        shutil.copy(local_zip, drive_zip)
        print(f"Copied {local_zip} to {drive_zip}")

In [None]:
# Enable the first two calls only when the archives must be (re)created on Drive.
#download_to_gdrive()
#copy_to_gdrive()
# Default path: unpack the archives already stored on Drive.
extract_from_gdrive()

### Dataset construction

In [None]:
def pil_loader(path, codify):
    """Open the image at ``path`` and return it converted to PIL mode ``codify``.

    Args:
        path: Location of the image file.
        codify: PIL mode string passed to ``Image.convert`` (e.g. ``'RGB'``, ``'L'``).
    """
    # The conversion happens while the file handle is still open.
    with open(path, 'rb') as handle:
        return Image.open(handle).convert(codify)

def load_images(root_path, directory, img, mask):
    """List the image file names of one dataset sub-directory.

    Args:
        root_path: ``pathlib.Path`` of the split root.
        directory: Name of the sub-directory below ``root_path``.
        img: Name of the folder holding the images.
        mask: Name of the folder holding the masks.

    Returns:
        The image file names, sorted alphabetically. ``Path.iterdir`` yields
        entries in arbitrary order, so sorting makes the dataset index order
        identical from run to run and machine to machine.

    Raises:
        RuntimeError: If either folder is missing, or if the two folders do
            not contain exactly the same file names.
    """
    directory_path = root_path / directory
    img_path = directory_path / img
    mask_path = directory_path / mask
    if not img_path.is_dir() or not mask_path.is_dir():
        raise RuntimeError("folder structure different from expected")

    images = sorted(item.name for item in img_path.iterdir())
    masks = {item.name for item in mask_path.iterdir()}

    # Every image needs a mask with the same file name, and vice versa.
    if set(images) != masks:
        raise RuntimeError("images and masks do not match")

    return images

def generate_bd(mask, edge_pad=False, is_flip=False, edge_size=2):
    """Build a boundary map from a label mask.

    Edges are detected with ``cv2.Canny`` (thresholds 0 and 8), optionally
    blanked on a 6-pixel frame, dilated with an ``edge_size`` x ``edge_size``
    kernel and thresholded.

    Args:
        mask: Label mask passed to ``cv2.Canny``; the caller in this file
            passes a ``uint8`` array.
        edge_pad: When true, edges within 6 pixels of the border are zeroed.
        is_flip: Not used.
        edge_size: Side of the square dilation kernel.

    Returns:
        Float array holding 1.0 where the dilated edge map exceeds 50 and 0.0
        elsewhere.
    """
    border_y = 6
    border_x = 6

    dilation_kernel = np.ones((edge_size, edge_size), np.uint8)
    boundary = cv2.Canny(mask, 0, 8)

    if edge_pad:
        # Crop the frame away and pad it back with zeros.
        interior = boundary[border_y:-border_y, border_x:-border_x]
        boundary = np.pad(interior, ((border_y, border_y), (border_x, border_x)), mode='constant')

    dilated = cv2.dilate(boundary, dilation_kernel, iterations=1)
    return (dilated > 50) * 1.0

class LoveDA(VisionDataset):
    """Segmentation dataset reading ``<root>/<directory>/<img|mask>/<file>``.

    Items are ``(image, mask)`` or, when ``bd`` is true,
    ``(image, mask, boundary)``.
    """

    def __init__(self, root, img, mask, directories=None, transforms=None, bd=False):
        """Index the files of the requested sub-directories.

        Args:
            root: Path of the split root; must be an existing directory.
            img: Name of the image folder inside each sub-directory.
            mask: Name of the mask folder inside each sub-directory.
            directories: One sub-directory name or an iterable of names.
            transforms: Optional callable invoked as
                ``transforms(image=..., mask=...)`` and returning a mapping
                with ``'image'`` and ``'mask'`` entries.
            bd: When true, ``__getitem__`` also returns a boundary map.

        Raises:
            RuntimeError: If ``root`` is not a directory, if ``directories``
                is ``None``, or if ``load_images`` rejects a sub-directory.
        """
        super().__init__(root)

        root_dir = Path(root)
        if not root_dir.is_dir():
            raise RuntimeError("root should be a directory")

        self.root = root
        self.img_path = img
        self.mask_path = mask
        self.transforms = transforms
        self.bd = bd

        # (sub-directory, file name) pairs, in the order the directories were given.
        self.image_names = []

        if directories is None:
            raise RuntimeError("at least one directory must be passed")
        if isinstance(directories, str):
            directories = [directories]

        for subdir in directories:
            self.image_names += [
                (subdir, file_name)
                for file_name in load_images(root_dir, subdir, img, mask)
            ]

    def __getitem__(self, index):
        """Load, transform and tensorise the sample at ``index``.

        Returns:
            ``(image, mask)`` or ``(image, mask, boundary)``. ``mask`` is a
            ``long`` tensor whose values are the stored mask values minus 1.
        """
        subdir, file_name = self.image_names[index]
        sample_root = f'{self.root}/{subdir}'

        image = np.array(pil_loader(f'{sample_root}/{self.img_path}/{file_name}', RGB))
        mask = np.array(pil_loader(f'{sample_root}/{self.mask_path}/{file_name}', grayscale))

        if self.transforms is not None:
            augmented = self.transforms(image=image, mask=mask)
            image, mask = augmented['image'], augmented['mask']

        image = transforms.ToTensor()(image)

        # The mask goes tensor -> PIL image -> integer tensor before the shift.
        mask = transforms.ToTensor()(mask).squeeze(0)
        mask = transforms.PILToTensor()(transforms.ToPILImage()(mask)).squeeze(0).long()
        mask = mask - 1

        if not self.bd:
            return image, mask

        # The boundary map is computed from the shifted mask cast to uint8.
        return image, mask, generate_bd(mask.numpy().astype(np.uint8))

    def __len__(self):
        """Number of indexed samples."""
        return len(self.image_names)

### Statistics and metrics

#### Average, Standard deviation

In [None]:
def compute_avg_std(dataset, dataloader, device):
    """Compute the per-channel mean and standard deviation of a dataset.

    Statistics are taken over every pixel of every image served by
    ``dataloader``. Sums are accumulated in float64 and the variance is
    clamped at zero, so rounding can no longer turn a (near-)constant channel
    into a NaN standard deviation.

    Args:
        dataset: Not used; kept so existing calls keep working.
        dataloader: Sized iterable of batches. The first element of each batch
            must be a ``(B, C, H, W)`` tensor; further elements (labels,
            boundary maps) are ignored.
        device: Device on which the reductions run; it must support float64.

    Returns:
        ``(num_images, mean, std)`` where ``mean`` and ``std`` are lists with
        one float per channel.
    """
    assert len(dataloader) > 0, "Dataloader must contain some data"

    with torch.no_grad():
        channel_sum = None
        channel_sq_sum = None
        data_len = 0
        tot_pixels = 0

        for batch in dataloader:
            inputs = batch[0].to(device=device, dtype=torch.float64)

            b, _, h, w = inputs.shape
            data_len += b
            tot_pixels += b * h * w

            batch_sum = inputs.sum(dim=(0, 2, 3))
            batch_sq_sum = (inputs * inputs).sum(dim=(0, 2, 3))
            # Accumulators are created from the first batch so any channel count works.
            channel_sum = batch_sum if channel_sum is None else channel_sum + batch_sum
            channel_sq_sum = batch_sq_sum if channel_sq_sum is None else channel_sq_sum + batch_sq_sum

        avg = channel_sum / tot_pixels
        # Var[x] = E[x^2] - E[x]^2; clamp the tiny negatives produced by rounding.
        variance = (channel_sq_sum / tot_pixels - avg * avg).clamp(min=0)
        std = torch.sqrt(variance)

        return data_len, avg.flatten().tolist(), std.flatten().tolist()

#### IoU

In [None]:
def calculate_iou(outputs, masks, num_classes):
    """Compute per-class IoU and mean IoU for one batch.

    Pixels whose label lies outside ``[0, num_classes)`` (for example the
    ignore index) are excluded from both intersection and union, so a
    prediction made on an unlabeled pixel no longer counts as an error.
    The mean is taken only over classes that occur in the labels or in the
    predictions of the batch; classes that are absent stay at 0 in
    ``iou_per_class`` but do not lower the mean.

    Args:
        outputs: ``(B, C, H, W)`` tensor of class scores.
        masks: ``(B, H, W)`` integer tensor of labels.
        num_classes: Number of classes ``C``.

    Returns:
        ``(miou, iou_per_class)``: a scalar tensor and a ``(num_classes,)``
        float tensor, both on ``outputs.device``. ``miou`` is 0 when no class
        is present at all.
    """
    # Predicted class = index of the highest score along the channel axis.
    _, preds = torch.max(outputs, dim=1)  # B x H x W

    labelled = (masks >= 0) & (masks < num_classes)

    iou_per_class = torch.zeros(num_classes, dtype=torch.float32, device=outputs.device)
    present = torch.zeros(num_classes, dtype=torch.bool, device=outputs.device)

    for cls in range(num_classes):
        pred_mask = (preds == cls) & labelled
        label_mask = (masks == cls) & labelled

        intersection = torch.logical_and(pred_mask, label_mask).sum().float()
        union = torch.logical_or(pred_mask, label_mask).sum().float()

        if union > 0:
            iou_per_class[cls] = intersection / union
            present[cls] = True

    if present.any():
        miou = iou_per_class[present].mean()
    else:
        miou = torch.tensor(0.0, device=outputs.device)

    return miou, iou_per_class



#### Latency, FPS

In [None]:
def calculate_latency_fps(model, device, height, width, iterations, model_type: ModelType):
    """Measure forward-pass latency and throughput on a random input.

    A single ``(1, 3, height, width)`` random image is pushed through the
    model ``iterations`` times under ``torch.no_grad()``. Timing uses the
    monotonic ``time.perf_counter`` clock and, on CUDA devices, the device is
    synchronised before and after each forward pass so that the measurement
    covers kernel execution rather than only the asynchronous launch.

    Args:
        model: Model to benchmark; the caller chooses train/eval mode.
        device: Device (``torch.device`` or string) holding the model.
        height: Input height in pixels.
        width: Input width in pixels.
        iterations: Number of timed forward passes.
        model_type: ``ModelType.DEEPLAB`` calls ``model(image)``; every other
            value calls ``model(image, mask, boundary)``, where mask and
            boundary are random tensors for ``ModelType.PIDNET`` and ``None``
            otherwise.

    Returns:
        ``(mean_latency_ms, std_latency_ms, mean_fps, std_fps)``.
    """
    image = torch.randn(1, 3, height, width).to(device)
    mask = None
    boundary = None

    if model_type == ModelType.PIDNET:
        mask = torch.randint(0, num_classes, (1, height, width), dtype=torch.int64).to(device)
        boundary = torch.randint(0, 2, (1, height, width), dtype=torch.float64).to(device)

    # CUDA kernels run asynchronously; without a barrier the clock stops early.
    sync_cuda = torch.device(device).type == 'cuda' and torch.cuda.is_available()

    latency = []
    FPS = []

    with torch.no_grad():
        for _ in range(iterations):
            if sync_cuda:
                torch.cuda.synchronize(device)
            start = time.perf_counter()

            if model_type == ModelType.DEEPLAB:
                _ = model(image)
            else:
                _ = model(image, mask, boundary)

            if sync_cuda:
                torch.cuda.synchronize(device)
            end = time.perf_counter()

            latency_i = end - start
            latency.append(latency_i)
            FPS.append(1 / latency_i)

    meanLatency = np.mean(latency) * 1000  # milliseconds
    stdLatency = np.std(latency) * 1000
    meanFPS = np.mean(FPS)
    stdFPS = np.std(FPS)

    return meanLatency, stdLatency, meanFPS, stdFPS

#### FLOPS, Params

In [None]:
def calculate_flops_params(model, device, height, width, model_type: ModelType):
    """Print the fvcore FLOP/parameter table of ``model`` for one zero input.

    Args:
        model: Model to analyse; it is moved to ``device``.
        device: Device used for the model and the dummy inputs.
        height: Input height in pixels.
        width: Input width in pixels.
        model_type: For ``ModelType.PIDNET`` the model is traced with
            ``(image, mask, boundary)``; otherwise with the image alone.
    """
    model = model.to(device)
    image = torch.zeros(1, 3, height, width).to(device)

    analysis_inputs = image
    if model_type == ModelType.PIDNET:
        # The PIDNet wrapper also takes a label mask and a boundary map.
        mask = torch.zeros(1, height, width, dtype=torch.int64).to(device)
        boundary = torch.zeros(1, height, width, dtype=torch.float64).to(device)
        analysis_inputs = (image, mask, boundary)

    print(flop_count_table(FlopCountAnalysis(model, analysis_inputs)))

### Plot losses and mious

In [None]:
def plot_losses_mious(train_losses, eval_losses, miou_scores, num_epochs):
    """Plot the loss curves and the mIoU curve side by side.

    Args:
        train_losses: Training loss per epoch.
        eval_losses: Validation loss per epoch.
        miou_scores: mIoU per epoch.
        num_epochs: Number of epochs; x ticks are labelled 1..num_epochs.
    """
    # One figure with two axes placed next to each other.
    fig, (loss_ax, miou_ax) = plt.subplots(1, 2, figsize=(20, 5))

    # Left axis: training and validation loss. Right axis: mIoU.
    loss_ax.plot(train_losses, label='Training Loss')
    loss_ax.plot(eval_losses, label='Validation Loss')
    miou_ax.plot(miou_scores, label='mIoU')

    tick_positions = range(0, num_epochs)
    tick_labels = range(1, num_epochs + 1)
    panels = (
        (loss_ax, 'Loss', 'Training and Validation Loss'),
        (miou_ax, 'mIoU', 'mIoU'),
    )
    for axis, y_label, title in panels:
        axis.set_xlabel('Epochs')
        axis.set_ylabel(y_label)
        axis.set_xticks(tick_positions, tick_labels)
        axis.set_title(title)
        axis.legend()
        axis.grid()

    # Show the figure.
    plt.show()

In [None]:
def plot_mious_per_category(miou_scores, num_epochs):
    """Plot one curve per class over the epochs.

    Args:
        miou_scores: Mapping from class name to a sequence of ``num_epochs`` values.
        num_epochs: Number of epochs; x ticks are labelled 1..num_epochs.
    """
    epoch_positions = range(num_epochs)

    plt.figure(figsize=(10, 6))
    for class_name in miou_scores:
        plt.plot(epoch_positions, miou_scores[class_name], label=class_name)

    plt.title('mIoU per Class over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('mIoU (%)')
    plt.xticks(epoch_positions, range(1, num_epochs + 1))
    plt.legend()
    plt.grid(True)
    plt.show()

### Checkpoint resume

In [None]:
def resume_checkpoint(resume_path, model, optimizer=None, scheduler=None, map_location='cpu'):
    """Restore model (and optionally optimizer/scheduler) state from a checkpoint.

    The checkpoint is first loaded onto ``map_location`` (CPU by default), so
    a file written on a GPU machine can also be read where no GPU is visible;
    ``load_state_dict`` then copies the values into the existing objects.

    Args:
        resume_path: File written by ``save_checkpoint``.
        model: Module whose parameters are overwritten.
        optimizer: Optional optimizer to restore.
        scheduler: Optional learning-rate scheduler to restore.
        map_location: Forwarded to ``torch.load``.

    Returns:
        ``(iteration, model, optimizer, scheduler)`` where ``iteration`` is
        the stored value plus one, i.e. the next iteration to run.

    Raises:
        KeyError: If the checkpoint lacks a required entry.
    """
    checkpoint = torch.load(resume_path, map_location=map_location)
    iteration = checkpoint['iteration'] + 1
    model.load_state_dict(checkpoint['model'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])
    if scheduler is not None:
        scheduler.load_state_dict(checkpoint['scheduler'])
    return iteration, model, optimizer, scheduler

In [None]:
def save_checkpoint(path, iteration, model, optimizer, scheduler):
    """Write a training checkpoint to ``path``.

    The parent directory is created when missing. ``optimizer`` and
    ``scheduler`` may be ``None`` (e.g. constant learning rate); ``None`` is
    then stored in their place, and such a checkpoint can only be resumed
    without passing that object to ``resume_checkpoint``.

    Args:
        path: Destination file.
        iteration: Index of the iteration/epoch just completed.
        model: Module whose ``state_dict`` is stored under ``'model'``.
        optimizer: Optimizer stored under ``'optimizer'``, or ``None``.
        scheduler: Scheduler stored under ``'scheduler'``, or ``None``.
    """
    parent_dir = os.path.dirname(os.fspath(path))
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    checkpoint = {
        'iteration': iteration,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict() if optimizer is not None else None,
        'scheduler': scheduler.state_dict() if scheduler is not None else None,
    }
    torch.save(checkpoint, path)

### Image visualization

In [None]:
def plot_tensor_mask(mask_tensor, categories):
    """Display a label mask as a colour image with a class legend.

    Args:
        mask_tensor: CPU tensor of class indices; singleton dimensions are
            squeezed away before plotting.
        categories: Mapping ``name -> (value, (r, g, b))``. Classes are ordered
            by ``value``; the position in that order is the class index
            looked up in the mask.
    """
    ordered = dict(sorted(categories.items(), key=lambda item: item[1][0]))

    # Tensor -> numpy array of class indices.
    mask_array = mask_tensor.squeeze().numpy()
    rows, cols = mask_array.shape[0], mask_array.shape[1]

    # Paint every pixel with the colour of its class; unmatched pixels stay black.
    colored_mask = np.zeros((rows, cols, 3), dtype=np.uint8)
    for class_index, (_, color) in enumerate(ordered.values()):
        colored_mask[mask_array == class_index] = color

    plt.figure(figsize=(8, 5))
    plt.imshow(colored_mask)
    plt.axis("off")

    # One legend entry per class, colours rescaled to [0, 1].
    legend_patches = [
        mpatches.Patch(color=np.array(color) / 255, label=label)
        for label, (_, color) in ordered.items()
    ]
    plt.legend(handles=legend_patches, bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    plt.show()


# Step 2a: Testing classic semantic segmentation network

### Download pre-trained weights

In [None]:
# Make sure the local folder for pre-trained weights exists.
weights_dir = Path(PRETRAINED_WEIGHTS_DIR)
if not weights_dir.exists():
    weights_dir.mkdir(exist_ok=True)

# Download the DeepLabV2 weights from Google Drive only when the file is missing.
deeplab_v2_weights = Path(DEEPLAB_V2_WEIGHTS)
if not deeplab_v2_weights.exists():
    # Google Drive file ID of the weights; replace it if the file moves.
    file_id = '1ZX0UCXvJwqd2uBGCX7LI2n-DfMg3t74v'
    gdown.download(id=file_id, output=str(deeplab_v2_weights), quiet=False)

## Model: DeepLabv2


## Run

### Parameters

In [None]:
# Change in case of resume training: set RESUME_TRAINING to True and `epoch`
# to the epoch whose checkpoint should be loaded.
RESUME_TRAINING = False
epoch = None

# Training hyper-parameters: Adam learning rate / weight decay and StepLR schedule.
num_epochs = 20
BATCH_SIZE = 6
learning_rate = 1e-3
step_size = 10
gamma = 0.1
resize = 512
w_decay = 1e-3

# NOTE(review): the path is formatted here, once; with `epoch = None` it ends
# in "epochNone.pth.tar", so `epoch` must be set above before this cell runs.
RESUME_PATH = f"/content/drive/MyDrive/loveDA_dataset/Model training/DeepLab/DeepLabV2_{num_epochs}_{learning_rate}_{step_size}_{gamma}_{resize}_{w_decay}_epoch{epoch}.pth.tar"


### Dataset preprocessing

#### Normalization metrics

In [None]:
# Disabled: statistics computed from the training images themselves.
#preprocessing_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH)
# No shuffle (waste of time), no drop last (we lose some data)

# Worker processes are only used when running on the GPU.
num_workers = 2 if device.type == 'cuda' else 0

#preprocessing_dataloader = DataLoader(preprocessing_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers)
#_, avg, std = compute_avg_std(preprocessing_dataset, preprocessing_dataloader, device)

# The model is pre-trained on ImageNet, so the ImageNet mean and standard
# deviation are used for normalisation.
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Training pipeline: normalise, then resize to `resize` x `resize`.
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.Resize(resize, resize, p=1, always_apply=True)
])
# Urban training images, shuffled, incomplete last batch dropped.
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=train_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Validation pipeline: normalisation only, images keep their original size.
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])
# Urban validation images, fixed order, every sample kept.
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=val_transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# DeepLabV2 initialised from the downloaded pre-trained weights.
model = get_deeplab_v2(num_classes=num_classes, pretrain=True, pretrain_model_path=DEEPLAB_V2_WEIGHTS).to(device)

# Adam optimizer with a step-wise learning-rate decay.
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=w_decay)
scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

# Loss function: cross-entropy that skips pixels labelled IGNORE_INDEX.
criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

#### Model handling

#### Training loop

In [None]:
# Mean training loss of every epoch run in this session.
train_losses = []

# Either continue from the checkpoint at RESUME_PATH or start from scratch.
if RESUME_TRAINING:
    start_epoch, model, optimizer, scheduler = resume_checkpoint(RESUME_PATH, model, optimizer, scheduler)
else:
    start_epoch = 0

for epoch in range(start_epoch, num_epochs):
    print("### Training mode")
    model.train()
    running_loss = 0.0
    for i, (images, masks) in enumerate(train_loader):
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        # In training mode the model returns three outputs; only the first is used.
        outputs, _, _ = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if (i + 1) % 25 == 0:
            print(f"Processed {i + 1} batches, loss: {running_loss / (i+1)}")

    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

    # Advance the schedule BEFORE checkpointing: the saved optimizer/scheduler
    # state then already describes the next epoch, so a run resumed at
    # epoch + 1 uses the same learning rate as an uninterrupted run.
    scheduler.step()

    path = f"/content/drive/MyDrive/loveDA_dataset/Model training/DeepLab/DeepLabV2_{num_epochs}_{learning_rate}_{step_size}_{gamma}_{resize}_{w_decay}_epoch{epoch}.pth.tar"
    save_checkpoint(path, epoch, model, optimizer, scheduler)

#### Evaluation loop

In [None]:
# Requires saving the models for each epoch

start_epoch_eval = 0

# Per-epoch validation loss and mIoU, stored as plain Python floats so they
# can be plotted directly even when the model runs on the GPU.
eval_losses = []
mious = []

for epoch in range(start_epoch_eval, num_epochs):

    # Fresh network; its weights come from the checkpoint of this epoch.
    model = get_deeplab_v2(num_classes=num_classes, pretrain=False)

    path = f"/content/drive/MyDrive/loveDA_dataset/Model training/DeepLab/DeepLabV2_{num_epochs}_{learning_rate}_{step_size}_{gamma}_{resize}_{w_decay}_epoch{epoch}.pth.tar"
    _, model, _, _ = resume_checkpoint(path, model)

    model.to(device)
    print("### Evaluation mode")
    miou = 0.0  # Accumulator for mIoU
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
        for i, (images, masks) in enumerate(val_loader):
            images = images.to(device)
            masks = masks.to(device)

            # loss
            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()

            # mIoU; .item() keeps the accumulator a host-side float
            iou, _ = calculate_iou(outputs, masks, num_classes)
            miou += iou.item()

            if (i + 1) % 25 == 0:
                print(f"Processed {i + 1} batches: loss {val_loss / (i+1)}, mIoU: {miou / (i+1)}")

    # Averages over the number of batches.
    val_loss /= len(val_loader)
    eval_losses.append(val_loss)

    miou /= len(val_loader)
    mious.append(miou)

    print(f"Epoch: [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, mIoU: {(miou * 100):.2f}%")

### Metric calculation

In [None]:
# Benchmark the model left over from the evaluation loop on a 1024x1024 input.
model.eval()
# NOTE(review): `num_epochs` is passed as the number of timing iterations.
mean_latency, _, _, _ = calculate_latency_fps(model, device, 1024, 1024, num_epochs, ModelType.DEEPLAB)
print(f"Mean Latency: {mean_latency:.2f} ms")

calculate_flops_params(model, device, 1024, 1024, ModelType.DEEPLAB)

# PIDNet Implementation (execute for any Step from 2b on)

## Model: PIDNet


#### FullModel

In [None]:
class FullModel(nn.Module):
    """Couples a segmentation network with its semantic and boundary losses.

    ``forward`` runs the wrapped network, resizes its outputs to the target
    resolution and, when a boundary ground truth is given, returns the total
    loss together with its three components.
    """

    def __init__(self, model, sem_loss, bd_loss):
        """Store the wrapped network and the two loss callables."""
        super().__init__()
        self.model = model
        self.sem_loss = sem_loss
        self.bd_loss = bd_loss

    def pixel_acc(self, pred, label):
        """Fraction of non-ignored pixels whose arg-max prediction equals the label."""
        _, predicted = torch.max(pred, dim=1)
        countable = (label != IGNORE_INDEX).long()
        hits = torch.sum(countable * (predicted == label).long())
        total = torch.sum(countable)
        # The small constant avoids a division by zero when every pixel is ignored.
        return hits.float() / (total.float() + 1e-10)

    def forward(self, inputs, labels, bd_gt, *args, **kwargs):
        """Run the network and, if possible, compute the losses.

        Args:
            inputs: Image batch fed to the wrapped network.
            labels: Label masks, or ``None``; when given, their spatial size is
                the target size of the outputs, otherwise the input size is.
            bd_gt: Boundary ground truth, or ``None`` to skip the losses.
            *args, **kwargs: Forwarded to the wrapped network.

        Returns:
            ``(loss, outputs, acc, [loss_s, loss_b, loss_sb])``; with
            ``bd_gt is None`` it is ``(None, outputs, None, None)``.
        """
        outputs = self.model(inputs, *args, **kwargs)

        if labels is None:
            target_size = (inputs.size(2), inputs.size(3))
        else:
            target_size = (labels.size(1), labels.size(2))

        # Bring every output to the target resolution when the first one differs.
        if (outputs[0].size(2), outputs[0].size(3)) != target_size:
            for idx, out in enumerate(outputs):
                outputs[idx] = F.interpolate(out, size=target_size, mode='bilinear',
                                             align_corners=True)  # from original configs

        if bd_gt is None:
            return None, outputs, None, None

        seg_logits, boundary_logits = outputs[-2], outputs[-1]

        acc = self.pixel_acc(seg_logits, labels)
        loss_s = self.sem_loss(outputs[:-1], labels)
        loss_b = self.bd_loss(boundary_logits, bd_gt)

        # Labels are kept only where the predicted boundary probability exceeds
        # 0.8; everywhere else they are replaced by the ignore index.
        ignore_fill = torch.ones_like(labels) * IGNORE_INDEX  # from original configs
        bd_label = torch.where(torch.sigmoid(boundary_logits[:, 0, :, :]) > 0.8, labels, ignore_fill)
        loss_sb = self.sem_loss([seg_logits], bd_label)

        loss = loss_s + loss_b + loss_sb

        return torch.unsqueeze(loss, 0), outputs, acc, [loss_s, loss_b, loss_sb]

#### Other functions

In [None]:
def get_seg_model(model_name, num_classes, pretrained_weights, imgnet_pretrained):
    """Build a PIDNet (with auxiliary heads) and load matching pre-trained weights.

    Args:
        model_name: The variant is chosen by substring: a name containing
            ``'s'`` gives the small configuration, otherwise one containing
            ``'m'`` gives the medium one, anything else the large one.
        num_classes: Number of output classes.
        pretrained_weights: Checkpoint file passed to ``torch.load``.
        imgnet_pretrained: When true, the checkpoint must hold a
            ``'state_dict'`` entry whose keys are used as they are. When false,
            the ``'state_dict'`` entry is optional and the first 6 characters
            of every key are dropped before matching.

    Returns:
        The model, with every checkpoint tensor whose (possibly shortened) key
        and shape match a model parameter copied in.
    """
    if 's' in model_name:
        arch = dict(m=2, n=3, planes=32, ppm_planes=96, head_planes=128)
    elif 'm' in model_name:
        arch = dict(m=2, n=3, planes=64, ppm_planes=96, head_planes=128)
    else:
        arch = dict(m=3, n=4, planes=64, ppm_planes=112, head_planes=256)
    model = PIDNet(num_classes=num_classes, augment=True, **arch)

    checkpoint = torch.load(pretrained_weights, map_location='cpu')
    if imgnet_pretrained:
        source_state = checkpoint['state_dict']
        key_offset = 0
    else:
        source_state = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint
        key_offset = 6

    # Keep only tensors that have a same-named, same-shaped slot in the model.
    model_dict = model.state_dict()
    matched = {}
    for key, tensor in source_state.items():
        target_key = key[key_offset:]
        if target_key in model_dict and tensor.shape == model_dict[target_key].shape:
            matched[target_key] = tensor

    msg = 'Loaded {} parameters!'.format(len(matched))
    print('Attention!!!')
    print(msg)
    print('Over!!!')

    model_dict.update(matched)
    model.load_state_dict(model_dict, strict = False)

    return model

def get_pred_model(name, num_classes):
    """Build a PIDNet without auxiliary heads (``augment=False``).

    Args:
        name: The variant is chosen by substring: a name containing ``'s'``
            gives the small configuration, otherwise one containing ``'m'``
            gives the medium one, anything else the large one.
        num_classes: Number of output classes.

    Returns:
        The freshly constructed model; no weights are loaded.
    """
    if 's' in name:
        arch = dict(m=2, n=3, planes=32, ppm_planes=96, head_planes=128)
    elif 'm' in name:
        arch = dict(m=2, n=3, planes=64, ppm_planes=96, head_planes=128)
    else:
        arch = dict(m=3, n=4, planes=64, ppm_planes=112, head_planes=256)

    return PIDNet(num_classes=num_classes, augment=False, **arch)

## Download pre-trained weights

In [None]:
# Make sure the local folder for pre-trained weights exists.
weights_dir = Path(PRETRAINED_WEIGHTS_DIR)
if not weights_dir.exists():
    weights_dir.mkdir(exist_ok=True)

# Location of the PIDNet-S weights (a Path, since weights_dir is one).
PIDNET_S_WEIGHTS = weights_dir / 'pidnet_s_imagenet_pretrained.pth'

# Download the weights from Google Drive only when the file is missing.
pidnet_s_weights = Path(PIDNET_S_WEIGHTS)
if not pidnet_s_weights.exists():
    # Google Drive file ID of the weights; replace it if the file moves.

    file_id = '1hIBp_8maRr60-B3PF0NVtaA6TYBvO4y-'
    gdown.download(id=file_id, output=str(pidnet_s_weights), quiet=False)

# Step 2b: Real-time semantic segmentation network

## Run

### Parameters

In [None]:
# Data and schedule length.
resize = 512
BATCH_SIZE = 6
num_epochs = 20

# SGD and StepLR hyper-parameters.
LR = 1e-3
# NOTE(review): "MOMEUNTUM" is a misspelling of "MOMENTUM"; the optimizer
# cells read this exact name, so renaming it requires updating them as well.
MOMEUNTUM = 0.9
WEIGHT_DECAY = 1e-2
STEP_SIZE = 10
GAMMA = 0.1

# Print the training losses every `log_frequency` iterations.
log_frequency = 25

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Worker processes are only used when running on the GPU.
num_workers = 2 if device.type == 'cuda' else 0

# The model is pre-trained on ImageNet, so the ImageNet mean and standard
# deviation are used for normalisation.
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Training pipeline: normalise, then resize to `resize` x `resize`.
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.Resize(resize, resize, p=1, always_apply=True)
])
# bd=True makes every sample carry a boundary map as third element.
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=train_transform, bd=True)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Resizing should only be applied to the training set.
# Normalisation, on the other hand, can be applied here as well.

val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])
# Urban validation images with boundary maps, fixed order, every sample kept.
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=val_transform, bd=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# PIDNet-S initialised from the downloaded weights, wrapped together with its
# semantic (cross-entropy) and boundary losses.
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
model = model.to(device)

# SGD with momentum and a step-wise learning-rate decay.
optimizer = torch.optim.SGD(model.parameters(), lr = LR, momentum = MOMEUNTUM, weight_decay = WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device) -> tuple:
    """Evaluate a ``FullModel`` on a loader of ``(inputs, masks, boundaries)``.

    The model is switched to eval mode. Loss and mIoU are averaged over
    samples: every batch value is weighted by its batch size.

    Args:
        model: Wrapper returning ``(loss, outputs, acc, loss_list)``.
        dataloader: Iterable of ``(inputs, masks, boundaries)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(loss, mIoU)``: a float and the value accumulated from
        ``calculate_iou``.
    """
    model.eval()

    loss_total = 0.0
    iou_total = 0.0
    sample_count = 0

    for inputs, masks, boundaries in dataloader:
        batch_size = inputs.size(0)
        sample_count += batch_size

        inputs, masks, boundaries = inputs.to(device), masks.to(device), boundaries.to(device)

        # Forward pass; accuracy and the individual losses are not needed here.
        loss, outputs, _, _ = model(inputs, masks, boundaries)
        loss_total += loss.item() * batch_size

        # mIoU of the second network output.
        batch_miou, _ = calculate_iou(outputs[1], masks, num_classes)
        iou_total += batch_miou * batch_size

    return loss_total / sample_count, iou_total / sample_count

#### Training/evaluation loop

In [None]:
# Per-epoch history.
val_losses, val_accuracies = [], []
train_losses, train_accuracies = [], []
miou_scores = []
# NOTE(review): best_mIoU, best_num_epochs and train_accuracies are never
# updated in this cell.
best_mIoU = -1
best_num_epochs = None

for epoch in range(num_epochs):

    current_step = 0
    train_loss = 0.0
    model.train()
    for (inputs, masks, boundaries) in train_loader:

        inputs = inputs.to(device)
        masks = masks.to(device)

        boundaries = boundaries.to(device)

        # Forward pass
        optimizer.zero_grad()
        loss, outputs, pixel_acc, [loss_s, loss_b, loss_sb] = model(inputs, masks, boundaries)
        train_loss += loss.item()

        # Backward pass
        loss.backward()
        optimizer.step()

        # Periodic log of the total loss and its three components.
        if current_step % log_frequency == 0:
            print(f"Epoch {epoch+1}, Iteration {current_step}, Loss: {loss.item():.3f} Loss_s: {loss_s.item():.3f} Loss_b: {loss_b.item():.3f} Loss_sb: {loss_sb.item():.3f}")

        current_step += 1

    # Mean loss over the batches of this epoch.
    train_loss /= len(train_loader)

    print(f"End of Epoch {epoch+1}")
    print(f"Training loss: {train_loss:.5f}")


    # Validation after every epoch.
    val_loss, val_mean_iou = evaluate(model, val_loader, device)
    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")

    val_losses.append(val_loss)
    # miou_scores keeps the value as returned by evaluate; val_accuracies
    # stores the same value converted to a Python float.
    miou_scores.append(val_mean_iou)
    val_accuracies.append(val_mean_iou.cpu().item())
    train_losses.append(train_loss)

    print()
    # Scheduler is None if learning rate is constant
    if scheduler is not None:
        scheduler.step()

### Metric calculation

In [None]:
# Benchmark the trained wrapper on a 1024x1024 input.
model.eval()
# NOTE(review): `num_epochs` is passed as the number of timing iterations.
mean_latency, _, _, _ = calculate_latency_fps(model, device, 1024, 1024, num_epochs, ModelType.PIDNET)
print(f"Mean Latency: {mean_latency:.2f} ms")
calculate_flops_params(model, device, 1024, 1024, ModelType.PIDNET)

# Step 3a: Evaluating the domain shift problem in Semantic Segmentation

## Run

### Parameters

In [None]:
# Data and schedule length.
resize = 512
BATCH_SIZE = 6
num_epochs = 20

# SGD and StepLR hyper-parameters.
LR = 1e-3
# NOTE(review): "MOMEUNTUM" is a misspelling of "MOMENTUM"; the optimizer
# cells read this exact name, so renaming it requires updating them as well.
MOMEUNTUM = 0.9
WEIGHT_DECAY = 1e-2
STEP_SIZE = 10
GAMMA = 0.1

# Print the training losses every `log_frequency` iterations.
log_frequency = 25

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Worker processes are only used when running on the GPU.
num_workers = 2 if device.type == 'cuda' else 0

# The model is pre-trained on ImageNet, so the ImageNet mean and standard
# deviation are used for normalisation.
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Training pipeline: normalise, then resize to `resize` x `resize`.
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Source domain: Urban training images, with boundary maps.
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, bd=True, transforms=train_transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Validation pipeline: normalisation only.
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])

# Target domain: Rural validation images, while training uses Urban ones.
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=val_transform)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# PIDNet-S initialised from the downloaded weights, wrapped together with its
# semantic (cross-entropy) and boundary losses.
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
model = model.to(device)

# SGD with momentum and a step-wise learning-rate decay.
optimizer = torch.optim.SGD(model.parameters(), lr = LR, momentum = MOMEUNTUM, weight_decay = WEIGHT_DECAY)

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = STEP_SIZE, gamma = GAMMA)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device, ) -> tuple:
    """Run `model` in eval mode over `dataloader` and average its metrics.

    Each batch is expected to be an (images, masks, boundaries) triple. The
    model is called as ``model(images, masks, boundaries)`` and must return a
    4-tuple whose first item is the loss and whose second item is the list of
    outputs; IoU is computed on ``outputs[1]``.

    Per-batch values are weighted by the batch size, so the results are
    per-sample averages over the whole loader.

    Returns:
        (loss, mIoU, ious_per_class): average loss (float), average mIoU and
        a CPU tensor with the average IoU of each of the `num_classes` classes.
    """
    model.eval()

    n_samples = 0
    loss_sum = 0.0
    miou_sum = 0.0
    class_iou_sum = torch.zeros(num_classes)

    for batch in dataloader:
        images, labels, edges = (t.to(device) for t in batch)
        batch_size = images.size(0)
        n_samples += batch_size

        # Forward pass: FullModel returns the loss together with the outputs
        batch_loss, preds, _, _ = model(images, labels, edges)
        loss_sum += batch_loss.item() * batch_size

        # IoU on the second output, weighted by the number of samples in the batch
        batch_miou, batch_class_iou = calculate_iou(preds[1], labels, num_classes)
        miou_sum += batch_miou * batch_size
        class_iou_sum += batch_class_iou.cpu() * batch_size

    return loss_sum / n_samples, miou_sum / n_samples, class_iou_sum / n_samples

#### Training/evaluation loop

In [None]:
# Per-epoch history. val_accuracies, train_accuracies, best_mIoU and
# best_num_epochs are initialised here but never updated in this cell.
val_losses, val_accuracies = [], []
train_losses, train_accuracies = [], []
miou_scores = []
best_mIoU = -1
best_num_epochs = None


for epoch in range(num_epochs):

    current_step = 0
    train_loss = 0.0
    model.train()
    for (inputs, masks, boundaries) in train_loader:

        inputs = inputs.to(device)
        masks = masks.to(device)

        boundaries = boundaries.to(device)

        # Forward pass
        optimizer.zero_grad()
        # The model returns the total loss, its outputs, a pixel accuracy and the three loss terms
        loss, outputs, pixel_acc, [loss_s, loss_b, loss_sb] = model(inputs, masks, boundaries)
        train_loss += loss.item()

        # Backward pass
        loss.backward()
        optimizer.step()

        if current_step % log_frequency == 0:
            print(f"Epoch {epoch+1}, Iteration {current_step}, Loss: {loss.item():.3f} Loss_s: {loss_s.item():.3f} Loss_b: {loss_b.item():.3f} Loss_sb: {loss_sb.item():.3f}")

        current_step += 1

    # Average of the per-batch losses over the epoch
    train_loss /= len(train_loader)

    print(f"End of Epoch {epoch+1}")
    print(f"Training loss: {train_loss:.5f}")


    # Evaluate on the validation loader after every epoch
    val_loss, val_mean_iou, ious_per_class = evaluate(model, val_loader, device)

    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")

    # Per-class IoU; class i is matched to the i-th key of `categories`
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")

    val_losses.append(val_loss)
    train_losses.append(train_loss)
    miou_scores.append(val_mean_iou)

    print()
    # Scheduler is None if learning rate is constant
    if scheduler is not None:
        scheduler.step()

# evaluate() returns the mIoU as a tensor; convert the history to plain Python floats
miou_scores = list(map(lambda x: x.item(), miou_scores))

### Evaluate using saved models

In [None]:
# Re-evaluate the checkpoint saved for each epoch on the validation loader and
# collect the per-epoch validation loss and mIoU.
validation_losses = []
miou_scores = []

# First epoch whose checkpoint is evaluated (raise it to resume a partial run)
starting_epoch = 0

for epoch in range(starting_epoch, num_epochs):
    # NOTE(review): the adversarial training cell saves under ".../Model training/PIDNet/" with a
    # "_DA" suffix, which does not match this path — confirm which checkpoints are meant here.
    model_path = f"/content/drive/MyDrive/loveDA_dataset/Model training/PIDNet_{num_epochs}_{LR}_{STEP_SIZE}_{GAMMA}_{resize}_{WEIGHT_DECAY}_{MOMEUNTUM}_{GAMMA}_epoch{epoch}.pth"

    # Rebuild the same architecture the checkpoint was saved from, then restore its weights
    pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
    model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
    # map_location remaps the stored tensors onto the current device, so a checkpoint
    # written on a GPU can also be restored on a CPU-only runtime.
    model.load_state_dict(torch.load(model_path, map_location=device))

    model = model.to(device)

    val_loss, val_mean_iou, ious_per_class = evaluate(model, val_loader, device)

    validation_losses.append(val_loss)
    miou_scores.append(val_mean_iou)

    print(f"Epoch {epoch+1}")
    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")

    # Per-class IoU; class i is matched to the i-th key of `categories`
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")

    print()




# Step 3b: Data augmentations to reduce the domain shift

### Parameters

In [None]:
# Side length (in pixels) the training images are cropped/resized to
resize = 512
BATCH_SIZE = 6
num_epochs = 20

# SGD hyper-parameters ("MOMEUNTUM" is misspelled, but it is the name the optimizer cell reads)
LR = 1e-3
MOMEUNTUM = 0.9
WEIGHT_DECAY = 1e-2
# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
STEP_SIZE = 10
GAMMA = 0.1

# The training loop of this step reads `log_frequency`; define it here so the step
# can be run on its own instead of relying on the value left behind by Step 3a.
log_frequency = 25

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Use DataLoader worker processes only when running on a CUDA device
num_workers = 2 if device.type == 'cuda' else 0

# Since the model is pretrained on ImageNet, the ImageNet per-channel mean and standard deviation are used
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Augmentations

In [None]:
# Probability with which the selected augmentation group as a whole is applied to a sample
aug_prob = 0.5

# Pool of candidate augmentations. Each one has p=1, so whenever the group below
# is applied, every selected transform in it is applied.
augmentations = [
    A.ShiftScaleRotate(p=1),
    A.GridDistortion(p=1),
    A.RandomCrop(height=resize, width=resize, p=1),
    A.HorizontalFlip(p=1),
    A.GaussianBlur(p=1),
    A.GridDropout(p=1),
    A.ColorJitter(p=1),
    A.GaussNoise(var_limit=(0.2, 0.3), p=1),
    A.ChannelDropout(p=1),
    A.RandomSizedCrop(min_max_height=(resize//8, resize), height=resize, width=resize, p=1),
]

# Indices into `augmentations` of the transforms used for this run (index 2 is RandomCrop)
selected_indices = [2]

# Group the selected transforms; the group is applied with probability aug_prob
selected_augmentations = A.Compose([augmentations[i] for i in selected_indices], p=aug_prob)

#### Training set

In [None]:
# Training pipeline: normalize, apply the selected augmentation group, then resize to resize x resize.
# NOTE(review): the augmentations run after A.Normalize, so any photometric transform selected
# from the pool would operate on normalized values — confirm this ordering is intended.
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    selected_augmentations,
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Source domain (Urban) training split; the training loop unpacks (image, mask, boundary) triples from it
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, bd=True, transforms=train_transform)
# Shuffled loader; seed_worker and the shared generator g keep shuffling and worker RNG reproducible
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Validation pipeline: normalization only (no resize, no augmentation)
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])

# Validation is done on the Rural split, i.e. a different domain than the Urban training data
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=val_transform)
# Fixed order, every sample kept
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# PIDNet-S segmentation network built from the PIDNET_S_WEIGHTS checkpoint
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
# FullModel wraps the network together with its semantic and boundary losses, so a forward pass returns the loss as well
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
model = model.to(device)
# SGD with momentum and weight decay
optimizer = torch.optim.SGD(model.parameters(), lr = LR, momentum = MOMEUNTUM, weight_decay = WEIGHT_DECAY)
# Multiply the learning rate by GAMMA every STEP_SIZE scheduler steps (the training loop steps it once per epoch)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = STEP_SIZE, gamma = GAMMA)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device, ) -> tuple:
    """Compute the sample-weighted average loss and IoU of `model` on `dataloader`.

    The loader must yield (images, masks, boundaries) batches. The model is
    switched to eval mode and called as ``model(images, masks, boundaries)``;
    it must return a 4-tuple (loss, outputs, _, _). IoU is measured on
    ``outputs[1]``.

    Returns:
        (loss, mIoU, ious_per_class): average loss (float), average mIoU and
        a CPU tensor holding the average IoU of each of the `num_classes` classes.
    """
    model.eval()

    seen = 0
    total_loss = 0.0
    total_miou = 0.0
    total_class_iou = torch.zeros(num_classes)

    for batch in dataloader:
        images, labels, edges = (t.to(device) for t in batch)
        count = images.size(0)
        seen += count

        # Forward pass (the wrapped model also computes the loss)
        step_loss, step_outputs, _, _ = model(images, labels, edges)
        total_loss += step_loss.item() * count

        # Weight the batch metrics by the batch size so a smaller last batch counts proportionally
        step_miou, step_class_iou = calculate_iou(step_outputs[1], labels, num_classes)
        total_miou += step_miou * count
        total_class_iou += step_class_iou.cpu() * count

    return total_loss / seen, total_miou / seen, total_class_iou / seen

#### Training/evaluation loop

In [None]:
# Per-epoch history. val_accuracies, train_accuracies, best_mIoU and
# best_num_epochs are initialised here but never updated in this cell.
val_losses, val_accuracies = [], []
train_losses, train_accuracies = [], []
miou_scores = []
# category name -> list with one validation IoU per epoch
miou_per_category = dict()
best_mIoU = -1
best_num_epochs = None


for epoch in range(num_epochs):

    current_step = 0
    train_loss = 0.0
    model.train()
    for (inputs, masks, boundaries) in train_loader:

        inputs = inputs.to(device)
        masks = masks.to(device)

        boundaries = boundaries.to(device)

        # Forward pass
        optimizer.zero_grad()
        # The model returns the total loss, its outputs, a pixel accuracy and the three loss terms
        loss, outputs, pixel_acc, [loss_s, loss_b, loss_sb] = model(inputs, masks, boundaries)
        train_loss += loss.item()

        # Backward pass
        loss.backward()
        optimizer.step()

        if current_step % log_frequency == 0:
            print(f"Epoch {epoch+1}, Iteration {current_step}, Loss: {loss.item():.3f} Loss_s: {loss_s.item():.3f} Loss_b: {loss_b.item():.3f} Loss_sb: {loss_sb.item():.3f}")

        current_step += 1

    # Average of the per-batch losses over the epoch
    train_loss /= len(train_loader)

    print(f"End of Epoch {epoch+1}")
    print(f"Training loss: {train_loss:.5f}")


    # Evaluate on the validation loader after every epoch
    val_loss, val_mean_iou, ious_per_class = evaluate(model, val_loader, device)


    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")

    # Record and print the per-class IoU; class i is matched to the i-th key of `categories`
    for i, cat in enumerate(categories.keys()):
        if cat in miou_per_category:
            miou_per_category[cat] += [ious_per_class[i].item()]
        else:
            miou_per_category[cat] = [ious_per_class[i].item()]
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}%")

    val_losses.append(val_loss)
    train_losses.append(train_loss)
    miou_scores.append(val_mean_iou)


    print()
    # Scheduler is None if learning rate is constant
    if scheduler is not None:
        scheduler.step()

# evaluate() returns the mIoU as a tensor; convert the history to plain Python floats
miou_scores = list(map(lambda x: x.item(), miou_scores))

# Step 4a: Adversarial Domain Adaptation

### FC discriminator

## Run

### Parameters

In [None]:
# Side length (in pixels) the training images are cropped/resized to
resize = 512
BATCH_SIZE = 6
num_epochs = 20

# Segmentation network: SGD hyper-parameters ("MOMEUNTUM" is misspelled, but it is the name the optimizer cell reads)
LR = 1e-3
MOMEUNTUM = 0.9
WEIGHT_DECAY = 1e-2
# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
STEP_SIZE = 10
GAMMA = 0.1

# Weight of the adversarial (domain) loss term used when training the segmentation network
LAMBDA = 1e-3

# The training loop prints the running losses every `log_frequency` iterations
log_frequency = 50

# Discriminator: Adam learning rate and weight decay
LR_D = 1e-5
WEIGHT_DECAY_D = 0

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Use DataLoader worker processes only when running on a CUDA device
num_workers = 2 if device.type == 'cuda' else 0

# Since the model is pretrained on ImageNet, the ImageNet per-channel mean and standard deviation are used
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Source pipeline: normalize, random resize x resize crop on half of the samples, then resize to resize x resize
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.RandomCrop(height=resize, width=resize, p=0.5),
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Labelled source domain (Urban) training split; the training loop unpacks (image, mask, boundary) triples from it
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, bd=True, transforms=train_transform)
# Shuffled loader; seed_worker and the shared generator g keep shuffling and worker RNG reproducible
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

In [None]:
# Target pipeline: normalize, then resize to resize x resize (the random crop is currently disabled)
train_transform_target = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    #A.RandomCrop(height=resize, width=resize, p=0.5),
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Target domain (Rural) training split; the adversarial loop only uses its images, not its labels
train_dataset_target = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=train_transform_target)
# The loader must wrap the Rural dataset. It previously wrapped `train_dataset` (Urban), so the
# "target" batches were source images and no domain adaptation actually took place.
train_loader_target = DataLoader(train_dataset_target, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Validation pipeline: normalization only (no resize)
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])

# Validation is done on the Rural (target domain) split
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=val_transform)
# Fixed order, every sample kept
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# Segmentation network (the "generator" of the adversarial setup): PIDNet-S wrapped with its losses
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr = LR, momentum = MOMEUNTUM, weight_decay = WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = STEP_SIZE, gamma = GAMMA)

# Binary domain classification loss; the discriminator outputs raw logits
domain_criterion = nn.BCEWithLogitsLoss()

# The discriminator is fed the softmax of the segmentation output, which has `num_classes`
# channels, so its input width is tied to num_classes rather than a hard-coded 7.
model_domain = FCDiscriminator(num_classes=num_classes)
model_domain = model_domain.to(device)
domain_optimizer = torch.optim.Adam(model_domain.parameters(), lr = LR_D, weight_decay = WEIGHT_DECAY_D)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device, ) -> tuple:
    """Evaluate `model` on `dataloader` and return sample-weighted averages.

    Batches must be (images, masks, boundaries) triples. The model is put in
    eval mode and called as ``model(images, masks, boundaries)``, returning a
    4-tuple whose first two items are the loss and the list of outputs. IoU
    is computed from ``outputs[1]``.

    Returns:
        (loss, mIoU, ious_per_class): average loss (float), average mIoU and
        a CPU tensor with the average IoU of each of the `num_classes` classes.
    """
    model.eval()

    n_samples = 0
    loss_acc = 0.0
    miou_acc = 0.0
    per_class_acc = torch.zeros(num_classes)

    for batch in dataloader:
        images, labels, edges = (t.to(device) for t in batch)
        bs = images.size(0)
        n_samples += bs

        # Forward pass
        batch_loss, batch_outputs, _, _ = model(images, labels, edges)
        loss_acc += batch_loss.item() * bs

        # Calculate mIoU, weighted by the batch size
        batch_miou, batch_per_class = calculate_iou(batch_outputs[1], labels, num_classes)
        miou_acc += batch_miou * bs
        per_class_acc += batch_per_class.cpu() * bs

    return loss_acc / n_samples, miou_acc / n_samples, per_class_acc / n_samples

#### Training/evaluation loop

In [None]:
# Adversarial domain adaptation: the segmentation network (G) is trained on labelled source
# batches plus an adversarial term on target batches, while the discriminator (D) learns to
# tell source predictions (label 0) from target predictions (label 1).
#
# val_accuracies, train_accuracies, best_mIoU and best_num_epochs are initialised here but
# never updated in this cell.
val_losses, val_accuracies = [], []
train_losses, train_accuracies = [], []
miou_scores = []
best_mIoU = -1
best_num_epochs = None

for epoch in range(num_epochs):

    current_step = 0
    running_source_loss_seg = 0.0

    loss_G, loss_D = 0, 0

    model.train()
    model_domain.train()


    # zip() stops at the shorter of the two loaders
    for (inputs, masks, boundaries), (target_inputs, _, _) in zip(train_loader, train_loader_target):

        inputs = inputs.to(device)
        masks = masks.to(device)
        boundaries = boundaries.to(device)
        target_inputs = target_inputs.to(device)

        # Train G
        # Freeze D so the adversarial loss only produces gradients for the segmentation network
        for param in model_domain.parameters():
            param.requires_grad = False

        optimizer.zero_grad()
        domain_optimizer.zero_grad()

        ## train with source
        source_loss, [_, source_PIDNET_output, _], _, _ = model(inputs, masks, boundaries)
        source_loss.backward()
        running_source_loss_seg += source_loss.item()

        ## train with target
        _, [_, target_PIDNET_output, _], _, _ = model(target_inputs, None, None)
        preds = F.softmax(target_PIDNET_output, dim=1)
        D_out = model_domain(preds)

        # G is rewarded when D classifies target predictions as source (label 0)
        domain_loss = LAMBDA * domain_criterion(D_out, torch.zeros_like(D_out))
        domain_loss.backward()
        loss_G += domain_loss.item()

        # Train D

        for param in model_domain.parameters():
            param.requires_grad = True

        ## train with source
        # detach(): D's loss must not back-propagate into the segmentation network
        source_PIDNET_output = source_PIDNET_output.detach()
        preds = F.softmax(source_PIDNET_output, dim=1)
        D_out = model_domain(preds)

        domain_loss = domain_criterion(D_out, torch.zeros_like(D_out))
        # Each domain contributes half of the discriminator loss
        domain_loss = domain_loss / 2
        domain_loss.backward()
        loss_D += domain_loss.item()

        ## train with target
        target_PIDNET_output = target_PIDNET_output.detach()
        preds = F.softmax(target_PIDNET_output, dim=1)
        D_out = model_domain(preds)

        domain_loss = domain_criterion(D_out, torch.ones_like(D_out))
        domain_loss = domain_loss / 2
        domain_loss.backward()
        loss_D += domain_loss.item()

        # Clip the gradient norm of both networks before stepping their optimizers
        clip_grad.clip_grad_norm_(filter(lambda p: p.requires_grad, model.parameters()), max_norm=35, norm_type=2)
        clip_grad.clip_grad_norm_(filter(lambda p: p.requires_grad, model_domain.parameters()), max_norm=35, norm_type=2)
        optimizer.step()
        domain_optimizer.step()


        if current_step % log_frequency == 0:
            # Running means so far: source segmentation loss, G adversarial loss (D loss in parentheses).
            # Fixed: the source loss used ":5f" (a width, not a precision) and the parenthesis was never closed.
            print(f"Epoch {epoch+1}, Iteration {current_step}, Source loss: {running_source_loss_seg/(current_step+1):.5f}, Domain loss: {loss_G/(current_step+1):.5f} ({loss_D/(current_step+1):.5f})")
        current_step += 1

    train_loss = running_source_loss_seg/len(train_loader)
    train_domain_loss_G = loss_G/len(train_loader)
    train_domain_loss_D = loss_D/len(train_loader)

    print(f"End of Epoch {epoch+1}")
    print(f"Training loss: {train_loss:.5f}")
    print(f"Domain loss G: {train_domain_loss_G:.5f}")
    print(f"Domain loss D: {train_domain_loss_D:.5f}")

    val_loss, val_mean_iou, ious_per_class = evaluate(model, val_loader, device)

    # Save a checkpoint of the segmentation model after every epoch
    path=f"/content/drive/MyDrive/loveDA_dataset/Model training/PIDNet/PIDNet_{num_epochs}_{LR}_{STEP_SIZE}_{GAMMA}_{resize}_{WEIGHT_DECAY}_{MOMEUNTUM}_{GAMMA}_epoch{epoch}_DA.pth"
    torch.save(model.state_dict(), path)
    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")

    # Per-class IoU; class i is matched to the i-th key of `categories`
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")

    val_losses.append(val_loss)
    train_losses.append(train_loss)
    miou_scores.append(val_mean_iou)

    print()
    # Scheduler is None if learning rate is constant
    if scheduler is not None:
        scheduler.step()

# evaluate() returns the mIoU as a tensor; store the history as plain percentages
miou_scores = list(map(lambda x: x.item()*100, miou_scores))

# Step 4b: Image-to-Image Domain Adaptation




## Mix & EMA model

In [None]:
def oneMix(mask, data = None, target = None):
    """Blend the two entries of `data` and/or `target` according to `mask[0]`.

    For each non-None argument `x` (which must hold two entries, `x[0]` and
    `x[1]`), the result is ``mask[0] * x[0] + (1 - mask[0]) * x[1]`` with a
    new leading dimension of size 1. Where the mask is 1 the first entry is
    kept, where it is 0 the second entry is used.

    Returns:
        (data, target): the blended tensors; an argument passed as None is
        returned as None.
    """
    def _blend(pair):
        # Expand the mask to the shape of one entry, then blend element-wise
        selector, _ = torch.broadcast_tensors(mask[0], pair[0])
        blended = selector * pair[0] + (1 - selector) * pair[1]
        return blended.unsqueeze(0)

    mixed_data = data if data is None else _blend(data)
    mixed_target = target if target is None else _blend(target)
    return mixed_data, mixed_target


def generate_class_mask(pred, classes):
    """Mark the positions of `pred` whose value appears in `classes`.

    `pred` is compared against every entry of the 1-D tensor `classes`, and
    the matches are summed over the class axis. The result has the shape of
    `pred` and holds, per position, the number of entries of `classes` equal
    to it (so 0/1 when the entries of `classes` are distinct).
    """
    hits = pred[None].eq(classes[:, None, None])
    return hits.sum(dim=0)


def mix(parameters, data=None, target=None):
    """Apply the mix mask stored under ``parameters["Mix"]`` to `data` and/or `target`.

    At least one of `data` and `target` must be given. Returns the
    ``(data, target)`` pair produced by `oneMix`.
    """
    assert not (data is None and target is None)
    return oneMix(mask=parameters["Mix"], data=data, target=target)

def update_ema_variables(ema_model, model, alpha_teacher, iteration):
    """Move the parameters of `ema_model` towards those of `model` (exponential moving average).

    Every parameter is updated in place as
    ``ema = alpha * ema + (1 - alpha) * param``, where
    ``alpha = min(1 - 1 / (iteration + 1), alpha_teacher)``. With
    ``iteration == 0`` alpha is 0, so the EMA model becomes a copy of `model`.
    Only the tensors returned by ``parameters()`` are updated; buffers are
    left untouched.

    Args:
        ema_model: model whose parameters hold the running average (modified in place).
        model: model providing the current parameters (not modified).
        alpha_teacher: upper bound for the EMA decay.
        iteration: zero-based count of updates performed so far.

    Returns:
        `ema_model`, for convenience.
    """
    # Use the "true" average until the exponential average is more correct
    alpha_teacher = min(1 - 1 / (iteration + 1), alpha_teacher)

    # In-place update without autograd tracking. Unlike the former `[:]` slicing, this also
    # works for 0-dimensional parameters and avoids allocating temporaries.
    with torch.no_grad():
        for ema_param, param in zip(ema_model.parameters(), model.parameters()):
            ema_param.mul_(alpha_teacher).add_(param.detach(), alpha=1 - alpha_teacher)
    return ema_model

def create_ema_model(model):
    """Build a second PIDNet-S `FullModel` whose parameters are copies of `model`'s.

    The new model is created the same way as the training model, its
    parameters are detached from autograd, and then each one is overwritten
    with a copy of the parameter at the same position in `model`. Only
    ``parameters()`` are copied; buffers keep the values the freshly built
    model was created with.

    Returns:
        The new model (not moved to any device here).
    """
    backbone = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
    ema_model = FullModel(backbone, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())

    # Detach the teacher parameters so they no longer take part in autograd
    teacher_params = list(ema_model.parameters())
    for teacher_param in teacher_params:
        teacher_param.detach_()

    # Positional copy: the i-th parameter of `model` overwrites the i-th teacher parameter
    for idx, student_param in enumerate(list(model.parameters())):
        teacher_params[idx].data[:] = student_param.data[:].clone()
    return ema_model

## Unlabeled loss

In [None]:
def calc_U_loss(outputs, targets_u, sem_loss, bd_loss):
    """Compute the three-term loss of the model `outputs` against the labels `targets_u`.

    The returned value is the sum of:
      * the semantic loss of all outputs but the last against `targets_u`;
      * the boundary loss of the last output against a boundary map generated
        from `targets_u` with `generate_bd`;
      * the semantic loss of the second-to-last output against `targets_u`
        restricted to pixels where the sigmoid of channel 0 of the last output
        exceeds 0.8 (all other pixels are set to IGNORE_INDEX).

    Args:
        outputs: sequence of model outputs; the last one is treated as the boundary head.
        targets_u: integer label maps, one per sample.
        sem_loss: semantic loss callable taking (list of outputs, labels).
        bd_loss: boundary loss callable taking (boundary output, boundary target).
    """
    boundary_logits = outputs[-1]

    semantic_term = sem_loss(outputs[:-1], targets_u)

    # Boundary ground truth derived from the label maps, one sample at a time
    boundary_gt = np.zeros_like(targets_u.cpu().numpy(), dtype=np.float32)
    for idx, label_map in enumerate(targets_u):
        boundary_gt[idx] = generate_bd(label_map.cpu().numpy().astype(np.uint8))
    boundary_gt = torch.from_numpy(boundary_gt).to(device)

    boundary_term = bd_loss(boundary_logits, boundary_gt)

    # Keep the labels only where the boundary head is confident; ignore everything else.
    # (The original comment says this comes "from original configs".)
    ignore_map = torch.ones_like(targets_u) * IGNORE_INDEX
    confident_boundary = F.sigmoid(boundary_logits[:,0,:,:])>0.8
    boundary_labels = torch.where(confident_boundary, targets_u, ignore_map)

    boundary_aware_term = sem_loss([outputs[-2]], boundary_labels)

    return semantic_term + boundary_term + boundary_aware_term

## Run

### Parameters

In [None]:
# SGD hyper-parameters
LR = 1e-3
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-2
num_epochs = 20
# StepLR schedule: the learning rate is multiplied by GAMMA every STEP_SIZE epochs
STEP_SIZE = 10
GAMMA = 0.1

BATCH_SIZE = 6
# Side length (in pixels) the training images are cropped/resized to
resize = 512
# How the unlabeled loss is weighted in the training loop:
#   "threshold_uniform" - one weight for all pixels: the fraction of pseudo-labels with confidence >= 0.968
#   "threshold"         - per-pixel 0/1 weight: 1 where the confidence is >= 0.968
#   False               - every pixel has weight 1
pixel_weight = "threshold_uniform"
#pixel_weight = False

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Use DataLoader worker processes only when running on a CUDA device
num_workers = 2 if device.type == 'cuda' else 0

# Since the model is pretrained on ImageNet, the ImageNet per-channel mean and standard deviation are used
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Source pipeline: normalize, random resize x resize crop on half of the samples, then resize to resize x resize
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.RandomCrop(height=resize, width=resize, p=0.5),
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Target pipeline: same steps as the source pipeline
train_transform_target = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.RandomCrop(height=resize, width=resize, p=0.5),
    A.Resize(resize, resize, p=1, always_apply=True)
])

# Labelled source domain: Urban training split
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, bd=True, transforms=train_transform)
source_trainloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)
# Target domain: Rural training split; the training loop only uses its images, not its labels
train_dataset_target = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=train_transform_target)
# The loader must wrap the Rural dataset. It previously wrapped `train_dataset` (Urban), so
# source images were being mixed with other source images instead of target images.
target_trainloader = DataLoader(train_dataset_target, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Resizing is best applied to the training set only
# Normalization, on the other hand, can be applied here as well

val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])

# Validation is done on the Rural (target domain) split
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, bd=True, transforms=val_transform)
# Fixed order, every sample kept
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# Student network: PIDNet-S wrapped with its semantic and boundary losses
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX), bd_loss=BondaryLoss())
model.to(device)

# EMA (mean teacher) copy of the student, moved to the same device
ema_model = create_ema_model(model)
ema_model = ema_model.to(device)

optimizer = optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Multiply the learning rate by GAMMA every STEP_SIZE scheduler steps (the training loop steps it once per epoch)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = STEP_SIZE, gamma = GAMMA)

# Stand-alone loss instances, passed to calc_U_loss for the unlabeled (mixed) samples
sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX)
bd_loss=BondaryLoss()

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device, ) -> tuple:
    """Average the loss and IoU of `model` over every sample of `dataloader`.

    The loader must yield (images, masks, boundaries) batches. The model is
    set to eval mode and called as ``model(images, masks, boundaries)``; it
    must return a 4-tuple starting with the loss and the list of outputs.
    IoU is computed on ``outputs[1]``. All per-batch values are weighted by
    the batch size.

    Returns:
        (loss, mIoU, ious_per_class): average loss (float), average mIoU and
        a CPU tensor with the average IoU of each of the `num_classes` classes.
    """
    model.eval()

    sample_count = 0
    weighted_loss = 0.0
    weighted_miou = 0.0
    weighted_class_iou = torch.zeros(num_classes)

    for batch in dataloader:
        images, labels, edges = (t.to(device) for t in batch)
        batch_size = images.size(0)
        sample_count += batch_size

        # Forward pass
        batch_loss, batch_outputs, _, _ = model(images, labels, edges)
        weighted_loss += batch_loss.item() * batch_size

        # Calculate mIoU
        batch_miou, batch_class_iou = calculate_iou(batch_outputs[1], labels, num_classes)
        weighted_miou += batch_miou * batch_size
        weighted_class_iou += batch_class_iou.cpu() * batch_size

    return weighted_loss / sample_count, weighted_miou / sample_count, weighted_class_iou / sample_count

#### Training/evaluation loop

In [None]:
# Class-mix training: for every (source, target) pair of images, roughly half of the classes
# present in the source label are pasted onto the target image. The pasted pixels keep their
# ground-truth label, the remaining pixels use the model's pseudo-label for the target image.
ema_model.train()

accumulated_loss_l = []
accumulated_loss_u = []

miou_scores = []
training_losses = []
validation_losses = []

for epoch in range(num_epochs):
    model.train()

    loss_u_value = 0
    loss_l_value = 0

    n = 0
    # zip() stops at the shorter of the two loaders
    for (src_images, src_labels, src_bd), (tgt_images, _, _) in zip(source_trainloader, target_trainloader):
        optimizer.zero_grad()

        src_images = src_images.to(device)
        src_labels = src_labels.to(device)
        tgt_images = tgt_images.to(device)
        src_bd = src_bd.to(device)


        # Supervised loss on the labelled source batch
        L_l, [_, pred, _], _, _ = model(src_images, src_labels, src_bd)

        # Pseudo-labels for the target batch come from the student itself;
        # the EMA teacher variant is kept here for reference:
        # _, [_, logits_u_w, _], _, _ = ema_model(tgt_images, None, None)
        _, [_, logits_u_w, _], _, _ = model(tgt_images, None, None)


        pseudo_label = torch.softmax(logits_u_w.detach(), dim=1)
        max_probs, targets_u_w = torch.max(pseudo_label, dim=1)

        inputs_u_s = []
        targets_u = []
        pixel_weights = []
        # One mix mask per sample, kept so the pixel weights below are mixed with the
        # same mask as the corresponding image and label
        mix_masks = []


        for i in range(len(src_images)):
            # Randomly pick half (rounded up) of the classes present in this source label
            classes = torch.unique(src_labels[i])
            nclasses = classes.shape[0]
            classes = (classes[torch.Tensor(np.random.choice(nclasses, int((nclasses+nclasses%2)/2),replace=False)).long()]).to(device)
            MixMask_i = generate_class_mask(src_labels[i], classes).unsqueeze(0).to(device)
            mix_masks.append(MixMask_i)

            strong_parameters = {"Mix": MixMask_i}

            # Source pixels where the mask is 1, target pixels elsewhere
            inputs_u_si, _ = mix(strong_parameters, data = torch.cat((src_images[i].unsqueeze(0),tgt_images[i].unsqueeze(0))))
            inputs_u_s.append(inputs_u_si)

            # Ground-truth labels where the mask is 1, pseudo-labels elsewhere
            _, targets_ui = mix(strong_parameters, target = torch.cat((src_labels[i].unsqueeze(0),targets_u_w[i].unsqueeze(0))))
            targets_u.append(targets_ui)

        inputs_u_s = torch.cat(inputs_u_s)
        _, outputs, _, _ = model(inputs_u_s, None, None)
        logits_u_s = outputs[1]

        targets_u = torch.cat(targets_u).long().to(device)


        if pixel_weight == "threshold_uniform":
            # Same weight for every pixel: the fraction of confident pseudo-labels in the batch
            unlabeled_weight = max_probs.ge(0.968).sum().item() / targets_u.numel()
            pixelWiseWeight = unlabeled_weight * torch.ones(max_probs.shape).to(device)
        elif pixel_weight == "threshold":
            pixelWiseWeight = max_probs.ge(0.968).float().to(device)
        elif pixel_weight == False:
            pixelWiseWeight = torch.ones(max_probs.shape).to(device)
        else:
            # Fail loudly instead of silently reusing the weights of a previous iteration
            raise ValueError(f"Unsupported pixel_weight: {pixel_weight!r}")


        # Pasted source pixels carry ground truth (weight 1); the others keep the pseudo-label weight.
        # Fixed: each sample now uses its own mix mask and its own weights; before, the mask of the
        # last sample and the weights of sample 0 were reused for the whole batch.
        onesWeights = torch.ones((pixelWiseWeight.shape)).to(device)
        for i in range(len(src_images)):
            strong_parameters = {"Mix": mix_masks[i]}
            _, pixelWiseWeight_i = mix(strong_parameters, target = torch.cat((onesWeights[i].unsqueeze(0),pixelWiseWeight[i].unsqueeze(0))))
            pixel_weights.append(pixelWiseWeight_i)


        pixel_weights = torch.cat(pixel_weights).to(device)


        # Loss on the mixed samples, scaled by the mean pixel weight
        L_u = calc_U_loss(outputs, targets_u, sem_loss, bd_loss)
        L_u *= torch.mean(pixel_weights)

        loss = L_l + L_u

        loss_l_value += L_l.item()
        loss_u_value += L_u.item()

        loss.backward()
        optimizer.step()


        if n %25 == 0:
          print('\tProcessed {0:d} batches, loss_l = {1:.3f}, loss_u = {2:.3f} loss = {3:.3f}'.format(n, loss_l_value/(n+1), loss_u_value/(n+1),(loss_l_value+loss_u_value)/(n+1)))

        n+=1

    loss_l_value /= len(source_trainloader)
    loss_u_value /= len(target_trainloader)

    accumulated_loss_l.append(loss_l_value)
    accumulated_loss_u.append(loss_u_value)
    training_losses.append(loss_l_value+loss_u_value)

    # Update learning rate
    scheduler.step()

    # update Mean teacher network (once per epoch, with the epoch index as the iteration count)
    alpha_teacher = 0.99
    ema_model = update_ema_variables(ema_model = ema_model, model = model, alpha_teacher=alpha_teacher, iteration=epoch)

    print('iter = {0:6d}/{1:6d}, loss_l = {2:.3f}, loss_u = {3:.3f} loss = {4:.3f}'.format(epoch+1, num_epochs, loss_l_value, loss_u_value, loss_l_value+loss_u_value))



    # Validation is run on the student model
    val_loss, val_mean_iou, ious_per_class = evaluate(model, val_loader, device)
    print(f"Validation mIoU: {val_mean_iou*100:.3f}%, Validation loss: {val_loss:.5f}")
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")
    print()

    validation_losses.append(val_loss)
    miou_scores.append(val_mean_iou)

In [None]:
# Visual check of the last training batch left over from the loop above: for every sample show
# the source image, the target image and the mixed image, followed by the corresponding label maps.
for i in range(BATCH_SIZE):
  # Undo the normalization (x * std + mean) so the images are displayed with their original colors
  plt.imshow(src_images[i].permute(1,2,0).cpu()*torch.tensor(std)+torch.tensor(avg))
  plt.show()
  plt.imshow(tgt_images[i].permute(1,2,0).cpu()*torch.tensor(std)+torch.tensor(avg))
  plt.show()
  plt.imshow(inputs_u_s[i].permute(1,2,0).cpu()*torch.tensor(std)+torch.tensor(avg))
  plt.show()

  # Source ground truth, target pseudo-label and the mixed label
  plot_tensor_mask(src_labels[i].cpu(), categories)
  plot_tensor_mask(targets_u_w[i].cpu(), categories)
  plot_tensor_mask(targets_u[i].cpu(), categories)

# Step 5: Improving the results

### Calculate class distribution

In [None]:
def _count_class_pixels(loader):
    """Count, over every mask yielded by `loader`, the pixels belonging to each category.

    The loader must yield (image, mask) batches. Class index i is matched to
    the i-th key of `categories`.

    Returns:
        dict mapping each category name to a scalar tensor with its pixel count.
    """
    counts = dict()
    for (_, masks) in loader:
        masks = masks.to(device)
        for i, cat in enumerate(categories.keys()):
            pixels = torch.count_nonzero(masks == i)
            counts[cat] = counts[cat] + pixels if cat in counts else pixels
    return counts


# Training splits of both domains, loaded without transforms and without boundary maps
urban_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH)
urban_loader = DataLoader(urban_dataset, batch_size=64, worker_init_fn=seed_worker, generator=g)

rural_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH)
rural_loader = DataLoader(rural_dataset, batch_size=64, worker_init_fn=seed_worker, generator=g)

# Per-category pixel counts of each domain (the two identical loops are now one helper)
urban_classes = _count_class_pixels(urban_loader)
rural_classes = _count_class_pixels(rural_loader)


In [None]:
# One color per category: the values of `categories` are sorted and unpacked as (_, color) pairs,
# and each color is scaled from 0-255 to the 0-1 range matplotlib expects
colors= [np.array(color)/255 for _, color in sorted(categories.values())]

# Pie chart of the pixel share of each class in the Urban training split
wedges, texts, autotexts= plt.pie([v.cpu().numpy() for v in urban_classes.values()], labels=urban_classes.keys(), colors=colors, autopct='%1.1f%%', pctdistance=0.85, labeldistance=1.1, startangle=90)
for text in texts:
    text.set_fontsize(12)
for autotext in autotexts:
    autotext.set_fontsize(9)
plt.title('Urban Dataset', fontsize=16)
plt.tight_layout()
plt.show()

# The percentages are read back from the chart labels, so they are rounded to one decimal place
print("urban_percentage = ", [float(autotext.get_text().strip('%')) for autotext in autotexts])

# Same chart for the Rural training split
wedges, texts, autotexts= plt.pie([v.cpu().numpy() for v in rural_classes.values()], labels=rural_classes.keys(), colors=colors, autopct='%1.1f%%', pctdistance=0.85, labeldistance=1.1, startangle=90)
for text in texts:
    text.set_fontsize(12)
for autotext in autotexts:
    autotext.set_fontsize(9)
plt.title("Rural dataset", fontsize=16)
plt.tight_layout()
plt.show()

print("rural_percentage = ", [float(autotext.get_text().strip('%')) for autotext in autotexts])



### Calculate class weights

In [None]:
def calc_weights(percentages, alpha=0.5):
  """Derive class weights from the percentage of pixels belonging to each class.

  Args:
    percentages: per-class shares in percent (e.g. 48.5 for 48.5%), all strictly positive.
    alpha: exponent of the softened weights; 1 gives plain inverse-frequency
      weights, smaller values flatten the differences and 0 gives uniform weights.

  Returns:
    Four lists, in this order:
      class_weights         - inverse proportions, 1 / p
      normalized_weights    - class_weights scaled to have mean 1
      softened_weights      - 1 / p**alpha, scaled to have mean 1
      normalized_weights_v2 - class_weights scaled so the largest weight is 1

  Raises:
    ValueError: if `percentages` is empty or contains a value <= 0, for which
      the inverse weights would be infinite or meaningless.
  """
  # Divide by 100 to convert percentages to fractions
  proportions = np.asarray(percentages, dtype=float) / 100
  if proportions.size == 0 or np.any(proportions <= 0):
    raise ValueError("percentages must be a non-empty sequence of strictly positive values")

  # Class weights inversely proportional to the class frequency
  class_weights = 1 / proportions

  # Same weights, normalized so their mean is 1
  normalized_weights = class_weights / np.mean(class_weights)

  # Softened inverse-frequency weights, also normalized to mean 1
  softened_weights = 1 / (proportions ** alpha)
  softened_weights /= np.mean(softened_weights)

  # Alternative normalization: the rarest class gets weight 1
  normalized_weights_v2 = class_weights / np.max(class_weights)

  return list(class_weights), list(normalized_weights), list(softened_weights), list(normalized_weights_v2)


# Per-class pixel percentages of the two training splits, hard-coded so this cell does not
# depend on the pie-chart cell (presumably copied from its printed output — TODO confirm)
urban_percentage =  [48.5, 21.2, 9.3, 3.7, 7.6, 7.9, 1.9]
rural_percentage =  [42.9, 3.7, 2.6, 11.6, 3.6, 5.0, 30.5]

# The four weighting variants returned by calc_weights, for each domain
urban_class_weights , urban_normalized_weights, urban_softened_weights, urban_normalized_weights_v2 = calc_weights(urban_percentage)
rural_class_weights , rural_normalized_weights, rural_softened_weights, rural_normalized_weights_v2 = calc_weights(rural_percentage)

print(f"urban_class_weights = {urban_class_weights}")
print(f"urban_normalized_weights = {urban_normalized_weights}")
print(f"urban_softened_weights = {urban_softened_weights}")
print(f"urban_normalized_weights_v2 = {urban_normalized_weights_v2}")

print()

print(f"rural_class_weights = {rural_class_weights}")
print(f"rural_normalized_weights = {rural_normalized_weights}")
print(f"rural_softened_weights = {rural_softened_weights}")
print(f"rural_normalized_weights_v2 = {rural_normalized_weights_v2}")

We then passed **urban_softened_weights** to the cross-entropy loss as class weights and retrained the model. The training loop is the same as in the previous steps and is not repeated here.

In [None]:
# PIDNet-S whose semantic loss is weighted per class with the softened inverse-frequency weights
pidnet = get_seg_model("pidnet_s", num_classes, PIDNET_S_WEIGHTS,imgnet_pretrained=True)
# calc_weights returns NumPy float64 values, so torch.tensor() would create a float64 tensor.
# The weights are created as float32 to match the dtype of the model's logits.
class_weights = torch.tensor(urban_softened_weights, dtype=torch.float32)
model = FullModel(pidnet, sem_loss=CrossEntropy(ignore_label=IGNORE_INDEX, weight=class_weights), bd_loss=BondaryLoss())
model = model.to(device)

# Step 5: BiSeNet

## Run

### Parameters

In [None]:
num_epochs = 20
BATCH_SIZE = 6
# Adam learning rate
learning_rate = 1e-3
# StepLR schedule: the learning rate is multiplied by gamma every step_size scheduler steps
step_size = 10
gamma = 0.1
# Side length (in pixels) the training images are resized to
resize = 512
# Adam weight decay
w_decay = 1e-4

### Dataset preprocessing

#### Normalization metrics

In [None]:
# Use DataLoader worker processes only when running on a CUDA device
num_workers = 2 if device.type == 'cuda' else 0

# Since the model is pretrained on ImageNet, the ImageNet per-channel mean and standard deviation are used
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Training set

In [None]:
# Training pipeline: normalize with the ImageNet statistics, then resize to resize x resize
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255),
    A.Resize(resize, resize, p=1, always_apply=True)
])
# Urban training split; created without bd, and the loops of this step unpack (image, mask) pairs
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=train_transform)
# Shuffled loader; seed_worker and the shared generator g keep shuffling and worker RNG reproducible
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

#### Validation set

In [None]:
# Validation pipeline: normalization only (no resize)
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, p=1, always_apply=True, max_pixel_value=255)
])
# Validate on the Rural split (cross-domain); uncomment the Urban line instead for same-domain validation
#val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=val_transform)
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, transforms=val_transform)
# Fixed order, every sample kept
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, drop_last=False, num_workers=num_workers, worker_init_fn=seed_worker, generator=g)

### Training process

#### Model engine

In [None]:
# Network: BiSeNet built with the 'resnet101' option, placed on the target device.
model = BiSeNet(num_classes, 'resnet101')
model = model.to(device)

# Loss: plain cross-entropy that skips pixels labelled IGNORE_INDEX.
criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

# Optimizer: Adam, with the learning rate multiplied by `gamma` every
# `step_size` scheduler steps.
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=w_decay,
)
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=step_size,
    gamma=gamma,
)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device) -> tuple:
    """Run one evaluation pass and return sample-weighted average metrics.

    The function reads the notebook-level names ``criterion``, ``num_classes``
    and ``calculate_iou``. It is deliberately model-agnostic: the notebook
    defines ``evaluate`` in more than one section, and whichever cell ran last
    wins, so this version accepts both output conventions.

    Args:
        model: Network to evaluate; it is switched to eval mode. Its forward
            pass may return either a single logits tensor or a tuple/list
            whose first element is the main logits tensor.
        dataloader: Iterable yielding ``(inputs, masks)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(loss, mIoU, ious_per_class)``: the mean loss, the mean of the
        per-batch IoU, and the per-class IoU tensor (on CPU), each averaged
        over samples by weighting every batch with its size.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()

    running_loss = 0.0
    data_len = 0
    iou_scores = 0.0
    ious_per_class = torch.zeros(num_classes)

    for inputs, masks in dataloader:
        batch_len = inputs.size(0)
        data_len += batch_len

        inputs = inputs.to(device)
        masks = masks.to(device)

        # Forward pass; keep only the main prediction when the model also
        # returns auxiliary outputs.
        outputs = model(inputs)
        if isinstance(outputs, (tuple, list)):
            outputs = outputs[0]
        loss = criterion(outputs, masks)

        # Weight by batch size so a smaller final batch is not over-counted.
        running_loss += loss.item() * batch_len

        iou, iou_per_class = calculate_iou(outputs, masks, num_classes)
        iou_scores += iou * batch_len
        ious_per_class += iou_per_class.cpu() * batch_len

    if data_len == 0:
        raise ValueError("evaluate() received a dataloader that yielded no samples")

    mIoU = iou_scores / data_len
    loss = running_loss / data_len
    ious_per_class /= data_len

    return loss, mIoU, ious_per_class

#### Training loop

In [None]:
# Per-epoch history: training loss, validation loss and validation mIoU.
train_losses = []
eval_losses = []
mious = []

for epoch in range(num_epochs):
    # ------------------------------ training ------------------------------
    print("### Training mode")
    model.train()
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        images, masks = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        # The model returns three outputs in training mode; the same criterion
        # is applied to each of them and the three losses are summed.
        outputs, outputs16, outputs32 = model(images)
        loss = (
            criterion(outputs, masks)
            + criterion(outputs16, masks)
            + criterion(outputs32, masks)
        )
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Progress report with the running mean loss every 25 batches.
        if (i + 1) % 25 == 0:
            print(f"Processed {i + 1} batches, loss: {running_loss / (i+1)}")

    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

    # ----------------------------- validation -----------------------------
    print("### Evaluation mode")
    val_loss, miou, ious_per_class = evaluate(model, val_loader, device)
    eval_losses.append(val_loss)
    mious.append(miou)

    print(f"Validation mIoU: {miou*100:.3f}%, Validation loss: {val_loss:.5f}")
    # One line per category, in the order of the `categories` keys.
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")
    print()

    print(f"Epoch: [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, mIoU: {(miou * 100):.2f}%")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

### Metric calculation

In [None]:
# Measure in evaluation mode.
model.eval()

# Only the first of the four returned values (the mean latency) is used here.
# NOTE(review): the two 1024 arguments are presumably the input height/width,
# and num_epochs is passed as the fifth argument — presumably the number of
# timing iterations; confirm against calculate_latency_fps.
mean_latency, _, _, _ = calculate_latency_fps(
    model,
    device,
    1024,
    1024,
    num_epochs,
    ModelType.BISENET,
)
print(f"Mean Latency: {mean_latency:.2f} ms")

calculate_flops_params(
    model,
    device,
    1024,
    1024,
    ModelType.BISENET,
)

# Step 5: STDC

### Download pre-trained weights

In [None]:
# Make sure the weights directory exists. parents=True also creates missing
# intermediate directories, and exist_ok=True makes the call a no-op when the
# directory is already there, so no separate existence check is needed.
weights_dir = Path(PRETRAINED_WEIGHTS_DIR)
weights_dir.mkdir(parents=True, exist_ok=True)

# Fetch the STDC1 checkpoint from Google Drive only when it is not already
# present, so re-running the cell does not download it again.
stdc1_weights = Path(STDC1_WEIGHTS)
if not stdc1_weights.exists():
    # Google Drive ID of the file that is saved to STDC1_WEIGHTS.
    file_id = "1DFoXcV42zy-apUcMh5P8WhsXMRJofgl8"
    gdown.download(id=file_id, output=str(stdc1_weights), quiet=False)

## Run

### Parameters

In [None]:
# --- Data ---
BATCH_SIZE = 6          # samples per DataLoader batch
resize = 512            # side length given to A.Resize for training images

# --- Optimizer (Adam) ---
learning_rate = 1e-3
w_decay = 1e-4          # forwarded as weight_decay

# --- Schedule (StepLR) ---
num_epochs = 20
step_size = 10          # epochs between learning-rate decays
gamma = 0.1             # multiplicative decay factor

### Dataset preprocessing

#### Normalization metrics

In [None]:
# The model is pretrained on ImageNet, so inputs are normalized with the
# ImageNet per-channel mean and standard deviation.
avg = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# DataLoader workers: two when running on CUDA, otherwise none.
num_workers = 0 if device.type != 'cuda' else 2

#### Training set

In [None]:
# Training preprocessing: normalize with `avg`/`std`, then resize to a square
# `resize` x `resize` input. Both transforms run with p=1, i.e. on every
# sample, so the deprecated (and redundant) `always_apply` flag is not passed.
train_transform = A.Compose([
    A.Normalize(mean=avg, std=std, max_pixel_value=255, p=1),
    A.Resize(resize, resize, p=1),
])

# Training uses the Urban split only.
train_dataset = LoveDA(TRAIN_DIR, IMG_PATH, MASK_PATH, directories=URBAN_PATH, transforms=train_transform)

# Shuffling draws from the seeded generator `g` and workers are seeded through
# `seed_worker`; drop_last=True discards a final incomplete batch.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=num_workers,
    worker_init_fn=seed_worker,
    generator=g,
)

#### Validation set

In [None]:
# Validation preprocessing: normalization only (no resize transform is applied
# here). p=1 already applies the transform to every sample, so the deprecated
# `always_apply` flag is not passed.
val_transform = A.Compose([
    A.Normalize(mean=avg, std=std, max_pixel_value=255, p=1),
])

# Validation runs on the Rural split, whereas training uses Urban; pass
# directories=URBAN_PATH instead to validate on the training domain.
val_dataset = LoveDA(VAL_DIR, IMG_PATH, MASK_PATH, directories=RURAL_PATH, transforms=val_transform)

# Fixed order and every sample kept, so metrics cover the whole split.
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
    num_workers=num_workers,
    worker_init_fn=seed_worker,
    generator=g,
)

### Training process

#### Model engine

In [None]:


# Network: STDC with the 'STDCNet813' backbone, given the downloaded
# checkpoint path as pretrain_model, then placed on the target device.
model = STDC(
    n_classes=num_classes,
    backbone='STDCNet813',
    pretrain_model=stdc1_weights,
)
model = model.to(device)

# Loss: plain cross-entropy that skips pixels labelled IGNORE_INDEX.
criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_INDEX)

# Optimizer: Adam, with the learning rate multiplied by `gamma` every
# `step_size` scheduler steps.
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=w_decay,
)
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=step_size,
    gamma=gamma,
)

#### Evaluate function

In [None]:
@torch.no_grad()
def evaluate(model, dataloader, device) -> tuple:
    """Run one evaluation pass and return sample-weighted average metrics.

    The function reads the notebook-level names ``criterion``, ``num_classes``
    and ``calculate_iou``. It is deliberately model-agnostic: the notebook
    defines ``evaluate`` in more than one section, and whichever cell ran last
    wins, so this version accepts both output conventions instead of
    hard-coding a three-way unpack of the model output.

    Args:
        model: Network to evaluate; it is switched to eval mode. Its forward
            pass may return either a single logits tensor or a tuple/list
            whose first element is the main logits tensor.
        dataloader: Iterable yielding ``(inputs, masks)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(loss, mIoU, ious_per_class)``: the mean loss, the mean of the
        per-batch IoU, and the per-class IoU tensor (on CPU), each averaged
        over samples by weighting every batch with its size.

    Raises:
        ValueError: If the dataloader yields no samples.
    """
    model.eval()

    running_loss = 0.0
    data_len = 0
    iou_scores = 0.0
    ious_per_class = torch.zeros(num_classes)

    for inputs, masks in dataloader:
        batch_len = inputs.size(0)
        data_len += batch_len

        inputs = inputs.to(device)
        masks = masks.to(device)

        # Forward pass; keep only the main prediction when the model also
        # returns auxiliary outputs. Unpacking a bare tensor would instead
        # split it along its first dimension.
        outputs = model(inputs)
        if isinstance(outputs, (tuple, list)):
            outputs = outputs[0]
        loss = criterion(outputs, masks)

        # Weight by batch size so a smaller final batch is not over-counted.
        running_loss += loss.item() * batch_len

        iou, iou_per_class = calculate_iou(outputs, masks, num_classes)
        iou_scores += iou * batch_len
        ious_per_class += iou_per_class.cpu() * batch_len

    if data_len == 0:
        raise ValueError("evaluate() received a dataloader that yielded no samples")

    mIoU = iou_scores / data_len
    loss = running_loss / data_len
    ious_per_class /= data_len

    return loss, mIoU, ious_per_class

#### Training loop

In [None]:
# Per-epoch history: training loss, validation loss and validation mIoU.
train_losses = []
eval_losses = []
mious = []

for epoch in range(num_epochs):
    # ------------------------------ training ------------------------------
    print("### Training mode")
    model.train()
    running_loss = 0.0
    for i, batch in enumerate(train_loader):
        images, masks = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        # The model returns three outputs; the same criterion is applied to
        # each of them and the three losses are summed.
        outputs, outputs16, outputs32 = model(images)
        loss = (
            criterion(outputs, masks)
            + criterion(outputs16, masks)
            + criterion(outputs32, masks)
        )
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Progress report with the running mean loss every 25 batches.
        if (i + 1) % 25 == 0:
            print(f"Processed {i + 1} batches, loss: {running_loss / (i+1)}")

    epoch_loss = running_loss / len(train_loader)
    train_losses.append(epoch_loss)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}")

    # ----------------------------- validation -----------------------------
    print("### Evaluation mode")
    val_loss, miou, ious_per_class = evaluate(model, val_loader, device)
    eval_losses.append(val_loss)
    mious.append(miou)

    print(f"Validation mIoU: {miou*100:.3f}%, Validation loss: {val_loss:.5f}")
    # One line per category, in the order of the `categories` keys.
    for i, cat in enumerate(categories.keys()):
        print(f"{cat} mIoU: {ious_per_class[i]*100:.3f}")
    print()

    print(f"Epoch: [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, mIoU: {(miou * 100):.2f}%")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

### Metric calculation

In [None]:
# Measure in evaluation mode.
model.eval()

# Only the first of the four returned values (the mean latency) is used here.
# NOTE(review): the two 1024 arguments are presumably the input height/width,
# and num_epochs is passed as the fifth argument — presumably the number of
# timing iterations; confirm against calculate_latency_fps.
mean_latency, _, _, _ = calculate_latency_fps(
    model,
    device,
    1024,
    1024,
    num_epochs,
    ModelType.STDC,
)
print(f"Mean Latency: {mean_latency:.2f} ms")

calculate_flops_params(
    model,
    device,
    1024,
    1024,
    ModelType.STDC,
)