In [1]:
# Do not change the path of this notebook (do not use %cd)
# This is the updates version

# Prepare Enviroment

In [2]:
from google.colab import drive
drive.mount('/content/drive')
PATH_STORE_RESULTS = "/content/drive/MyDrive/MLDL_PROJECT/results/"

Mounted at /content/drive


In [3]:
!pip install torchmetrics fvcore albumentations > /dev/null 2>&1



Collecting torchmetrics
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvco

In [4]:
!git clone https://github.com/MateusWiteck/domain_adaptation_semantic_segmentation.git


Cloning into 'domain_adaptation_semantic_segmentation'...
remote: Enumerating objects: 152, done.[K
remote: Counting objects: 100% (85/85), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 152 (delta 45), reused 54 (delta 28), pack-reused 67 (from 2)[K
Receiving objects: 100% (152/152), 2.71 MiB | 26.46 MiB/s, done.
Resolving deltas: 100% (68/68), done.


In [5]:
%run domain_adaptation_semantic_segmentation/train.py

In [6]:
# Download Citiescapes Dataset:
#!pip install -q gdown
#!gdown --id 1Qb4UrNsjvlU-wEsR9d7rckB0YS_LXgb2 --output Cityscapes.zip

# Copy from the drive
#!cp /content/drive/MyDrive/MLDL_PROJECT/Cityscapes.zip /content/
#!mkdir -p /content/domain_adaptation_semantic_segmentation/data

# Unzip
#!unzip -q /content/Cityscapes.zip -d /content/domain_adaptation_semantic_segmentation/data/


In [7]:
# Download GTA Dataset:
#!pip install -q gdown
#!gdown --id 1xYxlcMR2WFCpayNrW2-Rb7N-950vvl23 --output GTA5.zip

# Copy from Drive
!cp /content/drive/MyDrive/MLDL_PROJECT/GTA5.zip /content/
!mkdir -p /content/domain_adaptation_semantic_segmentation/data

# Unzip
!unzip -q /content/GTA5.zip -d /content/domain_adaptation_semantic_segmentation/data/


In [8]:
%cd /content/domain_adaptation_semantic_segmentation

/content/domain_adaptation_semantic_segmentation


# Task 3: DOMAIN SHIFT
From now on, we will employ BiSeNet as our segmentation to ease the resource requirements of the next experiments.
Consider as upper bound the results obtained in Table 2, i.e. the segmentation networks trained on the labeled target images (Cityscapes).


## A) Evaluating the domain shift problem in Semantic Segmentation
In semantic segmentation collecting manually annotated images is expensive. A popular solution consists in adopting synthetic datasets (i.e. artificial images generated in a simulation environment).

Specifically, in this step we employ the synthetic images from GTA5 [5] (source domain) to train our real-time segmentation network, which is then evaluated on the real images from Cityscapes [5] (target domain).

- Dataset: GTA5 [5]

- Training Set:  GTA5

- Validation Set: Cityscapes [5] validation split

- Training epochs: 50

- Training resolution (GTA5): 1280x720

- Test resolution (Cityscapes): 1024x512

- Backbone: ResNet18 (pre-trained on ImageNet) [2]

- Semantic Classes: 19

- Metrics: mIoU

In [9]:
# LIB Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Set seed for reproducibility
torch.manual_seed(335079)

if torch.cuda.is_available():
    torch.device('cuda')
else:
    raise Exception('No GPU available')

In [13]:
# Clean the enviroment
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

# ---------- Start TASK 2A ---------------------------

from datasets import GTA5, GTA5WithAug
from models.bisenet.build_bisenet import BiSeNet
from torch.utils.data import random_split
from transformations import NoTransform

# === Hyperparameters ===
num_classes = 19
batch_size = 8
num_workers = 2
learning_rate_cs = 2.5e-4
num_epochs = 50
training_resolution_gta = (720, 1280)
test_resolution = (512,1024)


Set up the datasets

In [14]:
from transformations import (
    RandomRotationTransform,
    RandomFlipTransform,
    RandomBlurTransform,
    RandomEraseTransform,
    NoTransform
)
import torch
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Subset

# Set up your base dataset (without transform yet)
base_dataset = GTA5(root_dir='data/GTA5', transform=lambda x, y: (x, y))  # identity

# Create split once and reuse (or persist it as discussed earlier)
total_size = len(base_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size
train_indices, val_indices = random_split(range(total_size), [train_size, val_size])

# Build subsets
train_base = Subset(base_dataset, train_indices.indices)
val_base = Subset(base_dataset, val_indices.indices)

# Augmentations
train_transform = NoTransform()
val_transform = NoTransform()

# Wrap subsets with per-split transformations
train_dataset = GTA5WithAug(train_base, transform=train_transform)
val_dataset = GTA5WithAug(val_base, transform=val_transform)

# Dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)


In [15]:

# === Model ===
# Load BISENET
bisenet_model = BiSeNet(num_classes=num_classes, context_path="resnet18").to('cuda')

# Optimizer
criterion = nn.CrossEntropyLoss(ignore_index=255)
bisenet_optimizer = optim.SGD(
    bisenet_model.parameters(),
    lr=learning_rate_cs,
    momentum=0.9,
    weight_decay=0.0005
)

# Train Model
metrics_train_task2b = train_model(
    bisenet_model,
    train_loader,
    val_loader,
    bisenet_optimizer,
    criterion,
    num_classes,
    num_epochs,
    "Task3a"
)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 236MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 235MB/s]


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33malessandro-gentile26[0m ([33mmldl_project_2025[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch [1/50]




KeyboardInterrupt: 

Test the result over Cityscapes

In [None]:

# Test dataset: Cityscapes
transform_cs = NoTransform()

test_dataset_cs = CityScapes(
    root_dir='data/Cityscapes/Cityspaces',
    split='val',
    transform=transform_cs
)
test_dataloader_cs = DataLoader(
    test_dataset_cs,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=True
)

In [None]:


# Re-create the model architecture (same as when saving)
bisenet_model_cs = BiSeNet(num_classes=num_classes_cs, context_path="resnet18").to('cuda')

# Load the saved weights
bisenet_model_cs.load_state_dict(torch.load(PATH_STORE_RESULTS + 'final_models/task3a.pth'))

results = evaluate_performance(bisenet_model_cs, performance_dataloader_cs, num_classes=19, device='cuda')

# Impressão formatada dos resultados
for k, v in results.items():
    print(f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}")




## B) Data augmentations to reduce the domain shift
A naive solution to improve the generalization capability of the segmentation network trained on the synthetic domain consists in the usage of data augmentations during training. Through them, we

i) virtually expand the dataset size and

ii) modify the visual appearance of source (synthetic) images in order to make them more similar to the target (real) ones.


Specifically, we repeat the previous experiment, introducing data augmentations at training time (e.g. horizontal flip, Gaussian Blur, Multiply, ecc.). The decision of what kind of algorithm is left to the student. Set the probability to perform augmentation to 0.5.

In [None]:
from transformations import RandomRotationTransform, RandomFlipTransform, RandomBlurTransform, RandomEraseTransform

list_augmentations = [
  ("RandomRotationTransform", RandomRotationTransform(rotation_limit=70) ),
  ("RandomFlipTransform", RandomFlipTransform() ),
  ("RandomBlurTransform", RandomBlurTransform() ),
  ("RandomEraseTransform", RandomEraseTransform() )
]

for aug_name, aug_transformation in list_augmentations:
  # Wrap subsets with per-split transformations
  train_dataset_ag = GTA5WithAug(train_base, transform=aug_transformation)
  # val_dataset remains the same

  # Dataloaders
  train_loader_ag = DataLoader(
      train_dataset_ag,
      batch_size=8,
      shuffle=True,
      num_workers=2,
      pin_memory=True
  )

  # === Model Training from 0 ===
  # Load BISENET
  bisenet_model_ag = BiSeNet(num_classes=num_classes, context_path="resnet18").to('cuda')

  # Optimizer
  criterion = nn.CrossEntropyLoss(ignore_index=255)
  bisenet_optimizer_ag = optim.SGD(
      bisenet_model_ag.parameters(),
      lr=learning_rate_cs,
      momentum=0.9,
      weight_decay=0.0005
  )

  # Train Model
  metrics_train_task2b = train_model(
      bisenet_model_ag,
      train_loader_ag,
      val_loader,
      bisenet_optimizer_ag,
      criterion,
      num_classes,
      1, #num_epochs
      "Task3b" + aug_name
  )




Epoch [1/1]




Epoch 1/1 - Train Loss: 1.2053, Train mIoU: 0.1376, Train Latency: 4.5047 sec/batch, Val Loss: 0.8063, Val mIoU: 0.2006
Checkpoint saved to /content/drive/MyDrive/MLDL_PROJECT/results/checkpoints/Task3bRandomRotationTransform.pt


0,1
train_latency_per_batch,▁
train_loss,▁
train_mIoU,▁
val_loss,▁
val_mIoU,▁

0,1
train_latency_per_batch,4.50472
train_loss,1.20527
train_mIoU,0.13757
val_loss,0.80632
val_mIoU,0.20061


Epoch [1/1]




Epoch 1/1 - Train Loss: 1.4428, Train mIoU: 0.1471, Train Latency: 4.5117 sec/batch, Val Loss: 0.8907, Val mIoU: 0.2010
Checkpoint saved to /content/drive/MyDrive/MLDL_PROJECT/results/checkpoints/Task3bRandomFlipTransform.pt


0,1
train_latency_per_batch,▁
train_loss,▁
train_mIoU,▁
val_loss,▁
val_mIoU,▁

0,1
train_latency_per_batch,4.51172
train_loss,1.44284
train_mIoU,0.14711
val_loss,0.8907
val_mIoU,0.201


Epoch [1/1]


Validating:  35%|███▍      | 22/63 [01:37<02:30,  3.67s/it]

In [None]:

!git pull
!git fetch --all
!ls