In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import zipfile
import os

# Paths to your zip files in Drive
gta5_zip = "/content/drive/MyDrive/Semantic_Segmentation/GTA5.zip"
cityscapes_zip = "/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip"

# Destination folders
gta5_extract_path = "/content/datasets/GTA5"
cityscapes_extract_path = "/content/datasets/Cityscapes"

# Create directories
os.makedirs(gta5_extract_path, exist_ok=True)
os.makedirs(cityscapes_extract_path, exist_ok=True)

# Extract GTA5
with zipfile.ZipFile(gta5_zip, 'r') as zip_ref:
    zip_ref.extractall(gta5_extract_path)

# Extract Cityscapes
with zipfile.ZipFile(cityscapes_zip, 'r') as zip_ref:
    zip_ref.extractall(cityscapes_extract_path)

print("✅ GTA5 dataset extracted")
print("✅ Cityscapes dataset extracted")


✅ GTA5 dataset extracted
✅ Cityscapes dataset extracted


In [3]:
import shutil
import os

# Fix Cityscapes structure
base_city = "/content/datasets/Cityscapes"
wrong_city_nested = os.path.join(base_city, "Cityscapes", "Cityspaces")

if os.path.exists(wrong_city_nested):
    for sub in ["gtFine", "images"]:
        src = os.path.join(wrong_city_nested, sub)
        dst = os.path.join(base_city, sub if sub == "gtFine" else "leftImg8bit")
        shutil.move(src, dst)
    shutil.rmtree(os.path.join(base_city, "Cityscapes"))
    print("✅ Fixed Cityscapes structure")

# Fix GTA5 structure
base_gta = "/content/datasets/GTA5"
wrong_gta_nested = os.path.join(base_gta, "GTA5")

if os.path.exists(wrong_gta_nested):
    for sub in ["images", "labels"]:
        shutil.move(os.path.join(wrong_gta_nested, sub), os.path.join(base_gta, sub))
    shutil.rmtree(wrong_gta_nested)
    print("✅ Fixed GTA5 structure")


✅ Fixed Cityscapes structure
✅ Fixed GTA5 structure


Defining Dataset Classes with Augmentations

In [4]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import random
import numpy as np
import torch

# ✅ GTA5 → Cityscapes label remapping
GTA5_TO_CITYSCAPES = {
    7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 15: 5, 17: 6, 19: 7, 20: 8,
    21: 9, 22: 10, 23: 11, 24: 12, 26: 13, 27: 14, 28: 15,
    31: 16, 32: 17, 33: 18
}

class GTA5Dataset(Dataset):
    def __init__(self, root, transform=None, target_transform=None, augment=False):
        self.image_dir = os.path.join(root, "images")
        self.label_dir = os.path.join(root, "labels")
        self.images = sorted(os.listdir(self.image_dir))
        self.labels = sorted(os.listdir(self.label_dir))
        self.transform = transform
        self.target_transform = target_transform
        self.augment = augment

    def __len__(self):
        return len(self.images)

    def encode_labels(self, mask):
        remapped = np.full_like(mask, 255)
        for gta_id, city_id in GTA5_TO_CITYSCAPES.items():
            remapped[mask == gta_id] = city_id
        return remapped

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.label_dir, self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)

        if self.augment and random.random() < 0.5:
            img = transforms.functional.hflip(img)
            mask = transforms.functional.hflip(mask)

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)
            mask = self.encode_labels(mask.squeeze().numpy())
            mask = torch.from_numpy(mask).long().unsqueeze(0)

        return img, mask


Setting up the Cityscapes Validation Dataset and Dataloaders

In [5]:
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image

# ✅ Transformations for images and masks
image_transform = transforms.Compose([
    transforms.Resize((512, 1024)),  # H x W
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((512, 1024), interpolation=Image.NEAREST),
    transforms.PILToTensor()
])

# ✅ GTA5 Training Dataset with Augmentation ON
gta5_dataset = GTA5Dataset(
    root='/content/datasets/GTA5',
    transform=image_transform,
    target_transform=mask_transform,
    augment=True  # Augmentation Step 1: Horizontal Flip
)

# ✅ Cityscapes Validation Dataset
class CityscapesDataset(Dataset):
    def __init__(self, root_dir, split='val', transform=None, target_transform=None):
        self.image_dir = os.path.join(root_dir, "leftImg8bit", split)
        self.label_dir = os.path.join(root_dir, "gtFine", split)
        self.transform = transform
        self.target_transform = target_transform

        self.images = []
        self.labels = []
        for city in os.listdir(self.image_dir):
            img_folder = os.path.join(self.image_dir, city)
            label_folder = os.path.join(self.label_dir, city)
            for file_name in os.listdir(img_folder):
                if file_name.endswith("_leftImg8bit.png"):
                    base = file_name.replace("_leftImg8bit.png", "")
                    img_path = os.path.join(img_folder, file_name)
                    label_path = os.path.join(label_folder, base + "_gtFine_labelTrainIds.png")
                    if os.path.exists(label_path):
                        self.images.append(img_path)
                        self.labels.append(label_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = Image.open(self.images[idx]).convert("RGB")
        mask = Image.open(self.labels[idx])
        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask).long()
        return img, mask

val_dataset = CityscapesDataset(
    root_dir='/content/datasets/Cityscapes',
    transform=image_transform,
    target_transform=mask_transform
)

# ✅ DataLoaders
train_loader = DataLoader(gta5_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ GTA5 dataset size: {len(gta5_dataset)} | Cityscapes val size: {len(val_dataset)}")


✅ GTA5 dataset size: 2500 | Cityscapes val size: 500


Defining the dataloaders

In [6]:
from torch.utils.data import DataLoader

# ✅ Dataloaders
train_loader = DataLoader(gta5_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Dataloaders ready | Train: {len(train_loader)} batches | Val: {len(val_loader)} batches")


✅ Dataloaders ready | Train: 1250 batches | Val: 250 batches


In [7]:
# ✅ Clone the official project repo
!git clone https://github.com/Gabrysse/MLDL2024_project1.git

Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 11.29 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [8]:
import sys
sys.path.append('/content/MLDL2024_project1')

from models.bisenet.build_bisenet import BiSeNet
import torch

# ✅ Load BiSeNet with ResNet18 and move to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BiSeNet(num_classes=19, context_path='resnet18')
model = model.to(device)

print("✅ BiSeNet with ResNet18 loaded and moved to", device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 207MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 206MB/s]


✅ BiSeNet with ResNet18 loaded and moved to cuda


Defining the Loss, Optimizer, and AMP Scaler

In [9]:
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler

# ✅ Loss function — ignore label 255
criterion = nn.CrossEntropyLoss(ignore_index=255)

# ✅ Optimizer (SGD)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# ✅ Mixed precision scaler for AMP
scaler = GradScaler()

print("✅ Loss, optimizer, and AMP scaler initialized.")


✅ Loss, optimizer, and AMP scaler initialized.


Training (Augmentation 1 — Horizontal Flip)

In [10]:
from torch.cuda.amp import autocast
from tqdm import tqdm
import gc
import os

epochs = 50
best_val_loss = float('inf')
save_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug1.pth"

print("🟢 Starting BiSeNet training with Horizontal Flip Augmentation...")

for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    loop = tqdm(train_loader, total=len(train_loader), desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)

    for images, targets in loop:
        images = images.to(device)
        targets = targets.squeeze(1).long().to(device)

        optimizer.zero_grad()
        with autocast():
            output, aux1, aux2 = model(images)
            loss1 = criterion(output, targets)
            loss2 = criterion(aux1, targets)
            loss3 = criterion(aux2, targets)
            loss = loss1 + 0.4 * (loss2 + loss3)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_train_loss = train_loss / len(train_loader)

    # ✅ Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.squeeze(1).long().to(device)

            with autocast():
                val_out = model(val_imgs)
                val_loss_batch = criterion(val_out, val_masks)

            val_loss += val_loss_batch.item()

    avg_val_loss = val_loss / len(val_loader)

    # ✅ Save best model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f}")

    gc.collect()
    torch.cuda.empty_cache()
    free_gpu = torch.cuda.mem_get_info()[0] / (1024 ** 3)

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Free GPU: {free_gpu:.2f} GB")

print("🏁 Augmented training complete.")


🟢 Starting BiSeNet training with Horizontal Flip Augmentation...




💾 Best model saved at epoch 1 | Val Loss: 3.1585
✅ Epoch 1 | Train Loss: 1.3188 | Val Loss: 3.1585 | Free GPU: 14.09 GB




✅ Epoch 2 | Train Loss: 0.9177 | Val Loss: 3.6551 | Free GPU: 14.05 GB




💾 Best model saved at epoch 3 | Val Loss: 3.0835
✅ Epoch 3 | Train Loss: 0.8001 | Val Loss: 3.0835 | Free GPU: 14.14 GB




💾 Best model saved at epoch 4 | Val Loss: 2.7473
✅ Epoch 4 | Train Loss: 0.7278 | Val Loss: 2.7473 | Free GPU: 14.08 GB




✅ Epoch 5 | Train Loss: 0.6896 | Val Loss: 3.1206 | Free GPU: 14.01 GB




✅ Epoch 6 | Train Loss: 0.6549 | Val Loss: 3.2686 | Free GPU: 13.89 GB




✅ Epoch 7 | Train Loss: 0.5988 | Val Loss: 4.4595 | Free GPU: 13.95 GB




✅ Epoch 8 | Train Loss: 0.5777 | Val Loss: 3.8104 | Free GPU: 14.08 GB




✅ Epoch 9 | Train Loss: 0.5488 | Val Loss: 5.2375 | Free GPU: 14.07 GB




✅ Epoch 10 | Train Loss: 0.7828 | Val Loss: 3.7564 | Free GPU: 13.99 GB




✅ Epoch 11 | Train Loss: 0.5973 | Val Loss: 3.6777 | Free GPU: 13.96 GB




✅ Epoch 12 | Train Loss: 0.5366 | Val Loss: 4.8938 | Free GPU: 14.05 GB




✅ Epoch 13 | Train Loss: 0.5303 | Val Loss: 3.3479 | Free GPU: 14.01 GB




✅ Epoch 14 | Train Loss: 0.4934 | Val Loss: 4.0415 | Free GPU: 13.94 GB




✅ Epoch 15 | Train Loss: 0.4687 | Val Loss: 3.9914 | Free GPU: 14.05 GB




✅ Epoch 16 | Train Loss: 0.4518 | Val Loss: 4.4608 | Free GPU: 14.09 GB




✅ Epoch 17 | Train Loss: 0.4376 | Val Loss: 5.4620 | Free GPU: 14.07 GB




✅ Epoch 18 | Train Loss: 0.4280 | Val Loss: 5.6254 | Free GPU: 14.03 GB




✅ Epoch 19 | Train Loss: 0.4543 | Val Loss: 6.7120 | Free GPU: 14.03 GB




✅ Epoch 20 | Train Loss: 0.4032 | Val Loss: 4.8725 | Free GPU: 13.93 GB




✅ Epoch 21 | Train Loss: 0.3928 | Val Loss: 7.4521 | Free GPU: 14.02 GB




✅ Epoch 22 | Train Loss: 0.3904 | Val Loss: 4.5124 | Free GPU: 14.12 GB




✅ Epoch 23 | Train Loss: 0.3992 | Val Loss: 5.1738 | Free GPU: 14.04 GB




✅ Epoch 24 | Train Loss: 0.3727 | Val Loss: 4.2011 | Free GPU: 14.11 GB




✅ Epoch 25 | Train Loss: 0.3612 | Val Loss: 4.5690 | Free GPU: 14.07 GB




✅ Epoch 26 | Train Loss: 0.3498 | Val Loss: 5.2068 | Free GPU: 14.02 GB




✅ Epoch 27 | Train Loss: 0.3419 | Val Loss: 4.9231 | Free GPU: 14.06 GB




✅ Epoch 28 | Train Loss: 0.3325 | Val Loss: 5.3270 | Free GPU: 14.07 GB




✅ Epoch 29 | Train Loss: 0.3250 | Val Loss: 4.9339 | Free GPU: 14.06 GB




✅ Epoch 30 | Train Loss: 0.3253 | Val Loss: 5.3346 | Free GPU: 13.97 GB




✅ Epoch 31 | Train Loss: 0.3242 | Val Loss: 5.4281 | Free GPU: 14.03 GB




✅ Epoch 32 | Train Loss: 0.3264 | Val Loss: 6.6278 | Free GPU: 14.11 GB




✅ Epoch 33 | Train Loss: 0.3084 | Val Loss: 6.8159 | Free GPU: 14.00 GB




✅ Epoch 34 | Train Loss: 0.3127 | Val Loss: 5.5634 | Free GPU: 13.98 GB




✅ Epoch 35 | Train Loss: 0.3043 | Val Loss: 4.2039 | Free GPU: 14.04 GB




✅ Epoch 36 | Train Loss: 0.3086 | Val Loss: 4.2590 | Free GPU: 13.99 GB




✅ Epoch 37 | Train Loss: 0.2906 | Val Loss: 4.4253 | Free GPU: 14.09 GB




✅ Epoch 38 | Train Loss: 0.2912 | Val Loss: 6.0295 | Free GPU: 14.10 GB




✅ Epoch 39 | Train Loss: 0.2843 | Val Loss: 4.8113 | Free GPU: 14.04 GB




✅ Epoch 40 | Train Loss: 0.2990 | Val Loss: 5.2873 | Free GPU: 14.05 GB




✅ Epoch 41 | Train Loss: 0.2851 | Val Loss: 4.0306 | Free GPU: 14.08 GB




✅ Epoch 42 | Train Loss: 0.2763 | Val Loss: 5.0709 | Free GPU: 13.98 GB




✅ Epoch 43 | Train Loss: 0.2700 | Val Loss: 4.7329 | Free GPU: 14.09 GB




✅ Epoch 44 | Train Loss: 0.2665 | Val Loss: 4.2859 | Free GPU: 14.11 GB




✅ Epoch 45 | Train Loss: 0.2875 | Val Loss: 4.8495 | Free GPU: 14.07 GB




✅ Epoch 46 | Train Loss: 0.2698 | Val Loss: 4.4937 | Free GPU: 14.02 GB




✅ Epoch 47 | Train Loss: 0.2687 | Val Loss: 5.1597 | Free GPU: 14.01 GB




✅ Epoch 48 | Train Loss: 0.2614 | Val Loss: 4.7317 | Free GPU: 14.04 GB




✅ Epoch 49 | Train Loss: 0.3066 | Val Loss: 5.2422 | Free GPU: 14.05 GB




✅ Epoch 50 | Train Loss: 0.3530 | Val Loss: 5.0233 | Free GPU: 14.04 GB
🏁 Augmented training complete.


In [11]:
# ✅ Save augmented model to Google Drive
aug_model_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_augmented.pth"
torch.save(model.state_dict(), aug_model_path)
print(f"✅ Augmented model saved to: {aug_model_path}")


✅ Augmented model saved to: /content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_augmented.pth


In [12]:
from models.bisenet.build_bisenet import BiSeNet
from tqdm import tqdm
import torch.nn.functional as F
import numpy as np

# ✅ Reload model and set to eval mode
model = BiSeNet(num_classes=19, context_path='resnet18')
model.load_state_dict(torch.load("/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_augmented.pth"))
model = model.to(device)
model.eval()

# ✅ Metric computation
def compute_miou(preds, labels, num_classes=19):
    ious = []
    preds = preds.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()

    for cls in range(num_classes):
        pred_inds = preds == cls
        target_inds = labels == cls
        intersection = (pred_inds & target_inds).sum()
        union = (pred_inds | target_inds).sum()
        if union == 0:
            ious.append(float('nan'))  # class not present
        else:
            ious.append(intersection / union)
    return np.array(ious)

# ✅ Evaluate on validation set
ious = []
print("🔍 Evaluating mIoU...")
for images, masks in tqdm(val_loader, total=len(val_loader)):
    images = images.to(device)
    masks = masks.squeeze(1).to(device)
    with torch.no_grad():
        outputs = model(images)
        preds = outputs.argmax(dim=1)
        iou = compute_miou(preds, masks)
        ious.append(iou)

# ✅ Compute final mIoU
ious = np.stack(ious)
mean_iou = np.nanmean(ious)
print(f"📊 Final mIoU with augmentation (GTA5 → Cityscapes): {mean_iou:.4f}")


🔍 Evaluating mIoU...


100%|██████████| 250/250 [00:29<00:00,  8.55it/s]

📊 Final mIoU with augmentation (GTA5 → Cityscapes): 0.0524





In [14]:
import numpy as np
from tqdm import tqdm
import torch

NUM_CLASSES = 19
conf_matrix = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=np.int64)

model.eval()
with torch.no_grad():
    for images, targets in tqdm(val_loader, desc="🔍 Evaluating per-class IoU"):
        images = images.to(device)
        targets = targets.squeeze(1).to(device)

        preds = torch.argmax(model(images), dim=1)

        for pred, target in zip(preds, targets):
            mask = (target >= 0) & (target < NUM_CLASSES)
            hist = np.bincount(
                NUM_CLASSES * target[mask].cpu().numpy() + pred[mask].cpu().numpy(),
                minlength=NUM_CLASSES ** 2
            ).reshape(NUM_CLASSES, NUM_CLASSES)
            conf_matrix += hist

# Compute per-class IoU
intersection = np.diag(conf_matrix)
union = conf_matrix.sum(1) + conf_matrix.sum(0) - intersection
iou = intersection / np.maximum(union, 1)

labels = [
    'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light',
    'traffic sign', 'vegetation', 'terrain', 'sky', 'person', 'rider', 'car',
    'truck', 'bus', 'train', 'motorcycle', 'bicycle'
]

for name, score in zip(labels, iou):
    print(f"{name:<15}: {score:.4f}")

print(f"\n📊 Final mIoU with augmentation: {iou.mean():.4f}")


🔍 Evaluating per-class IoU: 100%|██████████| 250/250 [00:29<00:00,  8.60it/s]

road           : 0.0061
sidewalk       : 0.0321
building       : 0.5654
wall           : 0.0534
fence          : 0.0159
pole           : 0.0012
traffic light  : 0.0088
traffic sign   : 0.0028
vegetation     : 0.0000
terrain        : 0.0038
sky            : 0.0012
person         : 0.0009
rider          : 0.0455
car            : 0.2463
truck          : 0.0087
bus            : 0.0000
train          : 0.0154
motorcycle     : 0.0101
bicycle        : 0.0000

📊 Final mIoU with augmentation: 0.0536



