In [2]:
# Colab-only helper used to attach Google Drive to this runtime.
from google.colab import drive
# Later cells read the dataset ZIPs from, and write checkpoints to, paths under this mount point.
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
import zipfile
import os

# Source archives stored on the mounted Google Drive
gta5_zip = "/content/drive/MyDrive/Semantic_Segmentation/GTA5.zip"
cityscapes_zip = "/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip"

# Destination folders on the Colab VM's local disk
gta5_extract_to = "/content/datasets/GTA5"
cityscapes_extract_to = "/content/datasets/Cityscapes"

# One (archive, destination, message) entry per dataset. An archive is unpacked
# only when its destination folder does not exist yet, so re-running the cell
# skips work that was already done.
_extraction_jobs = (
    (gta5_zip, gta5_extract_to, "✅ GTA5 dataset extracted"),
    (cityscapes_zip, cityscapes_extract_to, "✅ Cityscapes dataset extracted"),
)

for _archive, _destination, _done_message in _extraction_jobs:
    if os.path.exists(_destination):
        continue
    with zipfile.ZipFile(_archive, 'r') as zip_ref:
        zip_ref.extractall(_destination)
    print(_done_message)


✅ GTA5 dataset extracted
✅ Cityscapes dataset extracted


In [4]:
import shutil
import os

# ✅ GTA5: if extraction left a nested GTA5/GTA5 folder, hoist its images/ and
# labels/ sub-folders one level up and delete the emptied inner folder.
gta5_base = "/content/datasets/GTA5"
nested_gta5 = os.path.join(gta5_base, "GTA5")

gta5_needs_fix = os.path.exists(os.path.join(nested_gta5, "images"))
if gta5_needs_fix:
    for subfolder in ("images", "labels"):
        shutil.move(os.path.join(nested_gta5, subfolder), gta5_base)
    shutil.rmtree(nested_gta5)
    print("✅ Fixed GTA5 folder structure.")

# ✅ Cityscapes: the data sits under a doubly nested, misspelled folder
# (Cityscapes/Cityspaces). Pull gtFine/ up to the dataset root and move images/
# up under the name leftImg8bit/, then delete the nested tree.
city_base = "/content/datasets/Cityscapes"
nested_city = os.path.join(city_base, "Cityscapes", "Cityspaces")

if os.path.exists(nested_city):
    relocations = (
        ("gtFine", city_base),
        ("images", os.path.join(city_base, "leftImg8bit")),
    )
    for source_name, target in relocations:
        shutil.move(os.path.join(nested_city, source_name), target)
    shutil.rmtree(os.path.join(city_base, "Cityscapes"))
    print("✅ Fixed Cityscapes folder structure.")


✅ Fixed GTA5 folder structure.
✅ Fixed Cityscapes folder structure.


In [5]:
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
import torch

# ✅ GTA5 → Cityscapes TrainID Mapping (19 classes)
GTA5_to_Cityscapes = {
    7: 0,     # road
    8: 1,     # sidewalk
    11: 2,    # building
    12: 3,    # wall
    13: 4,    # fence
    17: 5,    # pole
    19: 6,    # traffic light
    20: 7,    # traffic sign
    21: 8,    # vegetation
    22: 9,    # terrain
    23: 10,   # sky
    24: 11,   # person
    25: 12,   # rider
    26: 13,   # car
    27: 14,   # truck
    28: 15,   # bus
    31: 16,   # train
    32: 17,   # motorcycle
    33: 18    # bicycle
}

class GTA5Dataset(Dataset):
    """GTA5 image/label pairs with labels remapped to the 19 train IDs above.

    Args:
        root: Directory containing an ``images`` and a ``labels`` sub-folder.
        transform: Optional callable applied to the RGB PIL image.
        target_transform: Optional callable applied to the PIL label image
            before the IDs are remapped.

    Raises:
        ValueError: If ``images`` and ``labels`` contain a different number of
            files. Samples are paired by sorted position, so a count mismatch
            would silently pair images with the wrong labels.
    """

    def __init__(self, root, transform=None, target_transform=None):
        self.root = root
        self.images_dir = os.path.join(root, 'images')
        self.labels_dir = os.path.join(root, 'labels')
        self.transform = transform
        self.target_transform = target_transform

        # Image i is paired with label i after sorting both listings.
        self.images = sorted(os.listdir(self.images_dir))
        self.labels = sorted(os.listdir(self.labels_dir))

        if len(self.images) != len(self.labels):
            raise ValueError(
                f"GTA5 image/label count mismatch: {len(self.images)} files in "
                f"{self.images_dir} vs {len(self.labels)} files in {self.labels_dir}"
            )

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)``; ``label`` is a 2-D int64 tensor of train IDs (255 = ignore)."""
        image_path = os.path.join(self.images_dir, self.images[idx])
        label_path = os.path.join(self.labels_dir, self.labels[idx])

        image = Image.open(image_path).convert('RGB')
        label = Image.open(label_path)

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        # Always remap: previously this only happened when a target_transform was
        # given, so otherwise the raw GTA5 IDs leaked out as a PIL image.
        label = self.remap_labels(label)

        return image, label

    def remap_labels(self, mask_tensor):
        """Map GTA5 label IDs to train IDs; every unmapped ID becomes 255.

        Args:
            mask_tensor: Label mask as a CPU tensor (singleton dims are squeezed
                away, e.g. ``[1, H, W]`` -> ``[H, W]``), a NumPy array, or a PIL
                image.

        Returns:
            ``torch.int64`` tensor with the squeezed shape of the input.
        """
        if isinstance(mask_tensor, torch.Tensor):
            mask_np = mask_tensor.squeeze().numpy()
        else:
            # PIL images and arrays take this path (no target_transform case).
            mask_np = np.asarray(mask_tensor).squeeze()

        remapped = np.full_like(mask_np, 255)  # default everything to the ignore index

        for gta_id, city_id in GTA5_to_Cityscapes.items():
            remapped[mask_np == gta_id] = city_id

        # Return as tensor (no channel)
        return torch.from_numpy(remapped).long()

print("✅ GTA5Dataset class defined with correct label remapping.")


✅ GTA5Dataset class defined with correct label remapping.


In [6]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

class CityscapesDataset(Dataset):
    """Cityscapes images paired with their ``*_gtFine_labelTrainIds.png`` masks.

    Expects ``<root>/leftImg8bit/<split>/<city>/*_leftImg8bit.png`` and
    ``<root>/gtFine/<split>/<city>/*_gtFine_labelTrainIds.png``. Images whose
    mask file does not exist are skipped.

    Args:
        split: Sub-folder of ``leftImg8bit`` / ``gtFine`` to read (default ``'val'``).
        transform: Optional callable applied to the RGB PIL image.
        target_transform: Optional callable applied to the PIL mask.
        root: Dataset root folder; defaults to the location used by this notebook.
    """

    IMAGE_SUFFIX = "_leftImg8bit.png"
    MASK_SUFFIX = "_gtFine_labelTrainIds.png"

    def __init__(self, split='val', transform=None, target_transform=None,
                 root="/content/datasets/Cityscapes"):
        self.split = split
        self.transform = transform
        self.target_transform = target_transform
        self.root = root

        self.images = []
        self.masks = []

        images_base = os.path.join(root, "leftImg8bit")
        masks_base = os.path.join(root, "gtFine")

        cities_path = os.path.join(images_base, split)
        # Sorted listings make the sample order independent of the filesystem.
        for city in sorted(os.listdir(cities_path)):
            img_dir = os.path.join(cities_path, city)
            if not os.path.isdir(img_dir):
                # Stray files in the split folder would break listdir below.
                continue
            mask_dir = os.path.join(masks_base, split, city)

            for file_name in sorted(os.listdir(img_dir)):
                if not file_name.endswith(self.IMAGE_SUFFIX):
                    continue
                base = file_name[:-len(self.IMAGE_SUFFIX)]
                img_path = os.path.join(img_dir, file_name)
                mask_path = os.path.join(mask_dir, base + self.MASK_SUFFIX)
                if os.path.exists(mask_path):
                    self.images.append(img_path)
                    self.masks.append(mask_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(img, mask)`` after applying the optional transforms."""
        img = Image.open(self.images[idx]).convert("RGB")
        mask = Image.open(self.masks[idx])

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask

print("✅ CityscapesDataset class defined for validation.")


✅ CityscapesDataset class defined for validation.


In [7]:
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.transforms import InterpolationMode
from PIL import Image

# Output resolution (height, width) shared by images and masks so they stay pixel-aligned
TARGET_SIZE = (512, 1024)

# ✅ Standard image transform (GTA5 and Cityscapes)
image_transform = transforms.Compose([
    transforms.Resize(TARGET_SIZE, interpolation=InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# ✅ Mask transform (nearest neighbor resize only, so no new label IDs are invented
# by interpolation). Uses the InterpolationMode enum like the image transform
# instead of the PIL integer constant.
mask_transform = transforms.Compose([
    transforms.Resize(TARGET_SIZE, interpolation=InterpolationMode.NEAREST),
    transforms.PILToTensor()  # Keeps shape [1, H, W] and type uint8
])

# ✅ Instantiate datasets
gta5_dataset = GTA5Dataset(
    root='/content/datasets/GTA5',
    transform=image_transform,
    target_transform=mask_transform
)

cityscapes_val_dataset = CityscapesDataset(
    split='val',
    transform=image_transform,
    target_transform=mask_transform
)

# ✅ Create dataloaders (training data shuffled, validation data in fixed order)
train_loader = DataLoader(gta5_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(cityscapes_val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ GTA5 dataset size: {len(gta5_dataset)} | Cityscapes val size: {len(cityscapes_val_dataset)}")


✅ GTA5 dataset size: 2500 | Cityscapes val size: 500


In [8]:
# ✅ Clone the official project repo
# It provides the `models.bisenet` package imported by the following cells.
# NOTE(review): the clone is not pinned to a commit or tag — consider checking out a fixed
# revision so the model code stays reproducible.
!git clone https://github.com/Gabrysse/MLDL2024_project1.git


Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 11.29 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [9]:
import sys
# Put the cloned repository on the import path so its top-level `models` package resolves.
sys.path.append("/content/MLDL2024_project1")

# Import check: if this fails, the clone is missing or the path above is wrong
from models.bisenet.build_bisenet import BiSeNet
print("✅ BiSeNet import successful.")


✅ BiSeNet import successful.


In [10]:
from models.bisenet.build_bisenet import BiSeNet

# ✅ Initialize BiSeNet with ResNet18 backbone and 19 output classes
# (the same 19 train IDs the GTA5 label mapping targets)
model = BiSeNet(num_classes=19, context_path='resnet18')
model = model.to('cuda')  # Device is hard-coded: there is no CPU fallback here, a GPU runtime is required

print("✅ BiSeNet model initialized and moved to GPU.")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 225MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 231MB/s]


✅ BiSeNet model initialized and moved to GPU.


In [11]:
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler

# ✅ Loss: cross-entropy that skips every target pixel labelled 255
IGNORE_LABEL = 255
criterion = nn.CrossEntropyLoss(ignore_index=IGNORE_LABEL)

# ✅ Optimizer: SGD with momentum and weight decay over all model parameters
sgd_settings = dict(lr=0.01, momentum=0.9, weight_decay=1e-4)
optimizer = optim.SGD(model.parameters(), **sgd_settings)

# ✅ Gradient scaler used together with autocast in the training loop
scaler = GradScaler()

print("✅ Loss function, optimizer, and mixed precision scaler initialized.")


✅ Loss function, optimizer, and mixed precision scaler initialized.


In [12]:
import torch
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import gc
import os

# ✅ Training configuration
epochs = 50
best_val_loss = float("inf")
save_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta2city_best.pth"

print("🟢 Starting BiSeNet training (GTA5 → Cityscapes)...")

for epoch in range(epochs):
    # ---- Training pass over the GTA5 loader ----
    model.train()
    train_loss = 0.0
    loop = tqdm(train_loader, total=len(train_loader), desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)

    for images, targets in loop:
        images = images.to('cuda')
        targets = targets.squeeze(1).long().to('cuda')

        optimizer.zero_grad()

        # In train mode the model yields the main prediction plus two auxiliary ones;
        # the auxiliary losses are summed and weighted by 0.4.
        with autocast():
            output, aux1, aux2 = model(images)
            main_term = criterion(output, targets)
            aux_term = criterion(aux1, targets) + criterion(aux2, targets)
            loss = main_term + 0.4 * aux_term

        # Scaled backward/step/update sequence required by GradScaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar once and reuse it for the running sum and the progress bar
        step_loss = loss.item()
        train_loss += step_loss
        loop.set_postfix(loss=step_loss)

    avg_train_loss = train_loss / len(train_loader)

    # ---- ✅ Validation pass over the Cityscapes loader (no gradients) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to('cuda')
            val_masks = val_masks.squeeze(1).long().to('cuda')

            # In eval mode the model's single output is fed straight to the criterion
            with autocast():
                val_loss_batch = criterion(model(val_imgs), val_masks)

            val_loss += val_loss_batch.item()

    avg_val_loss = val_loss / len(val_loader)

    # ---- ✅ Keep the weights with the lowest average validation loss so far ----
    is_new_best = avg_val_loss < best_val_loss
    if is_new_best:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f}")

    # ---- ✅ Memory cleanup and free-memory report (bytes -> GiB) ----
    torch.cuda.empty_cache()
    gc.collect()
    free_bytes, _total_bytes = torch.cuda.mem_get_info()
    mem_free = free_bytes / (1024 ** 3)

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Free GPU: {mem_free:.2f} GB")

print("🏁 BiSeNet training (Step 3a) complete.")


🟢 Starting BiSeNet training (GTA5 → Cityscapes)...




💾 Best model saved at epoch 1 | Val Loss: 2.2858
✅ Epoch 1 | Train Loss: 1.1986 | Val Loss: 2.2858 | Free GPU: 21.41 GB




💾 Best model saved at epoch 2 | Val Loss: 1.0293
✅ Epoch 2 | Train Loss: 0.8446 | Val Loss: 1.0293 | Free GPU: 21.43 GB




✅ Epoch 3 | Train Loss: 0.7361 | Val Loss: 2.6784 | Free GPU: 21.36 GB




✅ Epoch 4 | Train Loss: 0.6736 | Val Loss: 2.2253 | Free GPU: 21.37 GB




✅ Epoch 5 | Train Loss: 0.6152 | Val Loss: 1.2436 | Free GPU: 21.36 GB




✅ Epoch 6 | Train Loss: 0.5925 | Val Loss: 1.8468 | Free GPU: 21.33 GB




✅ Epoch 7 | Train Loss: 0.5271 | Val Loss: 2.4453 | Free GPU: 21.31 GB




✅ Epoch 8 | Train Loss: 0.4941 | Val Loss: 3.3287 | Free GPU: 21.38 GB




✅ Epoch 9 | Train Loss: 0.4826 | Val Loss: 2.0138 | Free GPU: 21.36 GB




✅ Epoch 10 | Train Loss: 0.4647 | Val Loss: 3.2641 | Free GPU: 21.33 GB




✅ Epoch 11 | Train Loss: 0.4221 | Val Loss: 3.5666 | Free GPU: 21.24 GB




✅ Epoch 12 | Train Loss: 0.4101 | Val Loss: 5.7119 | Free GPU: 21.36 GB




✅ Epoch 13 | Train Loss: 0.3968 | Val Loss: 3.6393 | Free GPU: 21.29 GB




✅ Epoch 14 | Train Loss: 0.3803 | Val Loss: 3.9428 | Free GPU: 21.39 GB




✅ Epoch 15 | Train Loss: 0.3591 | Val Loss: 3.9348 | Free GPU: 21.34 GB




✅ Epoch 16 | Train Loss: 0.3495 | Val Loss: 4.1406 | Free GPU: 21.37 GB




✅ Epoch 17 | Train Loss: 0.3409 | Val Loss: 3.2966 | Free GPU: 21.35 GB




✅ Epoch 18 | Train Loss: 0.3197 | Val Loss: 3.2982 | Free GPU: 21.36 GB




✅ Epoch 19 | Train Loss: 0.3157 | Val Loss: 2.8801 | Free GPU: 21.36 GB




✅ Epoch 20 | Train Loss: 0.3080 | Val Loss: 3.1485 | Free GPU: 21.40 GB




✅ Epoch 21 | Train Loss: 0.3491 | Val Loss: 2.6694 | Free GPU: 21.32 GB




✅ Epoch 22 | Train Loss: 0.3038 | Val Loss: 3.8074 | Free GPU: 21.41 GB




✅ Epoch 23 | Train Loss: 0.2926 | Val Loss: 2.9460 | Free GPU: 21.37 GB




✅ Epoch 24 | Train Loss: 0.2809 | Val Loss: 4.0715 | Free GPU: 21.39 GB




✅ Epoch 25 | Train Loss: 0.2750 | Val Loss: 2.9981 | Free GPU: 21.35 GB




✅ Epoch 26 | Train Loss: 0.2907 | Val Loss: 4.2110 | Free GPU: 21.36 GB




✅ Epoch 27 | Train Loss: 0.2902 | Val Loss: 6.6587 | Free GPU: 21.36 GB




✅ Epoch 28 | Train Loss: 0.2746 | Val Loss: 4.1887 | Free GPU: 21.37 GB




✅ Epoch 29 | Train Loss: 0.2597 | Val Loss: 3.7098 | Free GPU: 21.37 GB




✅ Epoch 30 | Train Loss: 0.2541 | Val Loss: 3.2393 | Free GPU: 21.33 GB




✅ Epoch 31 | Train Loss: 0.2496 | Val Loss: 4.3402 | Free GPU: 21.35 GB




✅ Epoch 32 | Train Loss: 0.2490 | Val Loss: 3.2749 | Free GPU: 21.23 GB




✅ Epoch 33 | Train Loss: 0.2466 | Val Loss: 2.6625 | Free GPU: 21.29 GB




✅ Epoch 34 | Train Loss: 0.2409 | Val Loss: 2.3207 | Free GPU: 21.35 GB




✅ Epoch 35 | Train Loss: 0.2359 | Val Loss: 3.0595 | Free GPU: 21.34 GB




✅ Epoch 36 | Train Loss: 0.2315 | Val Loss: 3.0488 | Free GPU: 21.29 GB




✅ Epoch 37 | Train Loss: 0.2292 | Val Loss: 3.6287 | Free GPU: 21.38 GB




✅ Epoch 38 | Train Loss: 0.2305 | Val Loss: 3.8360 | Free GPU: 21.37 GB




✅ Epoch 39 | Train Loss: 0.2254 | Val Loss: 3.4550 | Free GPU: 21.25 GB




✅ Epoch 40 | Train Loss: 0.3445 | Val Loss: 2.5358 | Free GPU: 21.38 GB




✅ Epoch 41 | Train Loss: 0.3138 | Val Loss: 2.8711 | Free GPU: 21.33 GB




✅ Epoch 42 | Train Loss: 0.2609 | Val Loss: 2.9445 | Free GPU: 21.35 GB




✅ Epoch 43 | Train Loss: 0.2337 | Val Loss: 3.0068 | Free GPU: 21.37 GB




✅ Epoch 44 | Train Loss: 0.2260 | Val Loss: 3.9424 | Free GPU: 21.40 GB




✅ Epoch 45 | Train Loss: 0.2202 | Val Loss: 2.6222 | Free GPU: 21.34 GB




✅ Epoch 46 | Train Loss: 0.2170 | Val Loss: 3.9367 | Free GPU: 21.33 GB




✅ Epoch 47 | Train Loss: 0.2149 | Val Loss: 3.0132 | Free GPU: 21.39 GB




✅ Epoch 48 | Train Loss: 0.2157 | Val Loss: 1.3386 | Free GPU: 21.36 GB




✅ Epoch 49 | Train Loss: 0.2104 | Val Loss: 2.2574 | Free GPU: 21.45 GB




✅ Epoch 50 | Train Loss: 0.2147 | Val Loss: 3.1058 | Free GPU: 21.33 GB
🏁 BiSeNet training (Step 3a) complete.


In [13]:
# ✅ Save final GTA5-trained model to Drive
# These are the weights as they stand after the last epoch. The training cell separately
# writes the lowest-validation-loss weights to bisenet_gta2city_best.pth.
final_model_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_final.pth"
torch.save(model.state_dict(), final_model_path)
print(f"✅ Model saved to: {final_model_path}")


✅ Model saved to: /content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_final.pth


In [14]:
import torch
import numpy as np
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Re-load BiSeNet model with ResNet18 backbone
from models.bisenet.build_bisenet import BiSeNet

model = BiSeNet(num_classes=19, context_path='resnet18')
# map_location remaps the stored tensors onto `device`; without it a checkpoint
# saved from a CUDA model cannot be loaded when this cell falls back to the CPU.
state_dict = torch.load(
    "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_final.pth",
    map_location=device,
)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# ✅ mIoU calculation setup: rows = ground-truth class, columns = predicted class
num_classes = 19
conf_matrix = np.zeros((num_classes, num_classes), dtype=np.int64)

def update_conf_matrix(pred, label, conf_matrix):
    """Accumulate one prediction/label pair into a square confusion matrix.

    Args:
        pred: Integer array of predicted class IDs, same shape as ``label``.
        label: Integer array of ground-truth class IDs. Entries outside
            ``[0, C)`` (e.g. the 255 ignore value) are skipped.
        conf_matrix: ``(C, C)`` integer array, updated in place; rows index the
            ground-truth class and columns the predicted class.

    Returns:
        The same ``conf_matrix`` object, for call-site convenience.
    """
    # Take the class count from the matrix itself rather than from a global.
    n_classes = conf_matrix.shape[0]
    pred = np.asarray(pred)
    label = np.asarray(label)

    # Also drop out-of-range predictions: they would push bincount past
    # n_classes**2 entries and make the reshape below fail.
    valid = (label >= 0) & (label < n_classes) & (pred >= 0) & (pred < n_classes)

    # Encode each (label, pred) pair as a single index into the flattened matrix.
    flat_index = n_classes * label[valid].astype(np.int64) + pred[valid].astype(np.int64)
    hist = np.bincount(flat_index, minlength=n_classes ** 2).reshape(n_classes, n_classes)
    conf_matrix += hist
    return conf_matrix

# ✅ Iterate through Cityscapes val set and accumulate the confusion matrix
with torch.no_grad():
    for images, targets in tqdm(val_loader, desc="Evaluating mIoU"):
        images = images.to(device)
        targets = targets.squeeze(1).cpu().numpy()

        # The model is in eval mode here, where it returns a single output tensor
        outputs = model(images)
        preds = outputs.argmax(dim=1).cpu().numpy()

        for pred, label in zip(preds, targets):
            conf_matrix = update_conf_matrix(pred, label, conf_matrix)

# ✅ Compute mIoU
intersection = np.diag(conf_matrix)
union = conf_matrix.sum(1) + conf_matrix.sum(0) - intersection
# A class that never appears in the labels or the predictions has union == 0.
# Leave its IoU as NaN so nanmean skips it; the previous `union + 1e-10`
# turned it into 0.0 and made the nanmean a no-op.
with np.errstate(divide="ignore", invalid="ignore"):
    iou = intersection / union
miou = np.nanmean(iou)

print(f"📊 Final mIoU (GTA5 → Cityscapes): {miou:.4f}")


Evaluating mIoU: 100%|██████████| 250/250 [00:30<00:00,  8.27it/s]

📊 Final mIoU (GTA5 → Cityscapes): 0.1697





In [15]:
import torch
import numpy as np
from tqdm import tqdm
from models.bisenet.build_bisenet import BiSeNet
from sklearn.metrics import confusion_matrix

# ✅ Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 19
# Display names, listed in train-ID order (index i is the name of class i)
class_names = [
    'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light', 'traffic sign',
    'vegetation', 'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle'
]

# ✅ Load model
model = BiSeNet(num_classes=num_classes, context_path='resnet18')
# map_location remaps the stored tensors onto `device`; without it a checkpoint
# saved from a CUDA model cannot be loaded when this cell falls back to the CPU.
state_dict = torch.load(
    "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_final.pth",
    map_location=device,
)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# ✅ Confusion matrix initialization (rows = ground truth, columns = prediction)
conf_matrix = np.zeros((num_classes, num_classes), dtype=np.int64)

# ✅ Evaluation loop: accumulate one confusion matrix per image into conf_matrix
print("🔍 Computing per-class IoU...")
class_ids = np.arange(num_classes)
with torch.no_grad():
    for images, targets in tqdm(val_loader):
        images = images.to(device)
        targets = targets.squeeze(1).cpu().numpy()

        logits = model(images)
        preds = torch.argmax(logits, dim=1).cpu().numpy()

        for gt, pred in zip(targets, preds):
            # Pixels labelled 255 are excluded; boolean indexing already yields 1-D arrays
            keep = gt != 255
            conf_matrix += confusion_matrix(gt[keep], pred[keep], labels=class_ids)

# ✅ Per-class IoU = diagonal / (row sum + column sum - diagonal)
intersection = np.diag(conf_matrix)
union = conf_matrix.sum(1) + conf_matrix.sum(0) - intersection
# Clamp the denominator to at least 1 so a class with an empty union scores 0 instead of dividing by zero
safe_union = np.maximum(union, 1)
iou = intersection / safe_union

# ✅ Print the mean followed by one row per class
mean_iou = np.mean(iou)
print("\n📊 Table 3 — Per-class IoUs (GTA5 → Cityscapes):")
print(f"mIoU: {mean_iou:.4f}")
for class_index, cls_name in enumerate(class_names):
    print(f"{cls_name:<15}: {iou[class_index]:.4f}")


🔍 Computing per-class IoU...


100%|██████████| 250/250 [00:30<00:00,  8.25it/s]


📊 Table 3 — Per-class IoUs (GTA5 → Cityscapes):
mIoU: 0.1697
road           : 0.0744
sidewalk       : 0.1029
building       : 0.6755
wall           : 0.1060
fence          : 0.0510
pole           : 0.1623
traffic light  : 0.0390
traffic sign   : 0.0391
vegetation     : 0.6584
terrain        : 0.0438
sky            : 0.6856
person         : 0.3097
rider          : 0.0229
car            : 0.1851
truck          : 0.0284
bus            : 0.0252
train          : 0.0000
motorcycle     : 0.0069
bicycle        : 0.0087



