In [None]:
from google.colab import drive

# Google Drive is attached under this mount point; the dataset archives are
# read from it and the training checkpoints are written back to it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
import zipfile
import os

# Archive locations on the mounted Drive
gta5_zip = "/content/drive/MyDrive/Semantic_Segmentation/GTA5.zip"
cityscapes_zip = "/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip"

# (archive, extraction directory) pairs, handled in this order
extraction_plan = [
    (gta5_zip, "/content/datasets/GTA5"),
    (cityscapes_zip, "/content/datasets/Cityscapes"),
]

# Create every destination directory up front
for plan_entry in extraction_plan:
    os.makedirs(plan_entry[1], exist_ok=True)

# Unpack each archive into its own directory
for archive_path, target_dir in extraction_plan:
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(target_dir)

print("✅ Both datasets extracted.")


✅ Both datasets extracted.


In [None]:
import shutil
import os

# GTA5: the archive unpacks into an extra nested "GTA5" folder; lift its
# images/labels sub-folders one level up and drop the now-empty wrapper.
gta5_root = "/content/datasets/GTA5"
gta5_nested = os.path.join(gta5_root, "GTA5")
if os.path.exists(gta5_nested):
    for sub_folder in ("images", "labels"):
        shutil.move(os.path.join(gta5_nested, sub_folder), os.path.join(gta5_root, sub_folder))
    shutil.rmtree(gta5_nested)
    print("✅ Fixed GTA5 folder structure")

# Cityscapes: the payload sits two levels deep; move it up and rename
# "images" to "leftImg8bit" on the way, then remove the wrapper folder.
cityscapes_root = "/content/datasets/Cityscapes"
nested_city = "/content/datasets/Cityscapes/Cityscapes/Cityspaces"
if os.path.exists(nested_city):
    cityscapes_moves = (("images", "leftImg8bit"), ("gtFine", "gtFine"))
    for nested_name, final_name in cityscapes_moves:
        shutil.move(os.path.join(nested_city, nested_name), os.path.join(cityscapes_root, final_name))
    shutil.rmtree(os.path.join(cityscapes_root, "Cityscapes"))
    print("✅ Fixed Cityscapes folder structure")


✅ Fixed GTA5 folder structure
✅ Fixed Cityscapes folder structure


In [None]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

# ✅ Label mapping (GTA5 to Cityscapes IDs)
GTA5_TO_CITYSCAPES = {
    7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 15: 5, 17: 6, 19: 7, 20: 8,
    21: 9, 22: 10, 23: 11, 24: 12, 26: 13, 27: 14, 28: 15,
    31: 16, 32: 17, 33: 18
}

class GTA5Dataset(Dataset):
    """GTA5 image/label pairs with labels remapped to the 19 training classes.

    Images are read from ``<root>/images`` and labels from ``<root>/labels``.
    The two directory listings are sorted and paired by position, so both
    folders must contain the same number of files.

    Args:
        root: Dataset directory containing the ``images`` and ``labels`` folders.
        transform: Optional callable applied to the RGB image.
        target_transform: Optional callable applied to the label image before
            remapping (for example a nearest-neighbour resize + ``PILToTensor``).

    Raises:
        ValueError: If the number of images and labels differ.
    """

    def __init__(self, root, transform=None, target_transform=None):
        self.image_dir = os.path.join(root, "images")
        self.label_dir = os.path.join(root, "labels")
        self.images = sorted(os.listdir(self.image_dir))
        self.labels = sorted(os.listdir(self.label_dir))
        # Pairs are matched purely by sorted position; a count mismatch would
        # silently attach the wrong label to an image.
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"GTA5 image/label count mismatch: {len(self.images)} images in "
                f"{self.image_dir} vs {len(self.labels)} labels in {self.label_dir}"
            )
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of image/label pairs."""
        return len(self.images)

    def encode_labels(self, mask):
        """Map raw GTA5 IDs to class indices 0-18; every other ID becomes 255.

        Args:
            mask: NumPy array of raw label IDs.

        Returns:
            A new array with the same shape and dtype as ``mask``.
        """
        remapped = np.full_like(mask, 255)
        for gta_id, city_id in GTA5_TO_CITYSCAPES.items():
            remapped[mask == gta_id] = city_id
        return remapped

    def __getitem__(self, idx):
        """Return ``(img, mask)`` for sample ``idx``.

        ``img`` is the RGB image (transformed when ``transform`` is set).
        ``mask`` is always an int64 tensor of shape ``(1, H, W)`` holding the
        remapped class indices, whether or not ``target_transform`` is given.
        """
        img = Image.open(os.path.join(self.image_dir, self.images[idx])).convert("RGB")
        mask = Image.open(os.path.join(self.label_dir, self.labels[idx]))

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        # The ID remap must not depend on an optional transform being present:
        # otherwise raw GTA5 IDs would reach the loss as if they were class indices.
        mask_np = mask.numpy() if torch.is_tensor(mask) else np.asarray(mask)
        mask_np = self.encode_labels(np.squeeze(mask_np))
        mask = torch.from_numpy(mask_np).long().unsqueeze(0)

        return img, mask

class CityscapesDataset(Dataset):
    """Cityscapes images paired with their ``labelTrainIds`` annotation files.

    Images are collected from ``<root>/leftImg8bit/<split>/<city>/`` and each
    ``*_leftImg8bit.png`` file is paired with
    ``<root>/gtFine/<split>/<city>/*_gtFine_labelTrainIds.png``.
    Cities and files are visited in sorted order so that sample indices are
    reproducible across runs and machines.

    Args:
        root: Dataset directory containing ``leftImg8bit`` and ``gtFine``.
        split: Sub-folder name of the split to load (default ``'val'``).
        transform: Optional callable applied to the RGB image.
        target_transform: Optional callable applied to the label image.
    """

    def __init__(self, root, split='val', transform=None, target_transform=None):
        self.image_dir = os.path.join(root, "leftImg8bit", split)
        self.label_dir = os.path.join(root, "gtFine", split)
        self.transform = transform
        self.target_transform = target_transform

        self.images = []
        self.labels = []

        image_suffix = "_leftImg8bit.png"
        # os.listdir returns entries in arbitrary order; sort for determinism.
        for city in sorted(os.listdir(self.image_dir)):
            img_folder = os.path.join(self.image_dir, city)
            # Skip stray files (e.g. OS metadata) sitting next to the city folders.
            if not os.path.isdir(img_folder):
                continue
            label_folder = os.path.join(self.label_dir, city)

            for file_name in sorted(os.listdir(img_folder)):
                if file_name.endswith(image_suffix):
                    base = file_name[:-len(image_suffix)]
                    self.images.append(os.path.join(img_folder, file_name))
                    self.labels.append(os.path.join(label_folder, base + "_gtFine_labelTrainIds.png"))

    def __len__(self):
        """Return the number of image/label pairs found for the split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(img, mask)`` for sample ``idx``, each passed through its transform if set."""
        img = Image.open(self.images[idx]).convert("RGB")
        mask = Image.open(self.labels[idx])

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask


In [None]:
from torchvision import transforms
from torch.utils.data import DataLoader

# Image transformation (standard normalization for pretrained ImageNet models)
image_transform = transforms.Compose([
    transforms.Resize((720, 1280)),  # Match GTA5 native resolution
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Target label transformation (no normalization).
# Nearest-neighbour resizing keeps label values intact (no blended class IDs).
# torchvision's own InterpolationMode enum is used so this cell does not rely
# on `Image` having been imported by a different cell.
target_transform = transforms.Compose([
    transforms.Resize((720, 1280), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.PILToTensor(),
])

# Dataset paths
gta_path = "/content/datasets/GTA5"
cityscapes_path = "/content/datasets/Cityscapes"

# Initialize datasets: GTA5 for training, the Cityscapes 'val' split for validation
gta_dataset = GTA5Dataset(gta_path, transform=image_transform, target_transform=target_transform)
cityscapes_val_dataset = CityscapesDataset(cityscapes_path, split='val', transform=image_transform, target_transform=target_transform)

# Dataloaders: training batches are shuffled, validation batches keep dataset order
train_loader = DataLoader(gta_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(cityscapes_val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Train loader: {len(train_loader)} batches | Val loader: {len(val_loader)} batches")


✅ Train loader: 1250 batches | Val loader: 250 batches


In [None]:
# Fetch the project repository whose `models.bisenet` package is imported in the next cell.
!git clone https://github.com/Gabrysse/MLDL2024_project1.git


Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 13 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 428.00 KiB/s, done.
Resolving deltas: 100% (9/9), done.


In [None]:
import sys

# Put the cloned repository on the import path so its `models` package resolves
BISENET_REPO_DIR = "/content/MLDL2024_project1"
sys.path.append(BISENET_REPO_DIR)

from models.bisenet.build_bisenet import BiSeNet
print("✅ BiSeNet imported successfully.")


✅ BiSeNet imported successfully.


In [None]:
import numpy as np
import cv2
import torch
from PIL import Image

def extract_amplitude_phase(img_np):
    """Return the per-channel 2-D FFT amplitude and phase of an image array.

    Args:
        img_np: Array laid out as (height, width, channels).

    Returns:
        Tuple ``(amplitude, phase)``; both are laid out as
        (channels, height, width).
    """
    # Channels go first so the FFT runs over the two trailing spatial axes.
    channels_first = np.transpose(img_np, axes=(2, 0, 1))
    spectrum = np.fft.fft2(channels_first, axes=(-2, -1))
    return np.abs(spectrum), np.angle(spectrum)

def FDA_source_to_target(src_img, tgt_img, L=0.01):
    """Replace the low-frequency amplitude of ``src_img`` with that of ``tgt_img``.

    The source keeps its phase spectrum; only a small square of amplitude
    coefficients around the zero-frequency bin is taken from the target, and
    the result is transformed back to an 8-bit image.

    Args:
        src_img: Source image (PIL image or H x W x C array with values 0-255).
        tgt_img: Target image. A PIL target whose size differs from a PIL
            source is resized to the source size first, because spectra of
            different sizes cannot be exchanged bin by bin.
        L: Half-width of the swapped square as a fraction of the shorter
            image side.

    Returns:
        PIL image with the same size as the source.

    Raises:
        ValueError: If the two inputs still have different array shapes
            (e.g. mismatched NumPy inputs).
    """
    if isinstance(src_img, Image.Image) and isinstance(tgt_img, Image.Image):
        if tgt_img.size != src_img.size:
            tgt_img = tgt_img.resize(src_img.size, Image.BICUBIC)

    # Scale to [0, 1]; the division yields fresh float32 arrays.
    src_np = np.asarray(src_img, dtype=np.float32) / 255.0
    tgt_np = np.asarray(tgt_img, dtype=np.float32) / 255.0
    if src_np.shape != tgt_np.shape:
        raise ValueError(
            f"FDA needs source and target of equal shape, got {src_np.shape} and {tgt_np.shape}"
        )

    src_amp, src_phase = extract_amplitude_phase(src_np)
    tgt_amp, _ = extract_amplitude_phase(tgt_np)

    # fft2 places the zero-frequency term at index [0, 0] (the corners hold the
    # low frequencies). Shift it to the centre so that the centred window below
    # really covers the low-frequency band.
    src_amp = np.fft.fftshift(src_amp, axes=(-2, -1))
    tgt_amp = np.fft.fftshift(tgt_amp, axes=(-2, -1))

    # Swap low-frequency amplitudes
    _, h, w = src_amp.shape
    b = int(np.floor(min(h, w) * L))
    c_h = int(np.floor(h / 2.0))
    c_w = int(np.floor(w / 2.0))

    # Clamp the lower bounds so a large L cannot produce negative
    # (wrap-around) slice indices.
    h1 = max(c_h - b, 0)
    h2 = c_h + b + 1
    w1 = max(c_w - b, 0)
    w2 = c_w + b + 1

    src_amp[:, h1:h2, w1:w2] = tgt_amp[:, h1:h2, w1:w2]

    # Undo the shift so the amplitude lines up with the (unshifted) phase.
    src_amp = np.fft.ifftshift(src_amp, axes=(-2, -1))

    # Reconstruct back to image
    fft_src = src_amp * np.exp(1j * src_phase)
    src_inversed = np.fft.ifft2(fft_src, axes=(-2, -1))
    src_inversed = np.real(src_inversed)

    src_inversed = np.transpose(src_inversed, (1, 2, 0))
    src_inversed = np.clip(src_inversed * 255.0, 0, 255).astype(np.uint8)

    return Image.fromarray(src_inversed)



In [None]:
import os
import random
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import numpy as np
import torch

# Label remapping
GTA5_TO_CITYSCAPES = {
    7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 15: 5, 17: 6, 19: 7, 20: 8,
    21: 9, 22: 10, 23: 11, 24: 12, 26: 13, 27: 14, 28: 15,
    31: 16, 32: 17, 33: 18
}

class GTA5FDA(Dataset):
    """GTA5 dataset whose images are FDA-stylised towards random Cityscapes images.

    Each source image gets the low-frequency amplitude of a randomly chosen
    image from ``<cityscapes_root>/leftImg8bit/train`` (via
    ``FDA_source_to_target``) before the regular transforms run. Labels are
    remapped to class indices 0-18, with every other ID set to 255.

    Args:
        gta5_root: Directory containing the GTA5 ``images`` and ``labels`` folders.
        cityscapes_root: Directory containing ``leftImg8bit/train/<city>/``.
        transform: Optional callable applied to the stylised RGB image.
        target_transform: Optional callable applied to the label image before
            remapping.
        fda_beta: Value forwarded as ``L`` to ``FDA_source_to_target``
            (default 0.01).

    Raises:
        ValueError: If image and label counts differ, or no Cityscapes target
            image is found.
    """

    def __init__(self, gta5_root, cityscapes_root, transform=None, target_transform=None,
                 fda_beta=0.01):
        self.image_dir = os.path.join(gta5_root, "images")
        self.label_dir = os.path.join(gta5_root, "labels")
        self.cs_image_dir = os.path.join(cityscapes_root, "leftImg8bit", "train")

        self.images = sorted(os.listdir(self.image_dir))
        self.labels = sorted(os.listdir(self.label_dir))
        # Pairs are matched by sorted position; unequal counts mean wrong pairs.
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"GTA5 image/label count mismatch: {len(self.images)} images in "
                f"{self.image_dir} vs {len(self.labels)} labels in {self.label_dir}"
            )
        self.transform = transform
        self.target_transform = target_transform
        self.fda_beta = fda_beta

        # Sorted traversal keeps the candidate list identical across machines,
        # so a seeded `random` reproduces the same style targets.
        self.cityscapes_images = []
        for city in sorted(os.listdir(self.cs_image_dir)):
            img_folder = os.path.join(self.cs_image_dir, city)
            if not os.path.isdir(img_folder):
                continue  # ignore stray files next to the city folders
            for file_name in sorted(os.listdir(img_folder)):
                if file_name.endswith("_leftImg8bit.png"):
                    self.cityscapes_images.append(os.path.join(img_folder, file_name))

        # Fail here rather than inside a DataLoader worker on the first sample.
        if not self.cityscapes_images:
            raise ValueError(f"No '*_leftImg8bit.png' target images found under {self.cs_image_dir}")

    def __len__(self):
        """Return the number of GTA5 image/label pairs."""
        return len(self.images)

    def encode_labels(self, mask):
        """Map raw GTA5 IDs to class indices 0-18; every other ID becomes 255.

        Returns a new array with the same shape and dtype as ``mask``.
        """
        remapped = np.full_like(mask, 255)
        for gta_id, city_id in GTA5_TO_CITYSCAPES.items():
            remapped[mask == gta_id] = city_id
        return remapped

    def __getitem__(self, idx):
        """Return ``(img, label)`` for sample ``idx``.

        ``label`` is always an int64 tensor of shape ``(1, H, W)`` with
        remapped class indices, whether or not ``target_transform`` is given.
        """
        img_path = os.path.join(self.image_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        label = Image.open(label_path)

        # Apply FDA with a random Cityscapes image
        tgt_img_path = random.choice(self.cityscapes_images)
        tgt_img = Image.open(tgt_img_path).convert("RGB")
        img = FDA_source_to_target(img, tgt_img, L=self.fda_beta)

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            label = self.target_transform(label)

        # The ID remap must happen regardless of the optional transform,
        # otherwise raw GTA5 IDs would be used as class indices.
        label_np = label.numpy() if torch.is_tensor(label) else np.asarray(label)
        label_np = self.encode_labels(np.squeeze(label_np))
        label = torch.from_numpy(label_np).long().unsqueeze(0)

        return img, label


In [None]:
from torch.utils.data import DataLoader

# Image and label transforms
image_transform = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Labels are resized with nearest-neighbour interpolation so class IDs are
# never blended. torchvision's InterpolationMode enum is used so this cell does
# not depend on `Image` having been imported by another cell.
label_transform = transforms.Compose([
    transforms.Resize((512, 1024), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.PILToTensor()
])

#  Initialize FDA-augmented GTA5 training dataset
fda_dataset = GTA5FDA(
    gta5_root='/content/datasets/GTA5',
    cityscapes_root='/content/datasets/Cityscapes',
    transform=image_transform,
    target_transform=label_transform
)

#  Validation dataset (Cityscapes)
val_dataset = CityscapesDataset(
    root='/content/datasets/Cityscapes',
    split='val',
    transform=image_transform,
    target_transform=label_transform
)

#  Dataloaders (these replace any `train_loader` / `val_loader` defined earlier)
train_loader = DataLoader(fda_dataset, batch_size=2, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2)

print("✅ Train loader:", len(train_loader), "batches | Val loader:", len(val_loader), "batches")


✅ Train loader: 1250 batches | Val loader: 250 batches


In [None]:
from models.bisenet.build_bisenet import BiSeNet
import torch

# Prefer the GPU and fall back to the CPU when CUDA is unavailable
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# BiSeNet with a ResNet-18 context path and 19 output classes, placed on `device`
model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

print(f"✅ BiSeNet model initialized and moved to {device}")

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 140MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 105MB/s] 


✅ BiSeNet model initialized and moved to cpu


In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler

# Cross-entropy over the class logits; pixels labelled 255 (unlabeled/void)
# do not contribute to the loss
criterion = nn.CrossEntropyLoss(ignore_index=255)

# SGD with momentum and weight decay over all model parameters
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

# Gradient scaler used for mixed-precision (AMP) training
scaler = GradScaler()

print(" Loss, optimizer, and AMP scaler initialized.")


 Loss, optimizer, and AMP scaler initialized.


In [None]:
import os
import torch
from torch import nn, optim
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from models.bisenet.build_bisenet import BiSeNet

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Mixed precision is only requested on CUDA; on CPU the scaler and autocast
# are explicitly disabled instead of relying on their warning-and-disable fallback.
use_amp = device == 'cuda'

# Paths
checkpoint_dir = "/content/drive/MyDrive/Semantic_Segmentation/checkpoints_fda"
os.makedirs(checkpoint_dir, exist_ok=True)
final_model_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_fda_final.pth"

# Initialize model
model = BiSeNet(num_classes=19, context_path='resnet18').to(device)
criterion = nn.CrossEntropyLoss(ignore_index=255)  # 255 marks pixels to ignore
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scaler = GradScaler(enabled=use_amp)

best_val_loss = float('inf')
start_epoch = 1
num_epochs = 50

# Resume from checkpoint if exists
import re

# Extract numerical epoch index safely and sort accordingly
def extract_epoch(file_name):
    """Return the epoch number encoded as ``epoch_<N>.pth`` in ``file_name``, or -1 if absent."""
    match = re.search(r'epoch_(\d+)\.pth', file_name)
    return int(match.group(1)) if match else -1

# Only files that follow the per-epoch naming scheme are resume candidates;
# any other .pth file in the folder would lack the keys read below.
latest_ckpts = sorted(
    [f for f in os.listdir(checkpoint_dir) if f.endswith('.pth') and extract_epoch(f) >= 0],
    key=extract_epoch
)

if latest_ckpts:
    latest = os.path.join(checkpoint_dir, latest_ckpts[-1])
    checkpoint = torch.load(latest, map_location=device)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    # A disabled scaler saves an empty state; only restore a non-empty one
    if checkpoint.get('scaler_state'):
        scaler.load_state_dict(checkpoint['scaler_state'])

    best_val_loss = checkpoint['best_val_loss']
    start_epoch = checkpoint['epoch'] + 1
    print(f"🔁 Resumed from {latest} | Starting at epoch {start_epoch}")

# Training loop
for epoch in range(start_epoch, num_epochs + 1):
    model.train()
    total_loss = 0

    for images, masks in tqdm(train_loader, desc=f"🟢 Epoch {epoch} Training"):
        images = images.to(device)
        # Drop the channel dimension: the loss expects (B, H, W) class indices
        masks = masks.squeeze(1).to(device).long()

        optimizer.zero_grad()
        with autocast(enabled=use_amp):
            # In training mode the model yields the main output plus two
            # auxiliary outputs, each supervised with a 0.4 weight
            out, aux2, aux3 = model(images)
            loss = (criterion(out, masks) +
                    0.4 * criterion(aux2, masks) +
                    0.4 * criterion(aux3, masks))

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    avg_train_loss = total_loss / len(train_loader)

    # ✅ Validation Phase (inside epoch loop)
    model.eval()
    total_val_loss = 0

    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.squeeze(1).to(device).long()

            with autocast(enabled=use_amp):
                val_out = model(val_imgs)

            # Autocast may return half-precision logits; compute the loss in
            # float32 so it is comparable with the training loss above.
            val_out = val_out.float()

            if val_out.shape[2:] != val_masks.shape[1:]:
                val_out = torch.nn.functional.interpolate(val_out, size=val_masks.shape[1:], mode='bilinear', align_corners=False)

            val_loss = criterion(val_out, val_masks)
            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss / len(val_loader)

    #  Save best model (weights only) whenever the validation loss improves
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), final_model_path)
        print(f"💾 Best model saved at epoch {epoch} | Val Loss: {avg_val_loss:.4f}")

    #  Save checkpoint every epoch (full state, used for resuming)
    ckpt_path = os.path.join(checkpoint_dir, f"bisenet_gta5_fda_epoch_{epoch}.pth")
    checkpoint = {
        'epoch': epoch,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'scaler_state': scaler.state_dict(), # Always save scaler state
        'best_val_loss': best_val_loss
    }
    torch.save(checkpoint, ckpt_path)
    print(f"✅ Checkpoint saved: {ckpt_path}")
    print(f"📊 Epoch {epoch} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

print("FDA-based training complete.")

🔁 Resumed from /content/drive/MyDrive/Semantic_Segmentation/checkpoints_fda/bisenet_gta5_fda_epoch_49.pth | Starting at epoch 50


🟢 Epoch 50 Training: 100%|██████████| 1250/1250 [5:19:28<00:00, 15.34s/it]


✅ Checkpoint saved: /content/drive/MyDrive/Semantic_Segmentation/checkpoints_fda/bisenet_gta5_fda_epoch_50.pth
📊 Epoch 50 | Train Loss: 0.2605 | Val Loss: 5.3436
FDA-based training complete.


In [None]:
import torch
from tqdm import tqdm
from models.bisenet.build_bisenet import BiSeNet

# Evaluation settings
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_classes = 19
checkpoint_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_fda_final.pth"

# Rebuild the network, restore the saved weights, and switch to inference mode
model = BiSeNet(num_classes=num_classes, context_path='resnet18')
saved_weights = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(saved_weights)
model = model.to(device)
model.eval()

# Evaluation Function (per-class IoU + mIoU)
def evaluate_miou(model, dataloader, num_classes=19):
    """Print per-class IoU and the mean IoU of ``model`` over ``dataloader``.

    Args:
        model: Callable returning class logits of shape (B, C, H, W), or a
            tuple whose first element is that tensor. The caller is expected
            to have put it in the desired (eval) mode.
        dataloader: Iterable of ``(images, labels)`` batches; labels have shape
            (B, 1, H, W) and may use any integer dtype. Label values outside
            ``[0, num_classes)`` (e.g. 255) are ignored.
        num_classes: Number of classes in the confusion matrix.

    Returns:
        None. Results are printed.
    """
    # Run on whatever device the model lives on instead of a notebook global.
    first_param = next(iter(model.parameters()), None) if hasattr(model, "parameters") else None
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Integer accumulator: pixel counts exceed what float32 can represent exactly.
    hist = torch.zeros(num_classes, num_classes, dtype=torch.int64, device=run_device)

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="🔍 Evaluating mIoU"):
            images = images.to(run_device)
            # Cast to int64 first: with uint8 labels, `num_classes * t` below
            # wraps around modulo 256 and misfiles every class >= 14.
            labels = labels.squeeze(1).to(run_device).long()  # [B, H, W]

            preds = model(images)
            if isinstance(preds, tuple):  # Handle multi-output model
                preds = preds[0]
            preds = torch.argmax(preds, dim=1)  # [B, H, W]

            for p, t in zip(preds, labels):
                # Flatten predictions and labels
                p = p.reshape(-1)
                t = t.reshape(-1)
                valid = (t >= 0) & (t < num_classes)
                p = p[valid]
                t = t[valid]

                # Confusion matrix accumulation: row = ground truth, column = prediction
                hist += torch.bincount(num_classes * t + p, minlength=num_classes**2).reshape(num_classes, num_classes)

    # Compute per-class IoU and mIoU (epsilon avoids division by zero for absent classes)
    hist = hist.double()
    iou = hist.diag() / (hist.sum(1) + hist.sum(0) - hist.diag() + 1e-6)
    for i, val in enumerate(iou):
        print(f"Class {i:02d}: IoU = {val:.4f}")
    print(f"\n📊 Final mIoU: {iou.mean():.4f}")

# Report per-class IoU and mIoU of the loaded model on the validation loader
evaluate_miou(model, val_loader, num_classes)


🔍 Evaluating mIoU: 100%|██████████| 250/250 [14:34<00:00,  3.50s/it]

Class 00: IoU = 0.4281
Class 01: IoU = 0.1856
Class 02: IoU = 0.5618
Class 03: IoU = 0.0354
Class 04: IoU = 0.0345
Class 05: IoU = 0.0162
Class 06: IoU = 0.0073
Class 07: IoU = 0.0000
Class 08: IoU = 0.0000
Class 09: IoU = 0.0019
Class 10: IoU = 0.0005
Class 11: IoU = 0.0012
Class 12: IoU = 0.0031
Class 13: IoU = 0.3649
Class 14: IoU = 0.0000
Class 15: IoU = 0.0000
Class 16: IoU = 0.0000
Class 17: IoU = 0.0000
Class 18: IoU = 0.0000

📊 Final mIoU: 0.0863



