In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archives
# from it and write the model checkpoint to it.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import zipfile
import os

# Dataset archives stored on Google Drive
gta5_zip = "/content/drive/MyDrive/Semantic_Segmentation/GTA5.zip"
cityscapes_zip = "/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip"

# Local directory that receives the unpacked datasets
extract_path = "/content/datasets"

# Unpack GTA5 first, then Cityscapes, into the same destination
for archive_path in (gta5_zip, cityscapes_zip):
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

print("✅ Datasets extracted")


✅ Datasets extracted


In [None]:
import shutil
import os

base_path = "/content/datasets/Cityscapes/Cityspaces"

# The extracted data sits one folder too deep: hoist both sub-folders up one
# level (renaming "images" to "leftImg8bit"), then drop the empty wrapper.
if not os.path.exists(base_path):
    print("⚠️ Folder structure already appears correct.")
else:
    relocations = (
        ("gtFine", "/content/datasets/Cityscapes/gtFine"),
        ("images", "/content/datasets/Cityscapes/leftImg8bit"),
    )
    for subfolder, destination in relocations:
        shutil.move(os.path.join(base_path, subfolder), destination)
    shutil.rmtree(base_path)
    print("✅ Fixed Cityscapes folder structure")


✅ Fixed Cityscapes folder structure


Defining the datasets with combined augmentations

In [None]:
import os
import random
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import functional as TF
from PIL import Image

# ✅ GTA5 → Cityscapes label remapping
# Keys are Cityscapes label ids as stored in the GTA5 masks, values are the 19
# train ids (0-18) used by the Cityscapes "labelTrainIds" ground truth.
# The table follows the official Cityscapes label definitions: id 15 (bridge)
# is void and must NOT be mapped, and rider (25) must map to 12. Getting this
# wrong shifts classes 5-12 by one and makes training labels disagree with the
# validation labels.
# Ids missing from this table are treated as void (255) by the code that
# applies it.
GTA5_TO_CITYSCAPES = {
    7: 0,    # road
    8: 1,    # sidewalk
    11: 2,   # building
    12: 3,   # wall
    13: 4,   # fence
    17: 5,   # pole
    19: 6,   # traffic light
    20: 7,   # traffic sign
    21: 8,   # vegetation
    22: 9,   # terrain
    23: 10,  # sky
    24: 11,  # person
    25: 12,  # rider
    26: 13,  # car
    27: 14,  # truck
    28: 15,  # bus
    31: 16,  # train
    32: 17,  # motorcycle
    33: 18,  # bicycle
}

class GTA5Dataset(Dataset):
    """GTA5 image/mask pairs with optional flip and colour-jitter augmentation.

    Expects ``root/images`` and ``root/labels``. Files are paired by their
    position in the two sorted directory listings.

    Args:
        root: Dataset directory containing ``images`` and ``labels``.
        transform: Optional callable applied to the RGB PIL image.
        target_transform: Optional callable applied to the PIL mask. Its result
            must be a tensor (``.squeeze().numpy()`` is called on it). When it
            is given, the mask is also remapped through ``GTA5_TO_CITYSCAPES``;
            when it is omitted, the PIL mask is returned as-is, NOT remapped.
        apply_aug: When True, each sample is randomly flipped horizontally
            (p=0.5, image and mask together) and colour-jittered (p=0.5,
            image only).

    Raises:
        ValueError: If ``images`` and ``labels`` contain a different number of
            files, since index-based pairing would then be misaligned.
    """

    def __init__(self, root, transform=None, target_transform=None, apply_aug=False):
        self.image_dir = os.path.join(root, "images")
        self.label_dir = os.path.join(root, "labels")
        self.images = sorted(os.listdir(self.image_dir))
        self.labels = sorted(os.listdir(self.label_dir))

        # Pairing is purely positional, so a missing or extra file would shift
        # every following image onto the wrong mask without any error.
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"GTA5 image/label count mismatch: {len(self.images)} images in "
                f"{self.image_dir} vs {len(self.labels)} labels in {self.label_dir}"
            )

        self.transform = transform
        self.target_transform = target_transform
        self.apply_aug = apply_aug

        # Photometric augmentation; only ever applied to the image.
        self.color_jitter = transforms.ColorJitter(
            brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05
        )

    def __len__(self):
        """Return the number of image files found."""
        return len(self.images)

    def encode_labels(self, mask):
        """Remap a numpy mask through ``GTA5_TO_CITYSCAPES``.

        Every value that is not a key of the table becomes 255. The result has
        the same shape and dtype as ``mask``.
        """
        remapped = np.full_like(mask, 255)
        for gta_id, city_id in GTA5_TO_CITYSCAPES.items():
            remapped[mask == gta_id] = city_id
        return remapped

    def __getitem__(self, idx):
        """Load, augment and transform sample ``idx``; return ``(img, mask)``."""
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.label_dir, self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)

        if self.apply_aug:
            # Geometric augmentation must hit image and mask identically.
            if random.random() < 0.5:
                img = TF.hflip(img)
                mask = TF.hflip(mask)
            if random.random() < 0.5:
                img = self.color_jitter(img)

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)
            mask = self.encode_labels(mask.squeeze().numpy())
            # Restore a leading singleton dim and use int64 class ids.
            mask = torch.from_numpy(mask).long().unsqueeze(0)

        return img, mask

class CityscapesDataset(Dataset):
    """Cityscapes images paired with their ``*_gtFine_labelTrainIds.png`` masks.

    Scans ``root/leftImg8bit/<split>/<city>/`` for ``*_leftImg8bit.png`` files
    and keeps only those whose matching mask exists under
    ``root/gtFine/<split>/<city>/``.

    Args:
        root: Dataset directory containing ``leftImg8bit`` and ``gtFine``.
        split: Sub-folder name of the split to load (default ``'val'``).
        transform: Optional callable applied to the RGB PIL image.
        target_transform: Optional callable applied to the PIL mask.
    """

    def __init__(self, root, split='val', transform=None, target_transform=None):
        self.image_dir = os.path.join(root, "leftImg8bit", split)
        self.label_dir = os.path.join(root, "gtFine", split)
        self.transform = transform
        self.target_transform = target_transform

        self.images = []
        self.labels = []

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        for city in sorted(os.listdir(self.image_dir)):
            img_folder = os.path.join(self.image_dir, city)
            label_folder = os.path.join(self.label_dir, city)

            # Skip stray files (e.g. archive metadata) sitting next to the
            # city folders; listing them would raise NotADirectoryError.
            if not os.path.isdir(img_folder):
                continue

            for file_name in sorted(os.listdir(img_folder)):
                if file_name.endswith("_leftImg8bit.png"):
                    base = file_name.replace("_leftImg8bit.png", "")
                    img_path = os.path.join(img_folder, file_name)
                    label_path = os.path.join(label_folder, base + "_gtFine_labelTrainIds.png")

                    # Images without a train-id mask are silently left out.
                    if os.path.exists(label_path):
                        self.images.append(img_path)
                        self.labels.append(label_path)

    def __len__(self):
        """Return the number of image/mask pairs found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load and transform pair ``idx``; return ``(img, mask)``."""
        img = Image.open(self.images[idx]).convert("RGB")
        mask = Image.open(self.labels[idx])

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask


In [None]:
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

# ✅ Image and mask transforms
image_transform = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    # Standard ImageNet channel mean/std (presumably matching the pretrained
    # ResNet backbone — confirm against the model's expected preprocessing).
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    # Nearest-neighbour keeps class ids intact (no blended, invalid ids).
    # InterpolationMode replaces the deprecated integer PIL constant.
    transforms.Resize((512, 1024), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.PILToTensor()
])

# ✅ Initialize datasets with transforms and augmentation
gta5_dataset = GTA5Dataset(
    root="/content/datasets/GTA5",
    transform=image_transform,
    target_transform=mask_transform,
    apply_aug=True  # ✅ Combined augmentation
)

cityscapes_val = CityscapesDataset(
    root="/content/datasets/Cityscapes",
    split='val',
    transform=image_transform,
    target_transform=mask_transform
)

# ✅ Create DataLoaders
# drop_last=True on the training loader: a trailing batch of a single sample
# can make BatchNorm layers fail in train mode; it is a no-op when the dataset
# size is a multiple of the batch size.
train_loader = DataLoader(gta5_dataset, batch_size=2, shuffle=True, num_workers=2,
                          pin_memory=True, drop_last=True)
val_loader = DataLoader(cityscapes_val, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Train loader: {len(train_loader)} batches | Val loader: {len(val_loader)} batches")


✅ Train loader: 1250 batches | Val loader: 250 batches


In [None]:
# ✅ Clone the official MLDL2024 project repo (if needed)
!git clone https://github.com/Gabrysse/MLDL2024_project1.git

# ✅ Add it to the system path
import sys
sys.path.append("/content/MLDL2024_project1")

# ✅ Import BiSeNet
from models.bisenet.build_bisenet import BiSeNet
print("✅ BiSeNet import successful.")


Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 11.29 MiB/s, done.
Resolving deltas: 100% (9/9), done.
✅ BiSeNet import successful.


In [None]:
import torch

# ✅ Use the GPU when one is visible, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# ✅ Build BiSeNet (ResNet18 context path, 19 classes) and place it on that device
model = BiSeNet(num_classes=19, context_path='resnet18').to(device)
print("✅ BiSeNet with ResNet18 initialized and moved to", device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 215MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 207MB/s]


✅ BiSeNet with ResNet18 initialized and moved to cuda


In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler

# ✅ Cross-entropy loss; pixels labelled 255 ("void") do not contribute
criterion = nn.CrossEntropyLoss(ignore_index=255)

# ✅ SGD over all model parameters, with momentum and weight decay
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=1e-4,
)

# ✅ Gradient scaler used by the mixed-precision training loop
scaler = GradScaler()

print("✅ Loss, optimizer, and AMP scaler initialized.")


✅ Loss, optimizer, and AMP scaler initialized.


In [None]:
from torch.cuda.amp import autocast
from tqdm import tqdm
import gc

# ✅ Training configuration
epochs = 50
best_val_loss = float("inf")
save_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug_combined.pth"

print("🟢 Starting BiSeNet training with Combined Augmentations...")

for epoch in range(epochs):
    # ---- Training pass over train_loader ----
    model.train()
    running_loss = 0.0
    loop = tqdm(train_loader, total=len(train_loader), desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)

    for images, masks in loop:
        images = images.to(device)
        # Drop the singleton channel dim and use int64 class ids for the loss.
        masks = masks.squeeze(1).long().to(device)

        optimizer.zero_grad()

        with autocast():
            # In train mode the model yields the main output plus two
            # auxiliary outputs; the auxiliary losses are weighted by 0.4.
            out, aux1, aux2 = model(images)
            loss1 = criterion(out, masks)
            loss2 = criterion(aux1, masks)
            loss3 = criterion(aux2, masks)
            loss = loss1 + 0.4 * (loss2 + loss3)

        # Scaled backward/step/update sequence for mixed-precision training.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_train_loss = running_loss / len(train_loader)

    # ✅ Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.squeeze(1).long().to(device)

            with autocast():
                # In eval mode the model output is passed to the loss directly
                # (a single tensor is expected here).
                val_out = model(val_imgs)
                val_loss_batch = criterion(val_out, val_masks)

            val_loss += val_loss_batch.item()

    avg_val_loss = val_loss / len(val_loader)

    # ✅ Save best model (lowest average validation loss so far)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f}")

    # ✅ GPU cleanup
    torch.cuda.empty_cache()
    gc.collect()
    # `device` may have fallen back to CPU; mem_get_info() raises without a
    # CUDA device, so only query it when CUDA is actually available.
    if torch.cuda.is_available():
        mem_free = f"{torch.cuda.mem_get_info()[0] / (1024 ** 3):.2f} GB"
    else:
        mem_free = "n/a"

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Free GPU: {mem_free}")

print("🏁 Combined augmentation training complete.")


🟢 Starting BiSeNet training with Combined Augmentations...




💾 Best model saved at epoch 1 | Val Loss: 2.9837
✅ Epoch 1 | Train Loss: 1.4100 | Val Loss: 2.9837 | Free GPU: 38.47 GB




💾 Best model saved at epoch 2 | Val Loss: 2.8402
✅ Epoch 2 | Train Loss: 0.9861 | Val Loss: 2.8402 | Free GPU: 38.45 GB




✅ Epoch 3 | Train Loss: 0.8441 | Val Loss: 4.0824 | Free GPU: 38.39 GB




✅ Epoch 4 | Train Loss: 0.8121 | Val Loss: 3.7960 | Free GPU: 38.43 GB




✅ Epoch 5 | Train Loss: 0.9374 | Val Loss: 4.2916 | Free GPU: 38.43 GB




✅ Epoch 6 | Train Loss: 0.7537 | Val Loss: 3.5445 | Free GPU: 38.52 GB




✅ Epoch 7 | Train Loss: 0.6942 | Val Loss: 3.5713 | Free GPU: 38.43 GB




💾 Best model saved at epoch 8 | Val Loss: 2.6615
✅ Epoch 8 | Train Loss: 0.6840 | Val Loss: 2.6615 | Free GPU: 38.40 GB




✅ Epoch 9 | Train Loss: 0.6425 | Val Loss: 5.0285 | Free GPU: 38.56 GB




✅ Epoch 10 | Train Loss: 0.6011 | Val Loss: 4.1269 | Free GPU: 38.49 GB




✅ Epoch 11 | Train Loss: 0.5850 | Val Loss: 3.7501 | Free GPU: 38.48 GB




✅ Epoch 12 | Train Loss: 0.5462 | Val Loss: 4.0968 | Free GPU: 38.54 GB




✅ Epoch 13 | Train Loss: 0.6119 | Val Loss: 4.2419 | Free GPU: 38.51 GB




✅ Epoch 14 | Train Loss: 0.5261 | Val Loss: 3.7358 | Free GPU: 38.51 GB




✅ Epoch 15 | Train Loss: 0.5050 | Val Loss: 3.4012 | Free GPU: 38.39 GB




✅ Epoch 16 | Train Loss: 0.4922 | Val Loss: 4.7972 | Free GPU: 38.53 GB




✅ Epoch 17 | Train Loss: 0.4748 | Val Loss: 4.9053 | Free GPU: 38.52 GB




✅ Epoch 18 | Train Loss: 0.4756 | Val Loss: 3.8417 | Free GPU: 38.39 GB




✅ Epoch 19 | Train Loss: 0.4536 | Val Loss: 4.0490 | Free GPU: 38.48 GB




✅ Epoch 20 | Train Loss: 0.4371 | Val Loss: 5.9471 | Free GPU: 38.51 GB




✅ Epoch 21 | Train Loss: 0.4307 | Val Loss: 7.2828 | Free GPU: 38.41 GB




✅ Epoch 22 | Train Loss: 0.4153 | Val Loss: 4.2896 | Free GPU: 38.44 GB




✅ Epoch 23 | Train Loss: 0.3959 | Val Loss: 6.4983 | Free GPU: 38.45 GB




✅ Epoch 24 | Train Loss: 0.3920 | Val Loss: 4.3770 | Free GPU: 38.39 GB




✅ Epoch 25 | Train Loss: 0.3912 | Val Loss: 5.0916 | Free GPU: 38.46 GB




✅ Epoch 26 | Train Loss: 0.3774 | Val Loss: 5.0571 | Free GPU: 38.46 GB




✅ Epoch 27 | Train Loss: 0.3710 | Val Loss: 5.7845 | Free GPU: 38.41 GB




✅ Epoch 28 | Train Loss: 0.3750 | Val Loss: 3.3858 | Free GPU: 38.50 GB




✅ Epoch 29 | Train Loss: 0.3696 | Val Loss: 4.0508 | Free GPU: 38.46 GB




✅ Epoch 30 | Train Loss: 0.3509 | Val Loss: 3.7895 | Free GPU: 38.50 GB




✅ Epoch 31 | Train Loss: 0.3453 | Val Loss: 5.0836 | Free GPU: 38.44 GB




✅ Epoch 32 | Train Loss: 0.3359 | Val Loss: 6.0133 | Free GPU: 38.44 GB




✅ Epoch 33 | Train Loss: 0.3335 | Val Loss: 4.2513 | Free GPU: 38.54 GB




✅ Epoch 34 | Train Loss: 0.3374 | Val Loss: 4.2643 | Free GPU: 38.49 GB




✅ Epoch 35 | Train Loss: 0.3304 | Val Loss: 4.4273 | Free GPU: 38.47 GB




✅ Epoch 36 | Train Loss: 0.3312 | Val Loss: 6.3950 | Free GPU: 38.47 GB




✅ Epoch 37 | Train Loss: 0.3137 | Val Loss: 5.9583 | Free GPU: 38.46 GB




✅ Epoch 38 | Train Loss: 0.3106 | Val Loss: 5.1779 | Free GPU: 38.58 GB




✅ Epoch 39 | Train Loss: 0.3018 | Val Loss: 6.0585 | Free GPU: 38.51 GB




✅ Epoch 40 | Train Loss: 0.3194 | Val Loss: 4.5392 | Free GPU: 38.40 GB




✅ Epoch 41 | Train Loss: 0.2971 | Val Loss: 4.0968 | Free GPU: 38.36 GB




✅ Epoch 42 | Train Loss: 0.2899 | Val Loss: 6.4808 | Free GPU: 38.55 GB




✅ Epoch 43 | Train Loss: 0.2911 | Val Loss: 4.6964 | Free GPU: 38.54 GB




✅ Epoch 44 | Train Loss: 0.2841 | Val Loss: 5.2253 | Free GPU: 38.43 GB




✅ Epoch 45 | Train Loss: 0.2790 | Val Loss: 4.5851 | Free GPU: 38.55 GB




✅ Epoch 46 | Train Loss: 0.2891 | Val Loss: 4.8917 | Free GPU: 38.41 GB




✅ Epoch 47 | Train Loss: 0.3041 | Val Loss: 3.9848 | Free GPU: 38.52 GB




✅ Epoch 48 | Train Loss: 0.2847 | Val Loss: 5.1047 | Free GPU: 38.51 GB




✅ Epoch 49 | Train Loss: 0.2829 | Val Loss: 4.5508 | Free GPU: 38.49 GB




✅ Epoch 50 | Train Loss: 0.2904 | Val Loss: 6.3403 | Free GPU: 38.52 GB
🏁 Combined augmentation training complete.


In [None]:
from tqdm import tqdm
import torch.nn.functional as F

# Load best model
# map_location=device lets the checkpoint load even when `device` fell back to
# CPU (a checkpoint saved from GPU would otherwise try to restore onto CUDA).
model.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug_combined.pth",
        map_location=device,
    )
)
# Switch to inference mode; the trailing ';' keeps the full module repr out of
# the cell output.
model.eval();



BiSeNet(
  (saptial_path): Spatial_path(
    (convblock1): ConvBlock(
      (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (convblock2): ConvBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (convblock3): ConvBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
  )
  (context_path): resnet18(
    (features): ResNet(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [None]:
# Save after training (if not already saved inside loop)
import os

ckpt_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug_combined.pth"

# Do not overwrite an existing checkpoint: the training loop stores the
# best-validation weights at this path, and `model` may currently hold
# different (e.g. last-epoch) weights.
if os.path.exists(ckpt_path):
    print("✅ Checkpoint already on Google Drive; existing file kept.")
else:
    torch.save(model.state_dict(), ckpt_path)
    print("✅ Model saved to Google Drive.")

✅ Model saved to Google Drive.


In [None]:
from models.bisenet.build_bisenet import BiSeNet
import torch
from tqdm import tqdm

# Reload model
# Pick the device the same way the model-creation cell does instead of
# hard-coding 'cuda', and map the checkpoint onto it while loading.
eval_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_aug2 = BiSeNet(num_classes=19, context_path='resnet18')
model_aug2.load_state_dict(
    torch.load(
        "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug_combined.pth",
        map_location=eval_device,
    )
)
model_aug2 = model_aug2.to(eval_device)
model_aug2.eval()

# Validation mIoU
def evaluate_miou(model, dataloader, num_classes=19):
    """Print per-class IoU and the mean IoU of ``model`` over ``dataloader``.

    Args:
        model: Callable that returns per-class scores for a batch of images;
            the predicted class is the argmax over dim 1. If it exposes
            ``parameters()``, evaluation runs on the device of its first
            parameter; otherwise on CUDA when available, else CPU.
        dataloader: Iterable of ``(images, labels)`` batches. ``labels`` hold
            integer class ids; a singleton dim at position 1 is squeezed away.
        num_classes: Number of evaluated classes. Pixels whose label is outside
            ``[0, num_classes)`` (e.g. the 255 "void" id) are excluded.

    Returns:
        float: IoU averaged over all ``num_classes`` classes.
    """
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Confusion matrix (rows: ground truth, cols: prediction). Integer counts
    # stay exact, whereas float32 accumulation loses precision on large sums.
    hist = torch.zeros(num_classes, num_classes, dtype=torch.int64, device=device)

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="🔍 Evaluating mIoU"):
            images = images.to(device)
            # Cast to int64 BEFORE any arithmetic: on uint8 labels the product
            # num_classes * label wraps modulo 256 and corrupts the matrix
            # for every class id >= 14 (with 19 classes).
            labels = labels.squeeze(1).long().to(device)

            preds = model(images)
            preds = torch.argmax(preds, dim=1)

            # Keep only pixels with a real class label (drops void = 255).
            valid = (labels >= 0) & (labels < num_classes)
            flat_index = num_classes * labels[valid] + preds[valid]
            hist += torch.bincount(
                flat_index, minlength=num_classes ** 2
            ).reshape(num_classes, num_classes)

    hist = hist.cpu().double()
    # IoU = TP / (GT pixels + predicted pixels - TP); the epsilon avoids 0/0
    # for classes that never occur.
    iou = hist.diag() / (hist.sum(1) + hist.sum(0) - hist.diag() + 1e-6)
    for i, val in enumerate(iou):
        print(f"{i:02d}: {val:.4f}")
    print(f"\n📊 Final mIoU with augmentation: {iou.mean():.4f}")

    return iou.mean().item()

# Score the reloaded checkpoint on the Cityscapes validation loader.
evaluate_miou(model_aug2, val_loader)


🔍 Evaluating mIoU: 100%|██████████| 250/250 [00:30<00:00,  8.29it/s]

00: 0.6954
01: 0.0655
02: 0.6544
03: 0.0348
04: 0.0336
05: 0.0129
06: 0.0059
07: 0.0000
08: 0.0000
09: 0.0026
10: 0.0037
11: 0.0003
12: 0.0093
13: 0.3477
14: 0.0000
15: 0.0000
16: 0.0000
17: 0.0000
18: 0.0000

📊 Final mIoU with augmentation: 0.0982



