In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
import os

# Paths to your zip files in Drive
gta5_zip = "/content/drive/MyDrive/Semantic_Segmentation/GTA5.zip"
cityscapes_zip = "/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip"

# Destination folders
gta5_extract_path = "/content/datasets/GTA5"
cityscapes_extract_path = "/content/datasets/Cityscapes"

# Create directories
os.makedirs(gta5_extract_path, exist_ok=True)
os.makedirs(cityscapes_extract_path, exist_ok=True)

# Extract GTA5
with zipfile.ZipFile(gta5_zip, 'r') as zip_ref:
    zip_ref.extractall(gta5_extract_path)

# Extract Cityscapes
with zipfile.ZipFile(cityscapes_zip, 'r') as zip_ref:
    zip_ref.extractall(cityscapes_extract_path)

print("✅ GTA5 dataset extracted")
print("✅ Cityscapes dataset extracted")


✅ GTA5 dataset extracted
✅ Cityscapes dataset extracted


In [3]:
import shutil
import os

# Fix Cityscapes structure
base_city = "/content/datasets/Cityscapes"
wrong_city_nested = os.path.join(base_city, "Cityscapes", "Cityspaces")

if os.path.exists(wrong_city_nested):
    for sub in ["gtFine", "images"]:
        src = os.path.join(wrong_city_nested, sub)
        dst = os.path.join(base_city, sub if sub == "gtFine" else "leftImg8bit")
        shutil.move(src, dst)
    shutil.rmtree(os.path.join(base_city, "Cityscapes"))
    print("✅ Fixed Cityscapes structure")

# Fix GTA5 structure
base_gta = "/content/datasets/GTA5"
wrong_gta_nested = os.path.join(base_gta, "GTA5")

if os.path.exists(wrong_gta_nested):
    for sub in ["images", "labels"]:
        shutil.move(os.path.join(wrong_gta_nested, sub), os.path.join(base_gta, sub))
    shutil.rmtree(wrong_gta_nested)
    print("✅ Fixed GTA5 structure")



✅ Fixed Cityscapes structure
✅ Fixed GTA5 structure


In [4]:
import os
import random
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import functional as TF
from PIL import Image

# ✅ GTA5 → Cityscapes label remapping
GTA5_TO_CITYSCAPES = {
    7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 15: 5, 17: 6, 19: 7, 20: 8,
    21: 9, 22: 10, 23: 11, 24: 12, 26: 13, 27: 14, 28: 15,
    31: 16, 32: 17, 33: 18
}

class GTA5Dataset(Dataset):
    def __init__(self, root, transform=None, target_transform=None, apply_color_jitter=False):
        self.image_dir = os.path.join(root, "images")
        self.label_dir = os.path.join(root, "labels")
        self.images = sorted(os.listdir(self.image_dir))
        self.labels = sorted(os.listdir(self.label_dir))
        self.transform = transform
        self.target_transform = target_transform
        self.apply_color_jitter = apply_color_jitter
        self.color_jitter = transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.05)

    def __len__(self):
        return len(self.images)

    def encode_labels(self, mask):
        remapped = np.full_like(mask, 255)
        for gta_id, city_id in GTA5_TO_CITYSCAPES.items():
            remapped[mask == gta_id] = city_id
        return remapped

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.images[idx])
        mask_path = os.path.join(self.label_dir, self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)

        if self.apply_color_jitter and random.random() < 0.5:
            img = self.color_jitter(img)

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)
            mask = self.encode_labels(mask.squeeze().numpy())
            mask = torch.from_numpy(mask).long().unsqueeze(0)

        return img, mask

class CityscapesDataset(Dataset):
    def __init__(self, root, split='val', transform=None, target_transform=None):
        self.image_dir = os.path.join(root, "leftImg8bit", split)
        self.label_dir = os.path.join(root, "gtFine", split)
        self.transform = transform
        self.target_transform = target_transform

        self.images = []
        self.labels = []

        for city in os.listdir(self.image_dir):
            img_folder = os.path.join(self.image_dir, city)
            label_folder = os.path.join(self.label_dir, city)

            for file_name in os.listdir(img_folder):
                if file_name.endswith("_leftImg8bit.png"):
                    base = file_name.replace("_leftImg8bit.png", "")
                    img_path = os.path.join(img_folder, file_name)
                    label_path = os.path.join(label_folder, base + "_gtFine_labelTrainIds.png")

                    if os.path.exists(label_path):
                        self.images.append(img_path)
                        self.labels.append(label_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = Image.open(self.images[idx]).convert("RGB")
        mask = Image.open(self.labels[idx])

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask


In [5]:
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

# ✅ Image/label transforms
image_transform = transforms.Compose([
    transforms.Resize((720, 1280)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

mask_transform = transforms.Compose([
    transforms.Resize((720, 1280), interpolation=Image.NEAREST),
    transforms.PILToTensor()
])

# ✅ Instantiate GTA5 training dataset (Color Jitter ON)
gta5_dataset = GTA5Dataset(
    root='/content/datasets/GTA5',
    transform=image_transform,
    target_transform=mask_transform,
    apply_color_jitter=True  # ✅ Aug. 2: Enable Color Jitter only
)

# ✅ Instantiate Cityscapes validation dataset
val_dataset = CityscapesDataset(
    root='/content/datasets/Cityscapes',
    split='val',
    transform=image_transform,
    target_transform=mask_transform
)

# ✅ Dataloaders
train_loader = DataLoader(gta5_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Train loader: {len(train_loader)} batches | Val loader: {len(val_loader)} batches")


✅ Train loader: 1250 batches | Val loader: 250 batches


In [6]:
!git clone https://github.com/Gabrysse/MLDL2024_project1.git


Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 296.00 KiB/s, done.
Resolving deltas: 100% (9/9), done.


In [7]:
import sys
import torch

# Add the cloned repo to the system path
sys.path.append("/content/MLDL2024_project1")

# Import BiSeNet
from models.bisenet.build_bisenet import BiSeNet

# Initialize BiSeNet for 19 classes and move to CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BiSeNet(num_classes=19, context_path='resnet18').to(device)

print("✅ BiSeNet with ResNet18 initialized and moved to", device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 197MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 225MB/s]


✅ BiSeNet with ResNet18 initialized and moved to cuda


In [8]:
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import GradScaler

# Loss: CrossEntropy with ignore_index for void class
criterion = nn.CrossEntropyLoss(ignore_index=255)

# Optimizer: SGD with momentum and weight decay
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# AMP Scaler for mixed precision training
scaler = GradScaler()

print("✅ Loss, optimizer, and AMP scaler initialized.")


✅ Loss, optimizer, and AMP scaler initialized.


In [9]:
from torch.cuda.amp import autocast
from tqdm import tqdm
import gc

# ✅ Training config
epochs = 50
best_val_loss = float('inf')
save_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_colorjitter.pth"

print("🟢 Starting BiSeNet training with Color Jitter Augmentation...")

for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)

    for images, masks in loop:
        images = images.to(device)
        masks = masks.squeeze(1).long().to(device)

        optimizer.zero_grad()

        with autocast():
            output, aux1, aux2 = model(images)
            loss1 = criterion(output, masks)
            loss2 = criterion(aux1, masks)
            loss3 = criterion(aux2, masks)
            loss = loss1 + 0.4 * (loss2 + loss3)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += loss.item()
        loop.set_postfix(train_loss=loss.item())

    avg_train_loss = total_loss / len(train_loader)

    # ✅ Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.squeeze(1).long().to(device)

            with autocast():
                val_out = model(val_imgs)
                val_loss_batch = nn.CrossEntropyLoss(ignore_index=255)(val_out, val_masks)

            val_loss += val_loss_batch.item()

    avg_val_loss = val_loss / len(val_loader)

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f}")

    # ✅ Memory cleanup
    torch.cuda.empty_cache()
    gc.collect()
    mem_free = torch.cuda.mem_get_info()[0] / (1024 ** 3)

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Free GPU: {mem_free:.2f} GB")

print("🏁 Augmented training complete.")


🟢 Starting BiSeNet training with Color Jitter Augmentation...




💾 Best model saved at epoch 1 | Val Loss: 3.2385
✅ Epoch 1 | Train Loss: 1.2565 | Val Loss: 3.2385 | Free GPU: 21.08 GB




✅ Epoch 2 | Train Loss: 0.8799 | Val Loss: 3.3014 | Free GPU: 21.04 GB




✅ Epoch 3 | Train Loss: 0.7332 | Val Loss: 3.7343 | Free GPU: 21.12 GB




💾 Best model saved at epoch 4 | Val Loss: 2.8423
✅ Epoch 4 | Train Loss: 0.6637 | Val Loss: 2.8423 | Free GPU: 21.07 GB




✅ Epoch 5 | Train Loss: 0.6279 | Val Loss: 4.5020 | Free GPU: 21.09 GB




✅ Epoch 6 | Train Loss: 0.5667 | Val Loss: 3.4748 | Free GPU: 20.93 GB




✅ Epoch 7 | Train Loss: 0.5370 | Val Loss: 3.9209 | Free GPU: 20.93 GB




💾 Best model saved at epoch 8 | Val Loss: 2.5342
✅ Epoch 8 | Train Loss: 0.5628 | Val Loss: 2.5342 | Free GPU: 21.10 GB




✅ Epoch 9 | Train Loss: 0.4876 | Val Loss: 3.4913 | Free GPU: 20.97 GB




✅ Epoch 10 | Train Loss: 0.4609 | Val Loss: 4.9356 | Free GPU: 21.10 GB




💾 Best model saved at epoch 11 | Val Loss: 2.3420
✅ Epoch 11 | Train Loss: 0.4419 | Val Loss: 2.3420 | Free GPU: 20.98 GB




✅ Epoch 12 | Train Loss: 0.4318 | Val Loss: 3.9413 | Free GPU: 20.89 GB




✅ Epoch 13 | Train Loss: 0.4068 | Val Loss: 4.0537 | Free GPU: 21.04 GB




✅ Epoch 14 | Train Loss: 0.4094 | Val Loss: 4.8332 | Free GPU: 20.91 GB




✅ Epoch 15 | Train Loss: 0.3760 | Val Loss: 5.8574 | Free GPU: 21.15 GB




✅ Epoch 16 | Train Loss: 0.3592 | Val Loss: 5.0078 | Free GPU: 21.07 GB




✅ Epoch 17 | Train Loss: 0.3446 | Val Loss: 4.4977 | Free GPU: 20.94 GB




✅ Epoch 18 | Train Loss: 0.3478 | Val Loss: 4.5668 | Free GPU: 21.09 GB




✅ Epoch 19 | Train Loss: 0.3242 | Val Loss: 4.5351 | Free GPU: 20.93 GB




✅ Epoch 20 | Train Loss: 0.3243 | Val Loss: 4.1521 | Free GPU: 21.00 GB




✅ Epoch 21 | Train Loss: 0.3125 | Val Loss: 3.7400 | Free GPU: 21.02 GB




✅ Epoch 22 | Train Loss: 0.3057 | Val Loss: 4.5348 | Free GPU: 20.86 GB




✅ Epoch 23 | Train Loss: 0.3134 | Val Loss: 4.2985 | Free GPU: 20.87 GB




✅ Epoch 24 | Train Loss: 0.2888 | Val Loss: 4.7729 | Free GPU: 21.04 GB




✅ Epoch 25 | Train Loss: 0.2831 | Val Loss: 4.7002 | Free GPU: 21.06 GB




✅ Epoch 26 | Train Loss: 0.2764 | Val Loss: 4.2991 | Free GPU: 20.76 GB




✅ Epoch 27 | Train Loss: 0.2750 | Val Loss: 4.4466 | Free GPU: 21.00 GB




✅ Epoch 28 | Train Loss: 0.2654 | Val Loss: 4.3825 | Free GPU: 20.96 GB




✅ Epoch 29 | Train Loss: 0.2673 | Val Loss: 4.4992 | Free GPU: 21.09 GB




✅ Epoch 30 | Train Loss: 0.2594 | Val Loss: 4.4752 | Free GPU: 20.98 GB




✅ Epoch 31 | Train Loss: 0.2533 | Val Loss: 4.7305 | Free GPU: 21.08 GB




✅ Epoch 32 | Train Loss: 0.2446 | Val Loss: 3.9350 | Free GPU: 21.02 GB




✅ Epoch 33 | Train Loss: 0.2462 | Val Loss: 5.1452 | Free GPU: 21.00 GB




✅ Epoch 34 | Train Loss: 0.4060 | Val Loss: 3.5457 | Free GPU: 20.95 GB




✅ Epoch 35 | Train Loss: 0.3130 | Val Loss: 4.4856 | Free GPU: 21.11 GB




✅ Epoch 36 | Train Loss: 0.2653 | Val Loss: 4.3501 | Free GPU: 20.98 GB




✅ Epoch 37 | Train Loss: 0.2445 | Val Loss: 4.3293 | Free GPU: 21.05 GB




✅ Epoch 38 | Train Loss: 0.2382 | Val Loss: 4.3667 | Free GPU: 21.05 GB




✅ Epoch 39 | Train Loss: 0.2305 | Val Loss: 3.8642 | Free GPU: 21.03 GB




✅ Epoch 40 | Train Loss: 0.2256 | Val Loss: 4.5995 | Free GPU: 20.98 GB




✅ Epoch 41 | Train Loss: 0.2345 | Val Loss: 4.2743 | Free GPU: 21.02 GB




✅ Epoch 42 | Train Loss: 0.2226 | Val Loss: 4.4451 | Free GPU: 21.14 GB




✅ Epoch 43 | Train Loss: 0.2224 | Val Loss: 4.1772 | Free GPU: 21.04 GB




✅ Epoch 44 | Train Loss: 0.2228 | Val Loss: 4.3787 | Free GPU: 21.13 GB




✅ Epoch 45 | Train Loss: 0.2136 | Val Loss: 4.2970 | Free GPU: 21.09 GB




✅ Epoch 46 | Train Loss: 0.2120 | Val Loss: 4.3388 | Free GPU: 21.12 GB




✅ Epoch 47 | Train Loss: 0.2229 | Val Loss: 4.3928 | Free GPU: 21.13 GB




✅ Epoch 48 | Train Loss: 0.2147 | Val Loss: 4.9023 | Free GPU: 21.02 GB




✅ Epoch 49 | Train Loss: 0.2135 | Val Loss: 4.8388 | Free GPU: 21.15 GB




✅ Epoch 50 | Train Loss: 0.2095 | Val Loss: 4.7064 | Free GPU: 21.17 GB
🏁 Augmented training complete.


Saving the model

In [10]:
# Save after training (if not already saved inside loop)
torch.save(model.state_dict(), "/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug2.pth")
print("✅ Model saved to Google Drive.")


✅ Model saved to Google Drive.


Evaluating mIoU

In [11]:
from models.bisenet.build_bisenet import BiSeNet
import torch
from tqdm import tqdm

# Reload model
model_aug2 = BiSeNet(num_classes=19, context_path='resnet18')
model_aug2.load_state_dict(torch.load("/content/drive/MyDrive/Semantic_Segmentation/bisenet_gta5_aug2.pth"))
model_aug2 = model_aug2.to('cuda')
model_aug2.eval()

# Validation mIoU
def evaluate_miou(model, dataloader, num_classes=19):
    from sklearn.metrics import confusion_matrix
    iou_list = []
    hist = torch.zeros(num_classes, num_classes).to('cuda')

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="🔍 Evaluating mIoU"):
            images = images.to('cuda')
            labels = labels.squeeze(1).to('cuda')

            preds = model(images)
            preds = torch.argmax(preds, dim=1)

            for p, t in zip(preds, labels):
                hist += torch.bincount(
                    num_classes * t.flatten() + p.flatten(),
                    minlength=num_classes ** 2
                ).reshape(num_classes, num_classes)

    iou = hist.diag() / (hist.sum(1) + hist.sum(0) - hist.diag() + 1e-6)
    for i, val in enumerate(iou):
        print(f"{i:02d}: {val:.4f}")
    print(f"\n📊 Final mIoU with augmentation: {iou.mean():.4f}")

evaluate_miou(model_aug2, val_loader)


🔍 Evaluating mIoU: 100%|██████████| 250/250 [00:35<00:00,  7.10it/s]

00: 0.1207
01: 0.1563
02: 0.5140
03: 0.0436
04: 0.0708
05: 0.0137
06: 0.0043
07: 0.0012
08: 0.0002
09: 0.0047
10: 0.0042
11: 0.0000
12: 0.2259
13: 0.2542
14: 0.0000
15: 0.0000
16: 0.0000
17: 0.0000
18: 0.0000

📊 Final mIoU with augmentation: 0.0744



