#SEMANTIC SEGMENTATION 2b

Mounting Google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Setting up the dataset

In [2]:
import zipfile
import os

# Path to zip in Drive
zip_path = '/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip'
extract_path = '/content/datasets/Cityscapes'

# Create extract directory
os.makedirs(extract_path, exist_ok=True)

# Unzip
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("✅ Cityscapes dataset extracted to:", extract_path)

✅ Cityscapes dataset extracted to: /content/datasets/Cityscapes


In [3]:
import os
import shutil

# Paths
broken_root = '/content/datasets/Cityscapes/Cityscapes/Cityspaces'
target_root = '/content/datasets/Cityscapes'

# Move gtFine and images up
for folder in ['gtFine', 'images']:
    src = os.path.join(broken_root, folder)
    dst = os.path.join(target_root, folder)
    if os.path.exists(src):
        shutil.move(src, dst)

# Rename 'images' to 'leftImg8bit' to match standard naming
os.rename(os.path.join(target_root, 'images'),
          os.path.join(target_root, 'leftImg8bit'))

print("✅ Folder structure corrected.")


✅ Folder structure corrected.


Cloning the github repository

In [4]:
!git clone https://github.com/Gabrysse/MLDL2024_project1.git


Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 11.29 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [5]:
!ls MLDL2024_project1/models

bisenet  deeplabv2


In [6]:
!ls MLDL2024_project1/models/bisenet

build_bisenet.py  build_contextpath.py


In [7]:
import sys
import torch

# Add project path to system
sys.path.append("/content/MLDL2024_project1")

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("🟢 Using device:", device)


🟢 Using device: cuda


In [8]:
from models.bisenet.build_bisenet import BiSeNet

# Initialize BiSeNet with 19 semantic classes and 'resnet18' backbone
model = BiSeNet(num_classes=19, context_path='resnet18')

# Move model to CUDA
model = model.to(device)
print("✅ BiSeNet model loaded and moved to", device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 173MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 142MB/s]


✅ BiSeNet model loaded and moved to cuda


Datasetting and Dataloading

In [9]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# ✅ Dataset root path
root_dir = "/content/datasets/Cityscapes"
images_base = os.path.join(root_dir, "leftImg8bit")
masks_base = os.path.join(root_dir, "gtFine")

# ✅ Cityscapes Custom Dataset
class CityscapesDataset(Dataset):
    def __init__(self, split='train', transform=None, target_transform=None):
        self.split = split
        self.transform = transform
        self.target_transform = target_transform
        self.images = []
        self.masks = []

        cities_path = os.path.join(images_base, split)
        for city in os.listdir(cities_path):
            img_dir = os.path.join(cities_path, city)
            mask_dir = os.path.join(masks_base, split, city)

            for file_name in os.listdir(img_dir):
                if file_name.endswith("_leftImg8bit.png"):
                    base = file_name.replace("_leftImg8bit.png", "")
                    img_path = os.path.join(img_dir, file_name)
                    mask_path = os.path.join(mask_dir, base + "_gtFine_labelTrainIds.png")
                    if os.path.exists(mask_path):
                        self.images.append(img_path)
                        self.masks.append(mask_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = Image.open(self.images[idx]).convert('RGB')
        mask = Image.open(self.masks[idx])

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask

# ✅ Transformations
image_transform = transforms.Compose([
    transforms.Resize((512, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((512, 1024), interpolation=Image.NEAREST),
    transforms.PILToTensor()
])

# ✅ Datasets & Loaders
train_dataset = CityscapesDataset(split='train', transform=image_transform, target_transform=mask_transform)
val_dataset = CityscapesDataset(split='val', transform=image_transform, target_transform=mask_transform)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Dataset loaded: {len(train_dataset)} training samples, {len(val_dataset)} validation samples")

✅ Dataset loaded: 1572 training samples, 500 validation samples


In [10]:
import torch.nn as nn
import torch.optim as optim
from torch.amp import autocast, GradScaler

# ✅ Combined loss for 3 outputs (main, aux1, aux2)
class BiSeNetLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.criterion = nn.CrossEntropyLoss(ignore_index=255)

    def forward(self, preds, target):
        main, aux1, aux2 = preds
        loss1 = self.criterion(main, target)
        loss2 = self.criterion(aux1, target)
        loss3 = self.criterion(aux2, target)
        return loss1 + 0.4 * (loss2 + loss3)  # standard BiSeNet weighting

# ✅ Initialize loss
criterion = BiSeNetLoss()

# ✅ Optimizer (SGD with momentum, same as 2a)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# ✅ Mixed precision scaler
scaler = GradScaler(device='cuda')

print("✅ BiSeNet loss, optimizer, and mixed precision scaler initialized.")

✅ BiSeNet loss, optimizer, and mixed precision scaler initialized.


Training BiseNet

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import gc
import os

# ✅ Training config
epochs = 50
save_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_best_model.pth"

# ✅ Best validation tracking
best_val_loss = float("inf")

print("🟢 Starting BiSeNet training...")

for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    loop = tqdm(train_loader, total=len(train_loader), desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)

    for images, targets in loop:
        images = images.to(device)
        targets = targets.squeeze(1).long().to(device)

        optimizer.zero_grad()

        with autocast():
            preds = model(images)  # returns (main, aux1, aux2)
            loss = criterion(preds, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_train_loss = train_loss / len(train_loader)

    # ✅ Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.squeeze(1).long().to(device)

            with autocast():
                 val_output = model(val_imgs)  # only main output
                 loss = nn.CrossEntropyLoss(ignore_index=255)(val_output, val_masks)

            val_loss += loss.item()


    avg_val_loss = val_loss / len(val_loader)

    # ✅ Save best model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f}")

    # ✅ Memory cleanup
    torch.cuda.empty_cache()
    gc.collect()
    mem_free = torch.cuda.mem_get_info()[0] / (1024 ** 3)

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Free GPU: {mem_free:.2f} GB")

print("🏁 BiSeNet training complete.")


🟢 Starting BiSeNet training...




💾 Best model saved at epoch 1 | Val Loss: 0.6477
✅ Epoch 1 | Train Loss: 1.4489 | Val Loss: 0.6477 | Free GPU: 21.35 GB




💾 Best model saved at epoch 2 | Val Loss: 0.4815
✅ Epoch 2 | Train Loss: 1.0314 | Val Loss: 0.4815 | Free GPU: 21.39 GB




💾 Best model saved at epoch 3 | Val Loss: 0.4458
✅ Epoch 3 | Train Loss: 0.8314 | Val Loss: 0.4458 | Free GPU: 21.35 GB




💾 Best model saved at epoch 4 | Val Loss: 0.3915
✅ Epoch 4 | Train Loss: 0.7300 | Val Loss: 0.3915 | Free GPU: 21.41 GB




💾 Best model saved at epoch 5 | Val Loss: 0.3614
✅ Epoch 5 | Train Loss: 0.6658 | Val Loss: 0.3614 | Free GPU: 21.33 GB




✅ Epoch 6 | Train Loss: 0.6206 | Val Loss: 0.4091 | Free GPU: 21.31 GB




💾 Best model saved at epoch 7 | Val Loss: 0.3555
✅ Epoch 7 | Train Loss: 0.6041 | Val Loss: 0.3555 | Free GPU: 21.33 GB




💾 Best model saved at epoch 8 | Val Loss: 0.3476
✅ Epoch 8 | Train Loss: 0.5468 | Val Loss: 0.3476 | Free GPU: 21.28 GB




✅ Epoch 9 | Train Loss: 0.5190 | Val Loss: 0.3619 | Free GPU: 21.32 GB




💾 Best model saved at epoch 10 | Val Loss: 0.3081
✅ Epoch 10 | Train Loss: 0.4831 | Val Loss: 0.3081 | Free GPU: 21.38 GB




💾 Best model saved at epoch 11 | Val Loss: 0.3059
✅ Epoch 11 | Train Loss: 0.4627 | Val Loss: 0.3059 | Free GPU: 21.40 GB




💾 Best model saved at epoch 12 | Val Loss: 0.2970
✅ Epoch 12 | Train Loss: 0.4383 | Val Loss: 0.2970 | Free GPU: 21.41 GB




💾 Best model saved at epoch 13 | Val Loss: 0.2806
✅ Epoch 13 | Train Loss: 0.4112 | Val Loss: 0.2806 | Free GPU: 21.36 GB




✅ Epoch 14 | Train Loss: 0.3931 | Val Loss: 0.2823 | Free GPU: 21.33 GB




💾 Best model saved at epoch 15 | Val Loss: 0.2771
✅ Epoch 15 | Train Loss: 0.3702 | Val Loss: 0.2771 | Free GPU: 21.33 GB




✅ Epoch 16 | Train Loss: 0.3628 | Val Loss: 0.3317 | Free GPU: 21.30 GB




💾 Best model saved at epoch 17 | Val Loss: 0.2617
✅ Epoch 17 | Train Loss: 0.3452 | Val Loss: 0.2617 | Free GPU: 21.35 GB




✅ Epoch 18 | Train Loss: 0.3271 | Val Loss: 0.2642 | Free GPU: 21.32 GB




✅ Epoch 19 | Train Loss: 0.3219 | Val Loss: 0.2966 | Free GPU: 21.33 GB




✅ Epoch 20 | Train Loss: 0.3263 | Val Loss: 0.2726 | Free GPU: 21.41 GB




✅ Epoch 21 | Train Loss: 0.2998 | Val Loss: 0.2822 | Free GPU: 21.41 GB




✅ Epoch 22 | Train Loss: 0.2875 | Val Loss: 0.2669 | Free GPU: 21.32 GB




✅ Epoch 23 | Train Loss: 0.2782 | Val Loss: 0.2711 | Free GPU: 21.32 GB




💾 Best model saved at epoch 24 | Val Loss: 0.2576
✅ Epoch 24 | Train Loss: 0.2669 | Val Loss: 0.2576 | Free GPU: 21.35 GB




✅ Epoch 25 | Train Loss: 0.2644 | Val Loss: 0.2699 | Free GPU: 21.33 GB




💾 Best model saved at epoch 26 | Val Loss: 0.2568
✅ Epoch 26 | Train Loss: 0.2586 | Val Loss: 0.2568 | Free GPU: 21.37 GB




✅ Epoch 27 | Train Loss: 0.2503 | Val Loss: 0.2653 | Free GPU: 21.31 GB




✅ Epoch 28 | Train Loss: 0.2434 | Val Loss: 0.2620 | Free GPU: 21.40 GB




✅ Epoch 29 | Train Loss: 0.2392 | Val Loss: 0.2664 | Free GPU: 21.24 GB




✅ Epoch 30 | Train Loss: 0.2331 | Val Loss: 0.2840 | Free GPU: 21.27 GB




✅ Epoch 31 | Train Loss: 0.2301 | Val Loss: 0.2724 | Free GPU: 21.33 GB




✅ Epoch 32 | Train Loss: 0.2276 | Val Loss: 0.2683 | Free GPU: 21.29 GB




✅ Epoch 33 | Train Loss: 0.2209 | Val Loss: 0.2612 | Free GPU: 21.31 GB




✅ Epoch 34 | Train Loss: 0.2145 | Val Loss: 0.2708 | Free GPU: 21.39 GB




✅ Epoch 35 | Train Loss: 0.2119 | Val Loss: 0.2690 | Free GPU: 21.42 GB




✅ Epoch 36 | Train Loss: 0.2068 | Val Loss: 0.2710 | Free GPU: 21.28 GB




✅ Epoch 37 | Train Loss: 0.2029 | Val Loss: 0.2792 | Free GPU: 21.37 GB




✅ Epoch 38 | Train Loss: 0.1988 | Val Loss: 0.2832 | Free GPU: 21.44 GB




✅ Epoch 39 | Train Loss: 0.1962 | Val Loss: 0.2811 | Free GPU: 21.42 GB




✅ Epoch 40 | Train Loss: 0.1944 | Val Loss: 0.2797 | Free GPU: 21.31 GB




✅ Epoch 41 | Train Loss: 0.1985 | Val Loss: 0.2758 | Free GPU: 21.31 GB




✅ Epoch 42 | Train Loss: 0.2174 | Val Loss: 0.2832 | Free GPU: 21.23 GB




✅ Epoch 43 | Train Loss: 0.1937 | Val Loss: 0.2808 | Free GPU: 21.27 GB




✅ Epoch 44 | Train Loss: 0.1855 | Val Loss: 0.2854 | Free GPU: 21.33 GB




✅ Epoch 45 | Train Loss: 0.1821 | Val Loss: 0.2812 | Free GPU: 21.35 GB




✅ Epoch 46 | Train Loss: 0.1783 | Val Loss: 0.2824 | Free GPU: 21.34 GB




✅ Epoch 47 | Train Loss: 0.1789 | Val Loss: 0.2870 | Free GPU: 21.29 GB




✅ Epoch 48 | Train Loss: 0.1771 | Val Loss: 0.2930 | Free GPU: 21.34 GB




✅ Epoch 49 | Train Loss: 0.1726 | Val Loss: 0.2916 | Free GPU: 21.36 GB




✅ Epoch 50 | Train Loss: 0.1706 | Val Loss: 0.2909 | Free GPU: 21.40 GB
🏁 BiSeNet training complete.


In [18]:
import sys
sys.path.append("/content/MLDL2024_project1")


In [20]:
!ls /content/MLDL2024_project1/models/bisenet


build_bisenet.py  build_contextpath.py	__pycache__


In [21]:
import sys
sys.path.append("/content/MLDL2024_project1")

from models.bisenet.build_bisenet import BiSeNet


Model Evaluation

In [22]:
import torch

# Path to your saved model
model_path = "/content/drive/MyDrive/Semantic_Segmentation/bisenet_best_model.pth"

# Initialize the model with the correct number of classes and backbone
model = BiSeNet(num_classes=19, context_path="resnet18")
model.load_state_dict(torch.load(model_path, map_location='cuda'))
model.eval().to('cuda')

print("✅ BiSeNet model loaded and ready for evaluation.")


✅ BiSeNet model loaded and ready for evaluation.


In [23]:
import numpy as np
from tqdm import tqdm

def compute_miou(preds, labels, num_classes=19):
    ious = []
    preds = preds.view(-1).cpu().numpy()
    labels = labels.view(-1).cpu().numpy()
    for cls in range(num_classes):
        pred_inds = preds == cls
        target_inds = labels == cls
        intersection = (pred_inds & target_inds).sum()
        union = (pred_inds | target_inds).sum()
        if union == 0:
            ious.append(float('nan'))  # Skip class
        else:
            ious.append(intersection / union)
    return np.nanmean(ious)

# Evaluation loop
model.eval()
ious = []

with torch.no_grad():
    for imgs, masks in tqdm(val_loader, desc="🔍 Evaluating mIoU"):
        imgs = imgs.to('cuda')
        masks = masks.squeeze(1).long().to('cuda')

        outputs = model(imgs)
        if isinstance(outputs, tuple):  # Only use main output if multiple
            outputs = outputs[0]

        preds = torch.argmax(outputs, dim=1)
        iou = compute_miou(preds, masks, num_classes=19)
        ious.append(iou)

mean_iou = np.nanmean(ious)
print(f"📊 Final mIoU: {mean_iou:.4f}")


🔍 Evaluating mIoU: 100%|██████████| 250/250 [00:30<00:00,  8.27it/s]

📊 Final mIoU: 0.4211





In [24]:
import torch
import time
import numpy as np

# ✅ Ensure the model is in evaluation mode and on CUDA
model.eval()
model.to('cuda')

# ✅ input: batch size = 1, 3 channels, 512 height, 1024 width
input_tensor = torch.randn(1, 3, 512, 1024).to('cuda')

# ✅ Warm-up to stabilize performance
for _ in range(10):
    with torch.no_grad():
        _ = model(input_tensor)

# ✅ Measure latency
latencies = []
iterations = 100

for _ in range(iterations):
    start_time = time.time()
    with torch.no_grad():
        _ = model(input_tensor)
    torch.cuda.synchronize()
    end_time = time.time()
    latencies.append((end_time - start_time) * 1000)  # convert to ms

# ✅ Compute results
mean_latency = np.mean(latencies)
std_latency = np.std(latencies)
fps = 1000 / mean_latency

print(f"⏱️ Latency: {mean_latency:.2f} ± {std_latency:.2f} ms/image")
print(f"🚀 FPS: {fps:.2f}")


⏱️ Latency: 6.96 ± 0.84 ms/image
🚀 FPS: 143.72


In [25]:
!pip install -q fvcore


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Building wheel for iopath (setup.py) ... [?25l[?25hdone


In [26]:
import torch
from fvcore.nn import FlopCountAnalysis, parameter_count_table
from models.bisenet.build_bisenet import BiSeNet

# ✅ Reinitialize the model (if needed)
model = BiSeNet(num_classes=19, context_path='resnet18').to('cuda')
model.eval()

# ✅ input matching Cityscapes size
dummy_input = torch.randn(1, 3, 512, 1024).to('cuda')

# ✅ Compute FLOPs and parameters
flops = FlopCountAnalysis(model, dummy_input)
params = parameter_count_table(model)

print(f"🔢 Total FLOPs: {flops.total() / 1e9:.2f} GFLOPs")
print(params)


context_path.features.avgpool, context_path.features.fc, supervision1, supervision2


🔢 Total FLOPs: 25.78 GFLOPs
| name                                        | #elements or shape   |
|:--------------------------------------------|:---------------------|
| model                                       | 12.6M                |
|  saptial_path                               |  0.4M                |
|   saptial_path.convblock1                   |   1.9K               |
|    saptial_path.convblock1.conv1            |    1.7K              |
|    saptial_path.convblock1.bn               |    0.1K              |
|   saptial_path.convblock2                   |   74.0K              |
|    saptial_path.convblock2.conv1            |    73.7K             |
|    saptial_path.convblock2.bn               |    0.3K              |
|   saptial_path.convblock3                   |   0.3M               |
|    saptial_path.convblock3.conv1            |    0.3M              |
|    saptial_path.convblock3.bn               |    0.5K              |
|  context_path                               |  