In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import zipfile
import os

zip_path = '/content/drive/MyDrive/Semantic_Segmentation/Cityscapes.zip'
extract_path = '/content/datasets/Cityscapes'

# Create destination directory
os.makedirs(extract_path, exist_ok=True)

# Extract
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("✅ Cityscapes dataset extracted to:", extract_path)

✅ Cityscapes dataset extracted to: /content/datasets/Cityscapes


In [3]:
import os
import shutil

# Define paths
nested_path = '/content/datasets/Cityscapes/Cityscapes/Cityspaces'
target_path = '/content/datasets/Cityscapes'

# Move folders to correct target location
shutil.move(os.path.join(nested_path, 'gtFine'), os.path.join(target_path, 'gtFine'))
shutil.move(os.path.join(nested_path, 'images'), os.path.join(target_path, 'leftImg8bit'))

# Cleanup: remove wrongly nested folders
shutil.rmtree('/content/datasets/Cityscapes/Cityscapes')

print("✅ Folder structure corrected.")

✅ Folder structure corrected.


In [4]:
import torch

free_mem = torch.cuda.mem_get_info()[0] / (1024 ** 3)  # in GB
total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
print(f"✅ GPU memory available: {free_mem:.2f} GB / {total_mem:.2f} GB")

✅ GPU memory available: 14.64 GB / 14.74 GB


In [5]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# ✅ Adjusted paths
root_dir = "/content/datasets/Cityscapes"
images_base = os.path.join(root_dir, "leftImg8bit")
masks_base = os.path.join(root_dir, "gtFine")

# ✅ Custom Cityscapes Dataset
class CityscapesDataset(Dataset):
    def __init__(self, split='train', transform=None, target_transform=None):
        self.split = split
        self.transform = transform
        self.target_transform = target_transform

        self.images = []
        self.masks = []

        cities_path = os.path.join(images_base, split)
        for city in os.listdir(cities_path):
            img_dir = os.path.join(cities_path, city)
            mask_dir = os.path.join(masks_base, split, city)

            for file_name in os.listdir(img_dir):
                if file_name.endswith("_leftImg8bit.png"):
                    base = file_name.replace("_leftImg8bit.png", "")
                    img_path = os.path.join(img_dir, file_name)
                    mask_path = os.path.join(mask_dir, base + "_gtFine_labelTrainIds.png")
                    if os.path.exists(mask_path):  # validate existence
                        self.images.append(img_path)
                        self.masks.append(mask_path)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = Image.open(self.images[idx]).convert('RGB')
        mask = Image.open(self.masks[idx])  # labelTrainIds already applied

        if self.transform:
            img = self.transform(img)
        if self.target_transform:
            mask = self.target_transform(mask)

        return img, mask

# ✅ Transformations (1024x512 as required)
image_transform = transforms.Compose([
    transforms.Resize((512, 1024)),  # height, width
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

mask_transform = transforms.Compose([
    transforms.Resize((512, 1024), interpolation=Image.NEAREST),
    transforms.PILToTensor(),
    transforms.Lambda(lambda x: x.long())  # ✅ FIX: ensure target is LongTensor
])

# ✅ Initialize datasets
train_dataset = CityscapesDataset(split='train', transform=image_transform, target_transform=mask_transform)
val_dataset = CityscapesDataset(split='val', transform=image_transform, target_transform=mask_transform)

# ✅ DataLoaders
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=2, pin_memory=True)

print(f"✅ Dataset loaded: {len(train_dataset)} training samples, {len(val_dataset)} validation samples")

✅ Dataset loaded: 1572 training samples, 500 validation samples


In [6]:
!git clone https://github.com/Gabrysse/MLDL2024_project1.git

Cloning into 'MLDL2024_project1'...
remote: Enumerating objects: 34, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 34 (delta 9), reused 3 (delta 3), pack-reused 15 (from 1)[K
Receiving objects: 100% (34/34), 11.29 KiB | 11.29 MiB/s, done.
Resolving deltas: 100% (9/9), done.


In [7]:
import sys
sys.path.append('/content/MLDL2024_project1')

from models.deeplabv2.deeplabv2 import get_deeplab_v2
import torch

# ✅ Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("✅ Using device:", device)

# ✅ Pretrained model path
pretrain_model_path = '/content/drive/MyDrive/Semantic_Segmentation/deeplab_resnet_pretrained_imagenet.pth'

# ✅ Load model with reduced memory (half precision)
model = get_deeplab_v2(num_classes=19, pretrain=True, pretrain_model_path=pretrain_model_path)
model = model.to(device)
print("✅ Model loaded successfully.")

✅ Using device: cuda
Deeplab pretraining loading...
✅ Model loaded successfully.


In [8]:
import torch

free_mem = torch.cuda.mem_get_info()[0] / (1024 ** 3)
total_mem = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)

print(f"🔍 GPU Memory: {free_mem:.2f} GB free / {total_mem:.2f} GB total")

🔍 GPU Memory: 14.47 GB free / 14.74 GB total


In [9]:
import torch.nn as nn
import torch.optim as optim
from torch.amp import autocast, GradScaler

# Loss function
criterion = nn.CrossEntropyLoss(ignore_index=255)

# Optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

# Mixed precision scaler
scaler = GradScaler(device='cuda')

print("✅ Loss, optimizer, and mixed-precision scaler initialized.")

✅ Loss, optimizer, and mixed-precision scaler initialized.


In [10]:
# ✅ Sanity check for label dtype and values
img, mask = train_dataset[0]
print("Image dtype:", img.dtype, "| Shape:", img.shape)
print("Mask dtype:", mask.dtype, "| Shape:", mask.shape)
print("Unique label values:", torch.unique(mask))

Image dtype: torch.float32 | Shape: torch.Size([3, 512, 1024])
Mask dtype: torch.int64 | Shape: torch.Size([1, 512, 1024])
Unique label values: tensor([  0,   1,   2,   5,   7,  10,  11,  12,  13,  18, 255])


Training Check

In [11]:
from tqdm import tqdm
import torch
import gc
from torch.amp import autocast, GradScaler

# Set number of epochs (change to 50 after debugging)
num_epochs = 5

print("🟢 Starting training...")
train_losses = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", leave=False)

    for images, targets in loop:
        # ✅ Move tensors to GPU
        images = images.to(device)
        targets = targets.squeeze(1).long().to(device)  # remove channel dim, ensure long type

        # ✅ Zero out gradients
        optimizer.zero_grad()

        # ✅ Forward + backward with mixed precision
        with autocast(device_type='cuda'):
            outputs, _, _ = model(images)
            loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        loop.set_postfix(loss=loss.item())

    avg_loss = running_loss / len(train_loader)
    train_losses.append(avg_loss)

    # ✅ GPU cleanup + memory stats
    torch.cuda.empty_cache()
    gc.collect()
    free_mem = torch.cuda.mem_get_info()[0] / (1024 ** 3)
    print(f"✅ Epoch {epoch+1} finished | Loss: {avg_loss:.4f} | Free GPU: {free_mem:.2f} GB")

print("🏁 Training complete.")


🟢 Starting training...




✅ Epoch 1 finished | Loss: 1.2620 | Free GPU: 13.53 GB




✅ Epoch 2 finished | Loss: 0.7476 | Free GPU: 13.49 GB




✅ Epoch 3 finished | Loss: 0.6286 | Free GPU: 13.45 GB




✅ Epoch 4 finished | Loss: 0.5534 | Free GPU: 13.50 GB




✅ Epoch 5 finished | Loss: 0.5059 | Free GPU: 13.36 GB
🏁 Training complete.


Training

In [11]:
import os
import torch
from tqdm import tqdm
from torch.amp import autocast
import torch.nn.functional as F
import gc
from sklearn.metrics import confusion_matrix
import numpy as np

# 🧪 Simple mIoU Calculation
def compute_mIoU(preds, labels, num_classes=19, ignore_index=255):
    preds = preds.cpu().numpy().flatten()
    labels = labels.cpu().numpy().flatten()

    # Filter out ignore_index
    mask = labels != ignore_index
    preds = preds[mask]
    labels = labels[mask]

    conf_matrix = confusion_matrix(labels, preds, labels=list(range(num_classes)))
    intersection = np.diag(conf_matrix)
    union = conf_matrix.sum(1) + conf_matrix.sum(0) - np.diag(conf_matrix)

    IoU = intersection / np.maximum(union, 1)
    mIoU = np.mean(IoU)
    return mIoU

# 🔁 Config
num_epochs = 50
best_val_loss = float('inf')
save_path = '/content/best_deeplabv2_cityscapes.pth'

print("🟢 Starting full training...")
train_losses = []
val_losses = []
val_mious = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", leave=False)

    for images, targets in loop:
        images = images.to(device)
        targets = targets.long().squeeze(1).to(device)

        optimizer.zero_grad()
        with autocast('cuda'):
            outputs, _, _ = model(images)
            loss = criterion(outputs, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()
        loop.set_postfix(train_loss=loss.item())

    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # 🧪 VALIDATION
    model.eval()
    val_loss = 0.0
    miou_total = 0.0
    with torch.no_grad():
        for val_imgs, val_masks in val_loader:
            val_imgs = val_imgs.to(device)
            val_masks = val_masks.long().squeeze(1).to(device)

            with autocast(device_type='cuda'):
                val_outputs = model(val_imgs)
                loss = criterion(val_outputs, val_masks)
                val_loss += loss.item()

                preds = torch.argmax(val_outputs, dim=1)
                miou = compute_mIoU(preds, val_masks)
                miou_total += miou

    avg_val_loss = val_loss / len(val_loader)
    avg_miou = miou_total / len(val_loader)
    val_losses.append(avg_val_loss)
    val_mious.append(avg_miou)

    # 💾 Save best model
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), save_path)
        print(f"💾 Best model saved at epoch {epoch+1} | Val Loss: {avg_val_loss:.4f} | mIoU: {avg_miou:.4f}")

    # ♻️ Clean up
    torch.cuda.empty_cache()
    gc.collect()
    mem_free = torch.cuda.mem_get_info()[0] / (1024 ** 3)

    print(f"✅ Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | mIoU: {avg_miou:.4f} | Free GPU: {mem_free:.2f} GB")

print("🏁 Training complete.")
print(f"📉 Best Validation Loss: {best_val_loss:.4f}")


🟢 Starting full training...




💾 Best model saved at epoch 1 | Val Loss: 0.3657 | mIoU: 0.3353
✅ Epoch 1 | Train Loss: 0.4809 | Val Loss: 0.3657 | mIoU: 0.3353 | Free GPU: 13.41 GB




💾 Best model saved at epoch 2 | Val Loss: 0.2813 | mIoU: 0.3683
✅ Epoch 2 | Train Loss: 0.2433 | Val Loss: 0.2813 | mIoU: 0.3683 | Free GPU: 13.52 GB




💾 Best model saved at epoch 3 | Val Loss: 0.2519 | mIoU: 0.3945
✅ Epoch 3 | Train Loss: 0.1852 | Val Loss: 0.2519 | mIoU: 0.3945 | Free GPU: 13.49 GB




💾 Best model saved at epoch 4 | Val Loss: 0.2318 | mIoU: 0.4076
✅ Epoch 4 | Train Loss: 0.1553 | Val Loss: 0.2318 | mIoU: 0.4076 | Free GPU: 13.36 GB




💾 Best model saved at epoch 5 | Val Loss: 0.2272 | mIoU: 0.4135
✅ Epoch 5 | Train Loss: 0.1345 | Val Loss: 0.2272 | mIoU: 0.4135 | Free GPU: 13.33 GB




💾 Best model saved at epoch 6 | Val Loss: 0.2267 | mIoU: 0.4211
✅ Epoch 6 | Train Loss: 0.1172 | Val Loss: 0.2267 | mIoU: 0.4211 | Free GPU: 13.26 GB




✅ Epoch 7 | Train Loss: 0.1051 | Val Loss: 0.2282 | mIoU: 0.4178 | Free GPU: 13.33 GB




✅ Epoch 8 | Train Loss: 0.0974 | Val Loss: 0.2318 | mIoU: 0.4226 | Free GPU: 13.33 GB




✅ Epoch 9 | Train Loss: 0.0898 | Val Loss: 0.2333 | mIoU: 0.4205 | Free GPU: 13.27 GB




✅ Epoch 10 | Train Loss: 0.0846 | Val Loss: 0.2360 | mIoU: 0.4231 | Free GPU: 13.25 GB




✅ Epoch 11 | Train Loss: 0.0798 | Val Loss: 0.2407 | mIoU: 0.4230 | Free GPU: 13.30 GB




✅ Epoch 12 | Train Loss: 0.0755 | Val Loss: 0.2417 | mIoU: 0.4231 | Free GPU: 13.23 GB




✅ Epoch 13 | Train Loss: 0.0743 | Val Loss: 0.2445 | mIoU: 0.4267 | Free GPU: 13.19 GB




✅ Epoch 14 | Train Loss: 0.0714 | Val Loss: 0.2417 | mIoU: 0.4271 | Free GPU: 13.19 GB




✅ Epoch 15 | Train Loss: 0.0675 | Val Loss: 0.2453 | mIoU: 0.4246 | Free GPU: 13.21 GB




✅ Epoch 16 | Train Loss: 0.0639 | Val Loss: 0.2459 | mIoU: 0.4284 | Free GPU: 13.32 GB




✅ Epoch 17 | Train Loss: 0.0612 | Val Loss: 0.2509 | mIoU: 0.4267 | Free GPU: 13.25 GB




✅ Epoch 18 | Train Loss: 0.0587 | Val Loss: 0.2506 | mIoU: 0.4287 | Free GPU: 13.25 GB




✅ Epoch 19 | Train Loss: 0.0570 | Val Loss: 0.2534 | mIoU: 0.4263 | Free GPU: 13.28 GB




✅ Epoch 20 | Train Loss: 0.0554 | Val Loss: 0.2579 | mIoU: 0.4287 | Free GPU: 13.35 GB




✅ Epoch 21 | Train Loss: 0.0535 | Val Loss: 0.2599 | mIoU: 0.4269 | Free GPU: 13.25 GB




✅ Epoch 22 | Train Loss: 0.0519 | Val Loss: 0.2655 | mIoU: 0.4260 | Free GPU: 13.26 GB




✅ Epoch 23 | Train Loss: 0.0505 | Val Loss: 0.2626 | mIoU: 0.4287 | Free GPU: 13.34 GB




✅ Epoch 24 | Train Loss: 0.0489 | Val Loss: 0.2707 | mIoU: 0.4301 | Free GPU: 13.28 GB




✅ Epoch 25 | Train Loss: 0.0482 | Val Loss: 0.2606 | mIoU: 0.4295 | Free GPU: 13.26 GB




✅ Epoch 26 | Train Loss: 0.0470 | Val Loss: 0.2718 | mIoU: 0.4282 | Free GPU: 13.28 GB




✅ Epoch 27 | Train Loss: 0.0459 | Val Loss: 0.2731 | mIoU: 0.4271 | Free GPU: 13.25 GB




✅ Epoch 28 | Train Loss: 0.0452 | Val Loss: 0.2745 | mIoU: 0.4304 | Free GPU: 13.30 GB




✅ Epoch 29 | Train Loss: 0.0445 | Val Loss: 0.2754 | mIoU: 0.4313 | Free GPU: 13.26 GB




✅ Epoch 30 | Train Loss: 0.0435 | Val Loss: 0.2749 | mIoU: 0.4305 | Free GPU: 13.25 GB




✅ Epoch 31 | Train Loss: 0.0429 | Val Loss: 0.2776 | mIoU: 0.4311 | Free GPU: 13.32 GB




✅ Epoch 32 | Train Loss: 0.0422 | Val Loss: 0.2748 | mIoU: 0.4325 | Free GPU: 13.21 GB




✅ Epoch 33 | Train Loss: 0.0412 | Val Loss: 0.2818 | mIoU: 0.4317 | Free GPU: 13.28 GB




✅ Epoch 34 | Train Loss: 0.0403 | Val Loss: 0.2844 | mIoU: 0.4318 | Free GPU: 13.26 GB




✅ Epoch 35 | Train Loss: 0.0396 | Val Loss: 0.2891 | mIoU: 0.4294 | Free GPU: 13.23 GB




✅ Epoch 36 | Train Loss: 0.0390 | Val Loss: 0.2923 | mIoU: 0.4282 | Free GPU: 13.27 GB




✅ Epoch 37 | Train Loss: 0.0386 | Val Loss: 0.2836 | mIoU: 0.4320 | Free GPU: 13.26 GB




✅ Epoch 38 | Train Loss: 0.0383 | Val Loss: 0.2893 | mIoU: 0.4303 | Free GPU: 13.25 GB




✅ Epoch 39 | Train Loss: 0.0379 | Val Loss: 0.2885 | mIoU: 0.4318 | Free GPU: 13.30 GB




✅ Epoch 40 | Train Loss: 0.0373 | Val Loss: 0.2910 | mIoU: 0.4328 | Free GPU: 13.24 GB




✅ Epoch 41 | Train Loss: 0.0368 | Val Loss: 0.2946 | mIoU: 0.4296 | Free GPU: 13.25 GB




✅ Epoch 42 | Train Loss: 0.0362 | Val Loss: 0.2859 | mIoU: 0.4343 | Free GPU: 13.30 GB




✅ Epoch 43 | Train Loss: 0.0361 | Val Loss: 0.2952 | mIoU: 0.4316 | Free GPU: 13.26 GB




✅ Epoch 44 | Train Loss: 0.0364 | Val Loss: 0.2975 | mIoU: 0.4298 | Free GPU: 13.32 GB




✅ Epoch 45 | Train Loss: 0.0356 | Val Loss: 0.2979 | mIoU: 0.4321 | Free GPU: 13.33 GB




✅ Epoch 46 | Train Loss: 0.0350 | Val Loss: 0.3001 | mIoU: 0.4301 | Free GPU: 13.30 GB




✅ Epoch 47 | Train Loss: 0.0346 | Val Loss: 0.2983 | mIoU: 0.4329 | Free GPU: 13.33 GB




✅ Epoch 48 | Train Loss: 0.0340 | Val Loss: 0.2983 | mIoU: 0.4340 | Free GPU: 13.21 GB




✅ Epoch 49 | Train Loss: 0.0336 | Val Loss: 0.3084 | mIoU: 0.4317 | Free GPU: 13.18 GB




✅ Epoch 50 | Train Loss: 0.0335 | Val Loss: 0.3051 | mIoU: 0.4328 | Free GPU: 13.21 GB
🏁 Training complete.
📉 Best Validation Loss: 0.2267


In [12]:
import shutil

# ✅ Destination path on the Drive
drive_path = "/content/drive/MyDrive/Semantic_Segmentation/best_deeplabv2_cityscapes.pth"

# ✅ Copy model from Colab to Drive
shutil.copy("/content/best_deeplabv2_cityscapes.pth", drive_path)

print("✅ Model saved to Google Drive.")


✅ Model saved to Google Drive.


Latency

In [13]:
import time
import torch
import numpy as np

model.eval()  # ✅ inference mode

dummy_input = torch.randn(1, 3, 512, 1024).to(device)
iterations = 1000

latencies = []
fps_values = []

with torch.no_grad():
    for _ in range(iterations):
        start = time.time()
        _ = model(dummy_input)
        end = time.time()

        latency = end - start
        latencies.append(latency)
        fps_values.append(1 / latency)

mean_latency_ms = np.mean(latencies) * 1000
std_latency_ms = np.std(latencies) * 1000
mean_fps = np.mean(fps_values)
std_fps = np.std(fps_values)

print(f"⏱️ Latency: {mean_latency_ms:.2f} ± {std_latency_ms:.2f} ms/image")
print(f"🚀 FPS: {mean_fps:.2f} ± {std_fps:.2f}")


⏱️ Latency: 234.68 ± 12.26 ms/image
🚀 FPS: 4.41 ± 3.36


Flops

In [14]:
# ✅ Install fvcore if not already
!pip install -q fvcore

# ✅ Count FLOPs
from fvcore.nn import FlopCountAnalysis, flop_count_table

model.eval()
dummy_input = torch.randn(1, 3, 512, 1024).to(device)

flops = FlopCountAnalysis(model, dummy_input)
print(flop_count_table(flops, max_depth=1))


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Building wheel for iopath (setup.py) ... [?25l[?25hdone
| module              | #parameters or shape   | #flops   |
|:--------------------|:-----------------------|:---------|
| model               | 43.901M                | 0.375T   |
|  conv1              |  9.408K                |  1.233G  |
|  bn1                |  0.128K                |  16.777M |
|  layer1             |  0.216M                |  7.155G  |
|  layer2             |  1.22M                 |  10.226G |
|  layer3             |  26.09M                |  0.219T  |
|  layer4             |  14.965M          

Count Model Parameters

In [15]:
# ✅ Count total parameters
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"📦 Total Trainable Parameters: {total_params / 1e6:.2f} Million")


📦 Total Trainable Parameters: 43.80 Million
