In [None]:
# Stage 0 — Imports and global constants
import os
import random
import json
import shutil
from pathlib import Path

import numpy as np
from PIL import Image
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms.functional as TF
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

# Global configuration
DATA_ROOT = "./"            # project root (AFML_project)
CAMVID_ROOT = "camvid"
CHECKPOINT_DIR = "outputs/checkpoints"
OUTPUT_COMP = "outputs/comparisons"
OUTPUT_DET = "outputs/detection"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
SEED = 42
LR = 3e-4
EPOCHS = 60
BATCH_SIZE = 4
NUM_CLASSES = 11
IGNORE_INDEX = 255
IMG_SIZE = 512

os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(OUTPUT_COMP, exist_ok=True)
os.makedirs(OUTPUT_DET, exist_ok=True)

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

print("Device:", DEVICE, "LR:", LR, "EPOCHS:", EPOCHS, "BATCH:", BATCH_SIZE)

Device: cpu LR: 0.0003 EPOCHS: 60 BATCH: 4


In [21]:
# Stage 1 — Create folder structure and optionally extract zips
folders = [
    f"{CAMVID_ROOT}/images",
    f"{CAMVID_ROOT}/labels",
    f"{CAMVID_ROOT}/labels_processed",
    f"{CAMVID_ROOT}/splits",
    f"{CAMVID_ROOT}/mapping",
    f"{CAMVID_ROOT}/raw_images",
    f"{CAMVID_ROOT}/raw_labels",
]
for f in folders:
    os.makedirs(f, exist_ok=True)

# If zip files exist in DATA_ROOT, extract to raw folders (safe: only if files exist)
import zipfile
zip_images = os.path.join(DATA_ROOT, "701_StillsRaw_full.zip")
zip_labels = os.path.join(DATA_ROOT, "LabeledApproved_full.zip")

if os.path.exists(zip_images) and not os.listdir(f"{CAMVID_ROOT}/raw_images"):
    with zipfile.ZipFile(zip_images, "r") as z:
        z.extractall(f"{CAMVID_ROOT}/raw_images")
    print("Extracted images zip")

if os.path.exists(zip_labels) and not os.listdir(f"{CAMVID_ROOT}/raw_labels"):
    with zipfile.ZipFile(zip_labels, "r") as z:
        z.extractall(f"{CAMVID_ROOT}/raw_labels")
    print("Extracted labels zip")

print("Folder structure ready.")

Extracted images zip
Extracted labels zip
Folder structure ready.


In [22]:
# Stage 2 — Move and rename files from raw dirs into camvid/images and camvid/labels

def find_real_dir(path):
    sub = [os.path.join(path, d) for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    return sub[0] if sub else path

raw_img_dir = find_real_dir(f"{CAMVID_ROOT}/raw_images")
raw_lbl_dir = find_real_dir(f"{CAMVID_ROOT}/raw_labels")

img_dst = f"{CAMVID_ROOT}/images"
lbl_dst = f"{CAMVID_ROOT}/labels"

# Move labels (rename _L/_P)
for fname in sorted(os.listdir(raw_lbl_dir)):
    if fname.lower().endswith(".png"):
        new = fname.replace("_L", "").replace("_P", "")
        src = os.path.join(raw_lbl_dir, fname)
        dst = os.path.join(lbl_dst, new)
        if not os.path.exists(dst):
            shutil.move(src, dst)

# Move images
for fname in sorted(os.listdir(raw_img_dir)):
    if fname.lower().endswith((".png", ".jpg", ".jpeg")):
        src = os.path.join(raw_img_dir, fname)
        dst = os.path.join(img_dst, fname)
        if not os.path.exists(dst):
            shutil.move(src, dst)

print("Image and label files moved to camvid/images and camvid/labels")

Image and label files moved to camvid/images and camvid/labels


In [23]:
# Stage 3 — Create train/val/test split files if not present
img_dst = f"{CAMVID_ROOT}/images"
split_dir = f"{CAMVID_ROOT}/splits"
os.makedirs(split_dir, exist_ok=True)

all_files = sorted([f.replace(".png","") for f in os.listdir(img_dst) if f.endswith(".png")])
if not all_files:
    raise ValueError("No .png images found in camvid/images — check path")

# Only create splits if not present
if not (os.path.exists(f"{split_dir}/train.txt") and os.path.exists(f"{split_dir}/val.txt") and os.path.exists(f"{split_dir}/test.txt")):
    train, temp = train_test_split(all_files, test_size=0.3, random_state=SEED)
    val, test = train_test_split(temp, test_size=0.5, random_state=SEED)
    for name, data in [("train", train), ("val", val), ("test", test)]:
        with open(f"{split_dir}/{name}.txt", "w") as f:
            f.writelines("\n".join(data))
    print("Created new train/val/test splits")
else:
    print("Splits already exist — using existing split files")

Created new train/val/test splits


In [24]:
# Stage 4 — Convert colored labels to single-channel class-ID masks (skip if done)
color_to_id = {
    "(128,128,128)": 0,
    "(128,0,0)"    : 1,
    "(192,192,128)": 2,
    "(128,64,128)" : 3,
    "(0,0,192)"    : 4,
    "(128,128,0)"  : 5,
    "(192,128,128)": 6,
    "(64,64,128)"  : 7,
    "(64,0,128)"   : 8,
    "(64,64,0)"    : 9,
    "(0,128,192)"  : 10
}
with open(f"{CAMVID_ROOT}/mapping/color_to_classid.json", "w") as f:
    json.dump(color_to_id, f, indent=2)

lookup = {eval(k): v for k, v in color_to_id.items()}

lbl_src = f"{CAMVID_ROOT}/labels"
lbl_out = f"{CAMVID_ROOT}/labels_processed"
os.makedirs(lbl_out, exist_ok=True)

# Process only if labels_processed empty
if not os.listdir(lbl_out):
    count = 0
    for fname in sorted(os.listdir(lbl_src)):
        if not fname.endswith(".png"):
            continue
        mask = np.array(Image.open(os.path.join(lbl_src, fname)).convert("RGB"))
        h,w,_ = mask.shape
        new_mask = np.full((h,w), IGNORE_INDEX, dtype=np.uint8)
        for rgb, cid in lookup.items():
            new_mask[(mask == rgb).all(axis=2)] = cid
        Image.fromarray(new_mask).save(os.path.join(lbl_out, fname))
        count += 1
    print("Converted masks:", count)
else:
    print("Processed labels already present, skipping conversion")

Converted masks: 701


In [25]:
# Stage 5 — Transforms and Dataset / DataLoader
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

def train_transform(img, mask, size=IMG_SIZE):
    if random.random() < 0.5:
        img = TF.hflip(img)
        mask = torch.flip(mask, dims=[1])
    img = TF.resize(img, (size, size))
    mask = TF.resize(mask.unsqueeze(0).float(), (size,size), interpolation=Image.NEAREST).squeeze(0).long()
    img = TF.to_tensor(img)
    img = TF.normalize(img, IMAGENET_MEAN, IMAGENET_STD)
    return img, mask

def val_transform(img, mask, size=IMG_SIZE):
    img = TF.resize(img, (size, size))
    mask = TF.resize(mask.unsqueeze(0).float(), (size,size), interpolation=Image.NEAREST).squeeze(0).long()
    img = TF.to_tensor(img)
    img = TF.normalize(img, IMAGENET_MEAN, IMAGENET_STD)
    return img, mask

class CamVidDataset(Dataset):
    def __init__(self, root, split_file, transform_fn=None):
        self.root = Path(root)
        self.img_dir = self.root / "images"
        self.lbl_dir = self.root / "labels_processed"
        self.transform_fn = transform_fn
        with open(split_file, "r") as f:
            self.items = [x.strip() for x in f.readlines()]
    def __len__(self):
        return len(self.items)
    def __getitem__(self, idx):
        name = self.items[idx]
        img = Image.open(self.img_dir / (name + ".png")).convert("RGB")
        mask = Image.open(self.lbl_dir / (name + ".png"))
        mask = torch.from_numpy(np.array(mask)).long()
        if self.transform_fn:
            img, mask = self.transform_fn(img, mask)
        return img, mask

def make_loaders(root=CAMVID_ROOT, size=IMG_SIZE, batch_size=BATCH_SIZE, num_workers=0):
    train_set = CamVidDataset(root, f"{root}/splits/train.txt", transform_fn=lambda i,m: train_transform(i,m,size))
    val_set   = CamVidDataset(root, f"{root}/splits/val.txt", transform_fn=lambda i,m: val_transform(i,m,size))
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
    val_loader   = DataLoader(val_set, batch_size=max(batch_size//2,1), shuffle=False, num_workers=num_workers, pin_memory=True)
    return train_loader, val_loader

print("Dataset and loaders ready (call make_loaders to create them).")

Dataset and loaders ready (call make_loaders to create them).


In [26]:
# Stage 6 — Fast-SCNN model definition (same as existing)
class DWConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch, stride=1):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, in_ch, 3, stride=stride, padding=1, groups=in_ch, bias=False),
            nn.BatchNorm2d(in_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_ch, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )
    def forward(self, x):
        return self.conv(x)

class LinearBottleneck(nn.Module):
    def __init__(self, in_ch, out_ch, t=6, stride=1):
        super().__init__()
        mid = in_ch * t
        self.use_res = (stride == 1 and in_ch == out_ch)
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, mid, 1, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid, mid, 3, stride=stride, padding=1, groups=mid, bias=False),
            nn.BatchNorm2d(mid),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid, out_ch, 1, bias=False),
            nn.BatchNorm2d(out_ch),
        )
    def forward(self, x):
        out = self.conv(x)
        if self.use_res:
            out = x + out
        return out

class GlobalFeatureExtractor(nn.Module):
    def __init__(self, in_ch, block_channels, t=6):
        super().__init__()
        layers = []
        for out_ch, stride in block_channels:
            layers.append(LinearBottleneck(in_ch, out_ch, t=t, stride=stride))
            in_ch = out_ch
        self.layers = nn.Sequential(*layers)
    def forward(self, x):
        return self.layers(x)

class FastSCNN(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super().__init__()
        self.learning_to_downsample = nn.Sequential(
            DWConvBlock(3, 32, stride=2),
            DWConvBlock(32, 48, stride=2),
            DWConvBlock(48, 64, stride=2),
        )
        self.global_feature_extractor = GlobalFeatureExtractor(in_ch=64, block_channels=[(96,2),(128,1),(128,1)], t=6)
        self.classifier = nn.Sequential(
            DWConvBlock(128, 128),
            nn.Dropout(0.1),
            nn.Conv2d(128, NUM_CLASSES, 1)
        )
    def forward(self, x):
        size = x.shape[2:]
        x = self.learning_to_downsample(x)
        x = self.global_feature_extractor(x)
        x = self.classifier(x)
        x = F.interpolate(x, size=size, mode="bilinear", align_corners=False)
        return x

# quick sanity check (optional)
with torch.no_grad():
    m = FastSCNN(num_classes=NUM_CLASSES)
    o = m(torch.randn(1,3,256,256))
    print("Fast-SCNN sanity output shape:", o.shape)

Fast-SCNN sanity output shape: torch.Size([1, 11, 256, 256])


In [27]:
# Stage 7 — Training utilities and train_model function
def train_one_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    for imgs, masks in tqdm(loader, desc="Train batches"):
        imgs = imgs.to(device)
        masks = masks.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / max(1, len(loader))

def evaluate_miou(model, loader, device, num_classes=NUM_CLASSES, ignore_index=IGNORE_INDEX):
    model.eval()
    total_iou = 0.0
    count = 0
    with torch.no_grad():
        for imgs, masks in loader:
            imgs = imgs.to(device)
            masks = masks.to(device)
            outputs = model(imgs)
            preds = torch.argmax(outputs, dim=1)
            ious = []
            for cls in range(num_classes):
                pred_c = (preds == cls)
                mask_c = (masks == cls)
                valid = (masks != ignore_index)
                pred_c = pred_c & valid
                mask_c = mask_c & valid
                inter = (pred_c & mask_c).sum().item()
                union = (pred_c | mask_c).sum().item()
                if union > 0:
                    ious.append(inter / union)
            if len(ious) > 0:
                total_iou += sum(ious) / len(ious)
                count += 1
    return total_iou / max(1, count)

def train_model(root=CAMVID_ROOT, size=IMG_SIZE, num_classes=NUM_CLASSES, batch_size=BATCH_SIZE, lr=LR, epochs=EPOCHS, ignore_index=IGNORE_INDEX):
    device = DEVICE
    train_loader, val_loader = make_loaders(root, size, batch_size)
    model = FastSCNN(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=ignore_index)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    best_miou = 0.0
    for epoch in range(1, epochs+1):
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_miou = evaluate_miou(model, val_loader, device, num_classes, ignore_index)
        print(f"Epoch: {epoch} Train Loss: {train_loss:.4f} Val mIoU: {val_miou:.4f}")
        if val_miou > best_miou:
            best_miou = val_miou
            torch.save(model.state_dict(), os.path.join(CHECKPOINT_DIR, "best_camvid.pth"))
            print("Saved best model")
        scheduler.step()
    torch.save(model.state_dict(), os.path.join(CHECKPOINT_DIR, "last_camvid.pth"))
    print("Training complete. Best mIoU:", best_miou)
    return model

In [28]:
# Stage 8 — Start training (run this cell to train)
# Note: training may take long on CPU. If you run on CPU, shorten epochs.
trained_model = train_model(
    root=CAMVID_ROOT,
    size=IMG_SIZE,
    num_classes=NUM_CLASSES,
    batch_size=BATCH_SIZE,
    lr=LR,
    epochs=EPOCHS,
    ignore_index=IGNORE_INDEX
)

Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.78it/s]


Epoch: 1 Train Loss: 1.1312 Val mIoU: 0.2837
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.70it/s]


Epoch: 2 Train Loss: 0.6675 Val mIoU: 0.3622
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.89it/s]


Epoch: 3 Train Loss: 0.5457 Val mIoU: 0.3754
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.92it/s]


Epoch: 4 Train Loss: 0.4846 Val mIoU: 0.3974
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.90it/s]


Epoch: 5 Train Loss: 0.4456 Val mIoU: 0.3871


Train batches: 100%|██████████| 123/123 [00:24<00:00,  4.97it/s]


Epoch: 6 Train Loss: 0.4062 Val mIoU: 0.4063
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.59it/s]


Epoch: 7 Train Loss: 0.3859 Val mIoU: 0.4328
Saved best model


Train batches: 100%|██████████| 123/123 [00:27<00:00,  4.53it/s]


Epoch: 8 Train Loss: 0.3692 Val mIoU: 0.4222


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.58it/s]


Epoch: 9 Train Loss: 0.3515 Val mIoU: 0.4424
Saved best model


Train batches: 100%|██████████| 123/123 [00:27<00:00,  4.54it/s]


Epoch: 10 Train Loss: 0.3340 Val mIoU: 0.4471
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.60it/s]


Epoch: 11 Train Loss: 0.3200 Val mIoU: 0.4596
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.60it/s]


Epoch: 12 Train Loss: 0.3092 Val mIoU: 0.4692
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.62it/s]


Epoch: 13 Train Loss: 0.2996 Val mIoU: 0.4673


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.66it/s]


Epoch: 14 Train Loss: 0.2891 Val mIoU: 0.4730
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.65it/s]


Epoch: 15 Train Loss: 0.2801 Val mIoU: 0.4678


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.58it/s]


Epoch: 16 Train Loss: 0.2706 Val mIoU: 0.4762
Saved best model


Train batches: 100%|██████████| 123/123 [00:24<00:00,  4.93it/s]


Epoch: 17 Train Loss: 0.2717 Val mIoU: 0.4781
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.91it/s]


Epoch: 18 Train Loss: 0.2586 Val mIoU: 0.4897
Saved best model


Train batches: 100%|██████████| 123/123 [00:24<00:00,  4.95it/s]


Epoch: 19 Train Loss: 0.2596 Val mIoU: 0.4819


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.68it/s]


Epoch: 20 Train Loss: 0.2545 Val mIoU: 0.4794


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.65it/s]


Epoch: 21 Train Loss: 0.2447 Val mIoU: 0.4966
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.61it/s]


Epoch: 22 Train Loss: 0.2343 Val mIoU: 0.4818


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.66it/s]


Epoch: 23 Train Loss: 0.2363 Val mIoU: 0.4924


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.70it/s]


Epoch: 24 Train Loss: 0.2300 Val mIoU: 0.4960


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.66it/s]


Epoch: 25 Train Loss: 0.2275 Val mIoU: 0.4997
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.66it/s]


Epoch: 26 Train Loss: 0.2185 Val mIoU: 0.5049
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.67it/s]


Epoch: 27 Train Loss: 0.2124 Val mIoU: 0.5090
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.70it/s]


Epoch: 28 Train Loss: 0.2110 Val mIoU: 0.5066


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.67it/s]


Epoch: 29 Train Loss: 0.2085 Val mIoU: 0.5159
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.56it/s]


Epoch: 30 Train Loss: 0.2114 Val mIoU: 0.5029


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.62it/s]


Epoch: 31 Train Loss: 0.1943 Val mIoU: 0.5171
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.60it/s]


Epoch: 32 Train Loss: 0.1885 Val mIoU: 0.5179
Saved best model


Train batches: 100%|██████████| 123/123 [00:27<00:00,  4.51it/s]


Epoch: 33 Train Loss: 0.1852 Val mIoU: 0.5181
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.61it/s]


Epoch: 34 Train Loss: 0.1824 Val mIoU: 0.5187
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.59it/s]


Epoch: 35 Train Loss: 0.1839 Val mIoU: 0.5197
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.59it/s]


Epoch: 36 Train Loss: 0.1823 Val mIoU: 0.5196


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.62it/s]


Epoch: 37 Train Loss: 0.1833 Val mIoU: 0.5135


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.63it/s]


Epoch: 38 Train Loss: 0.1826 Val mIoU: 0.5205
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.64it/s]


Epoch: 39 Train Loss: 0.1794 Val mIoU: 0.5187


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.65it/s]


Epoch: 40 Train Loss: 0.1792 Val mIoU: 0.5209
Saved best model


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.69it/s]


Epoch: 41 Train Loss: 0.1789 Val mIoU: 0.5184


Train batches: 100%|██████████| 123/123 [00:27<00:00,  4.47it/s]


Epoch: 42 Train Loss: 0.1768 Val mIoU: 0.5167


Train batches: 100%|██████████| 123/123 [00:28<00:00,  4.26it/s]


Epoch: 43 Train Loss: 0.1795 Val mIoU: 0.5179


Train batches: 100%|██████████| 123/123 [00:27<00:00,  4.43it/s]


Epoch: 44 Train Loss: 0.1771 Val mIoU: 0.5205


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.62it/s]


Epoch: 45 Train Loss: 0.1759 Val mIoU: 0.5193


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.57it/s]


Epoch: 46 Train Loss: 0.1759 Val mIoU: 0.5178


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.57it/s]


Epoch: 47 Train Loss: 0.1746 Val mIoU: 0.5168


Train batches: 100%|██████████| 123/123 [00:26<00:00,  4.59it/s]


Epoch: 48 Train Loss: 0.1740 Val mIoU: 0.5199


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.78it/s]


Epoch: 49 Train Loss: 0.1751 Val mIoU: 0.5207


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.81it/s]


Epoch: 50 Train Loss: 0.1727 Val mIoU: 0.5171


Train batches: 100%|██████████| 123/123 [00:24<00:00,  4.96it/s]


Epoch: 51 Train Loss: 0.1707 Val mIoU: 0.5210
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.90it/s]


Epoch: 52 Train Loss: 0.1707 Val mIoU: 0.5186


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.77it/s]


Epoch: 53 Train Loss: 0.1726 Val mIoU: 0.5190


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.81it/s]


Epoch: 54 Train Loss: 0.1707 Val mIoU: 0.5156


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.86it/s]


Epoch: 55 Train Loss: 0.1710 Val mIoU: 0.5168


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.85it/s]


Epoch: 56 Train Loss: 0.1687 Val mIoU: 0.5168


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.83it/s]


Epoch: 57 Train Loss: 0.1721 Val mIoU: 0.5173


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.85it/s]


Epoch: 58 Train Loss: 0.1687 Val mIoU: 0.5236
Saved best model


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.84it/s]


Epoch: 59 Train Loss: 0.1683 Val mIoU: 0.5189


Train batches: 100%|██████████| 123/123 [00:25<00:00,  4.78it/s]


Epoch: 60 Train Loss: 0.1695 Val mIoU: 0.5165
Training complete. Best mIoU: 0.5236152443604731


In [29]:
# Stage 9 — Evaluate on TEST set using saved best checkpoint
def evaluate_test(weights=os.path.join(CHECKPOINT_DIR, "best_camvid.pth"), root=CAMVID_ROOT, size=IMG_SIZE):
    device = DEVICE
    test_set = CamVidDataset(root, f"{root}/splits/test.txt", transform_fn=lambda i,m: val_transform(i,m,size))
    test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=0)
    model = FastSCNN(num_classes=NUM_CLASSES).to(device)
    state = torch.load(weights, map_location=device)
    model.load_state_dict(state)
    miou = evaluate_miou(model, test_loader, device, NUM_CLASSES, IGNORE_INDEX)
    print("Test mIoU:", miou)
    return miou

test_miou = evaluate_test()

Test mIoU: 0.49644736268824524


In [30]:
# Stage 10 — Visualization and detection extraction
palette = {
    0: (128,128,128), 1: (128,0,0), 2: (192,192,128),
    3: (128,64,128),  4: (0,0,192), 5: (128,128,0),
    6: (192,128,128), 7: (64,64,128), 8: (64,0,128),
    9: (64,64,0),     10:(0,128,192)
}

def mask_to_color(mask):
    h,w = mask.shape
    out = np.zeros((h,w,3), dtype=np.uint8)
    for cid, rgb in palette.items():
        out[mask == cid] = rgb
    return out

def extract_bboxes_from_mask(mask, class_id):
    """
    mask: 2D numpy array (H,W) predicted class ids
    class_id: the class to extract boxes for
    return: list of [x1,y1,x2,y2] in pixel coords
    """
    binary = (mask == class_id).astype(np.uint8) * 255
    if binary.sum() == 0:
        return []
    # find contours
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = []
    for cnt in contours:
        x,y,w,h = cv2.boundingRect(cnt)
        boxes.append([x, y, x+w, y+h])
    return boxes

def draw_boxes_on_image(img_rgb, boxes, color=(0,255,0), thickness=2, label=None):
    # img_rgb: HxWx3 uint8
    canvas = img_rgb.copy()
    for i,box in enumerate(boxes):
        x1,y1,x2,y2 = box
        cv2.rectangle(canvas, (x1,y1), (x2,y2), color, thickness)
        if label:
            cv2.putText(canvas, label, (x1, max(y1-6,0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return canvas

def visualize_and_save_all(
    weights=os.path.join(CHECKPOINT_DIR, "best_camvid.pth"),
    root=CAMVID_ROOT,
    size=IMG_SIZE,
    save_detection=True,
    classes_to_box=[1,4,5]  # example: road? person? car? adjust to your mapping
):
    device = DEVICE
    dataset = CamVidDataset(root, f"{root}/splits/test.txt", transform_fn=lambda i,m: val_transform(i,m,size))
    model = FastSCNN(num_classes=NUM_CLASSES).to(device)
    model.load_state_dict(torch.load(weights, map_location=device))
    model.eval()

    for idx in range(len(dataset)):
        img_t, gt_mask = dataset[idx]
        img_in = img_t.unsqueeze(0).to(device)
        with torch.no_grad():
            out = model(img_in)
            pred = torch.argmax(out, dim=1).squeeze().cpu().numpy()
        gt = gt_mask.numpy()
        # convert input tensor back to displayable RGB (unnormalize)
        img_np = img_t.cpu().numpy().transpose(1,2,0)
        img_np = (img_np * np.array(IMAGENET_STD) + np.array(IMAGENET_MEAN))
        img_np = np.clip(img_np, 0, 1)
        img_disp = (img_np * 255).astype(np.uint8)

        gt_rgb = mask_to_color(gt)
        pred_rgb = mask_to_color(pred)

        # save comparison side by side
        comp = np.concatenate([img_disp, gt_rgb, pred_rgb], axis=1)
        comp_pil = Image.fromarray(comp)
        comp_pil.save(os.path.join(OUTPUT_COMP, f"comp_{idx}.png"))

        # detection overlay: draw boxes for selected classes
        if save_detection:
            det_img = img_disp.copy()
            for cls in classes_to_box:
                boxes = extract_bboxes_from_mask(pred, cls)
                label = f"class_{cls}"
                # color pick
                col = (0,255,0) if cls==5 else (255,0,0)  # example: class 5 as green (car), else red
                det_img = draw_boxes_on_image(det_img, boxes, color=col, thickness=2, label=label)
            Image.fromarray(det_img).save(os.path.join(OUTPUT_DET, f"det_{idx}.png"))

    print("Saved comparisons to", OUTPUT_COMP, "and detections to", OUTPUT_DET)

# Run visualization & detection extraction for all test images (or limit by slicing)
visualize_and_save_all()

Saved comparisons to outputs/comparisons and detections to outputs/detection
