# üèçÔ∏è LossZero: Motorcycle Night Ride SegFormer-B2 Optimized

Ïù¥ ÎÖ∏Ìä∏Î∂ÅÏùÄ **SegFormer-B2** Î™®Îç∏ÏùÑ ÏÇ¨Ïö©ÌïòÏó¨ ÏïºÍ∞Ñ Ïò§ÌÜ†Î∞îÏù¥ Ï£ºÌñâ Ïù¥ÎØ∏ÏßÄÏùò ÏãúÎ©òÌã± ÏÑ∏Í∑∏Î©òÌÖåÏù¥ÏÖòÏùÑ ÏàòÌñâÌï©ÎãàÎã§.

### üõ†Ô∏è Ï£ºÏöî ÏãúÎÇòÎ¶¨Ïò§
- **Î™®Îç∏**: SegFormer-B2 (Transformer Í∏∞Î∞ò)
- **Î∞±Î≥∏**: MiT-B2
- **ÏÇ¨Ï†Ñ ÌïôÏäµ**: Cityscapes (ÎèÑÎ°ú ÌôòÍ≤Ω ÌäπÌôî)
- **ÏµúÏ†ÅÌôî**: AdamW + FP16 Mixed Precision
- **ÏÜêÏã§ Ìï®Ïàò**: Weighted CrossEntropy (Ï§ëÏöî Í∞ùÏ≤¥ Í∞ÄÏ§ëÏπò Î∂ÄÏó¨)

In [31]:
import os
import cv2
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig
from torch.amp import autocast, GradScaler
from tqdm.auto import tqdm
import wandb

print(f"PyTorch version: {torch.__version__}")


PyTorch version: 2.9.0+cpu


## Colab Ïó∞Í≤∞

In [32]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
def get_device():
    if torch.cuda.is_available():
        return "cuda"

    return "cpu"

def num_worker():
    if torch.cuda.is_available():
        return os.cpu_count()

    return 0

# ‚öôÔ∏è ÏÑ§Ï†ï (Configuration)
# DATA_DIR = "/content/drive/MyDrive/motor_model"
DATA_DIR = "/content/drive/MyDrive/data/LossZero/"
# DATA_DIR = os.path.expanduser("~/Projects/LossZero/data/Motorcycle Night Ride Dataset")
print("Detected Local Environment")

JSON_PATH = os.path.join(DATA_DIR, "COCO_motorcycle (pixel).json")
IMG_DIR = os.path.join(DATA_DIR, "images")

CFG = {
    "project": "LossZero",
    "model_name": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024",
    "img_size": (384, 672),
    "batch_size": 4,
    "lr": 1e-4,
    "epochs": 2,
    "device": get_device(),
    "num_worker": num_worker(),
    "train": True,
    "wndb": False,
}
if CFG["wndb"]:
    wandb.login()

print(f"Using device: {CFG['device']}")
print(f"Data directory: {DATA_DIR}")

Detected Local Environment
Using device: cpu
Data directory: /content/drive/MyDrive/data/LossZero/


In [34]:
def create_mask_from_json(coco, img_id, img_info, id_to_idx):
    ann_ids = coco.getAnnIds(imgIds=img_id)
    anns = coco.loadAnns(ann_ids)
    mask = np.zeros((img_info['height'], img_info['width']), dtype=np.uint8)

    for ann in anns:
        cat_id = ann['category_id']
        if cat_id in id_to_idx:
            cls_idx = id_to_idx[cat_id]
            pixel_mask = coco.annToMask(ann)
            mask[pixel_mask == 1] = cls_idx

    return mask

def process_single_data(coco, img_id, img_dir, id_to_idx, transform=None):
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(img_dir, img_info['file_name'])

    image = cv2.imread(img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask = create_mask_from_json(coco, img_id, img_info, id_to_idx)

    if transform:
        augmented = transform(image=image, mask=mask)
        image, mask = augmented['image'], augmented['mask']

    return image, torch.as_tensor(mask).long()

train_transform = A.Compose([
    #  ÏõêÎ≥∏ Ìï¥ÏÉÅÎèÑÏóêÏÑú 480x480 ÌÅ¨Í∏∞Î°ú Î¨¥ÏûëÏúÑ Ï∂îÏ∂ú (ÌôîÏßà Ï†ÄÌïò ÏóÜÏùå)
    A.RandomCrop(height=CFG['img_size'][0], width=CFG['img_size'][1], p=1.0),
    A.PadIfNeeded(min_height=CFG['img_size'][0], min_width=CFG['img_size'][1], p=1.0),

    # --- ÏïºÍ∞Ñ Ï†ÑÏö© Augmentation Ï∂îÍ∞Ä ---
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5), # Ïñ¥ÎëêÏö¥ Ï†ÄÏ°∞ÎèÑ Í∞úÏÑ†
    A.GaussNoise(std_range=(0.02, 0.05), p=0.3), # ÏïºÍ∞Ñ ÎÖ∏Ïù¥Ï¶à ÎåÄÏùë

    # --- Í∏∞ÌïòÌïôÏ†Å Î≥ÄÌòï (Îç∞Ïù¥ÌÑ∞ Ïàò Î≥¥Ï∂©Ïö©) ---
    A.HorizontalFlip(p=0.5), # Ï¢åÏö∞ Î∞òÏ†Ñ
    # 0.0625Îäî Î®∏Ïã†Îü¨Îãù/Îî•Îü¨Îãù Ïª§ÎÆ§ÎãàÌã∞ÏóêÏÑú Ïò§Îû´ÎèôÏïà Í≤ÄÏ¶ùÎêú 'ÏÇ¨Ïã§ÏÉÅ ÌëúÏ§Ä(De Facto Standard)
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=25, p=0.5), # Ïù¥Îèô/ÌÅ¨Í∏∞/ÌöåÏ†Ñ

    # ImageNet Îç∞Ïù¥ÌÉÄÏÖãÏùò ÌèâÍ∑†Í∞í ÎÇòÏÅòÏßÄ ÏïäÏùå. SegFormerÍ∞Ä ImageNet/CityscapesÎ°ú Î∞∞Ïõ†ÏúºÎãàÍπå
    # Î™®Îç∏Ïù¥ ÏÉàÎ°úÏö¥ ÏÇ¨ÏßÑÏùÑ Î∞õÏùÑ Îïå: ÏûÖÎ†•_Ïù¥ÎØ∏ÏßÄ = (ÏõêÎ≥∏_Ïù¥ÎØ∏ÏßÄ - ÌèâÍ∑†) / ÌëúÏ§ÄÌé∏Ï∞®
    # Ïù¥Î†áÍ≤å Í≥ÑÏÇ∞Ìï¥Ï£ºÎ©¥, Ïñ¥Îñ§ ÏÇ¨ÏßÑÏù¥ Îì§Ïñ¥ÏôÄÎèÑ "ÌèâÍ∑†Ïù¥ 0Ïù¥Í≥† ÌëúÏ§ÄÌé∏Ï∞®Í∞Ä 1Ïù∏(Standard Normal Distribution)" ÏïÑÏ£º ÏòàÏÅú Îç∞Ïù¥ÌÑ∞Î°ú Î≥ÄÏã†
    # Ï†ÑÏ≤¥ ÏïºÍ∞Ñ Îç∞Ïù¥ÌÑ∞ÏÖãÏùò Mean/StdÎ•º ÏßÅÏ†ë Í≥ÑÏÇ∞Ìïú Í∞í
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2()
])

coco = COCO(JSON_PATH)
img_ids = list(coco.imgs.keys())
cat_ids = coco.getCatIds()
id_to_idx = {cat_id: i for i, cat_id in enumerate(cat_ids)}
print(f"Category Mapping: {id_to_idx}")

loading annotations into memory...
Done (t=9.80s)
creating index...
index created!
Category Mapping: {1329681: 0, 1323885: 1, 1323884: 2, 1323882: 3, 1323881: 4, 1323880: 5}


## Traing / Val Î∂ÑÎ¶¨

In [35]:
import random
from sklearn.model_selection import train_test_split
class MotorcycleNightRideDataset(Dataset):
    def __init__(self, coco, img_ids, img_dir, id_to_idx, transform=None, use_copy_paste=False):
        self.coco = coco
        self.img_ids = img_ids
        self.img_dir = img_dir
        self.id_to_idx = id_to_idx
        self.transform = transform
        self.use_copy_paste = use_copy_paste  # Copy-Paste ÌôúÏÑ±Ìôî Ïó¨Î∂Ä (TrainÎßå True)

    def __len__(self):
        return len(self.img_ids)

    def load_image_mask(self, idx):
        """Ïù∏Îç±Ïä§Ïóê Ìï¥ÎãπÌïòÎäî Ïù¥ÎØ∏ÏßÄÏôÄ ÎßàÏä§ÌÅ¨Î•º Î°úÎìúÌïòÍ≥† BGR->RGB Î≥ÄÌôò"""
        img_id = self.img_ids[idx]
        img_info = self.coco.loadImgs(img_id)[0]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # ÎßàÏä§ÌÅ¨ ÏÉùÏÑ± (Í∏∞Ï°¥ Ìï®Ïàò ÌôúÏö©)
        mask = create_mask_from_json(self.coco, img_id, img_info, self.id_to_idx)
        return image, mask

    def apply_copy_paste(self, image, mask):
        """
        Copy-Paste Augmentation:
        Îã§Î•∏ Ïù¥ÎØ∏ÏßÄ(Donor)ÏóêÏÑú 'Lane Mark(3)'ÎÇò 'Moveable(2)' Í∞ôÏùÄ ÏÜåÏàò ÌÅ¥ÎûòÏä§Î•º Ïò§Î†§ÎÇ¥Ïñ¥
        ÌòÑÏû¨ Ïù¥ÎØ∏ÏßÄ(Target)Ïóê Î∂ôÏó¨ÎÑ£ÏäµÎãàÎã§.
        """
        # 1. Í∏∞Ï¶ùÏûê(Donor) Î¨¥ÏûëÏúÑ ÏÑ†ÌÉù
        donor_idx = random.randint(0, len(self.img_ids) - 1)
        donor_img, donor_mask = self.load_image_mask(donor_idx)

        # 2. Ïò§Î†§ÎÇº ÌÉÄÍ≤ü ÌÅ¥ÎûòÏä§ Ï†ïÏùò (Ï∞®ÏÑ†Í≥º ÏõÄÏßÅÏù¥Îäî Î¨ºÏ≤¥ ÏßëÏ§ë Í≥µÎûµ)
        # Lane Mark: 3, Moveable: 2
        target_indices = [2, 3]

        # donor_maskÏóêÏÑú Ìï¥Îãπ ÌÅ¥ÎûòÏä§Ïù∏ ÌîΩÏÖÄÎßå True (ÎÇòÎ®∏ÏßÄ False)
        # np.isinÏùÑ ÏÇ¨Ïö©ÌïòÏó¨ Ïó¨Îü¨ ÌÅ¥ÎûòÏä§Î•º Ìïú Î≤àÏóê Ï≤òÎ¶¨
        paste_mask = np.isin(donor_mask, target_indices)

        # 3. Î∂ôÏó¨ÎÑ£Í∏∞ (Paste)
        # Ìï¥Îãπ ÏòÅÏó≠Ïóê ÎÇ¥Ïö©Î¨ºÏù¥ ÏûàÏùÑ Í≤ΩÏö∞ÏóêÎßå Ïã§Ìñâ
        if np.any(paste_mask):
            # Ïù¥ÎØ∏ÏßÄ ÎçÆÏñ¥Ïì∞Í∏∞
            image[paste_mask] = donor_img[paste_mask]
            # ÎßàÏä§ÌÅ¨ ÎçÆÏñ¥Ïì∞Í∏∞ (Ï†ïÎãµÏßÄ ÏàòÏ†ï)
            mask[paste_mask] = donor_mask[paste_mask]

        return image, mask

    def __getitem__(self, idx):
        # 1. ÏõêÎ≥∏ Îç∞Ïù¥ÌÑ∞ Î°úÎìú
        image, mask = self.load_image_mask(idx)

        # 2. Copy-Paste Ï†ÅÏö© (ÌõàÎ†® Îç∞Ïù¥ÌÑ∞ÏÖãÏù¥Í≥†, 50% ÌôïÎ•† ÎãπÏ≤® Ïãú)
        if self.use_copy_paste and random.random() < 0.5:
            image, mask = self.apply_copy_paste(image, mask)

        # 3. Albumentations Î≥ÄÌôò (Resize, ColorJitter, Normalization Îì±)
        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, torch.as_tensor(mask).long()

# --------------------------------------------------------------------------
# 1. Îç∞Ïù¥ÌÑ∞ Î°úÎìú Î∞è ID Î∂ÑÌï† (7:2:1)
coco = COCO(JSON_PATH)
all_ids = list(coco.imgs.keys())

# First split: 70% train, 30% temp
train_ids, temp_ids = train_test_split(all_ids, test_size=0.3, random_state=42)
# Second split: temp_ids into 2/3 for val (0.2 of total), 1/3 for test (0.1 of total)
val_ids, test_ids = train_test_split(temp_ids, test_size=1/3, random_state=42)

# 2. Transform Ï†ïÏùò (Í∏∞Ï°¥ Ï†ïÏùò ÌôúÏö©)
val_transform = A.Compose([
    A.Resize(CFG['img_size'][0], CFG['img_size'][1]),
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2()
])
# Test transform is the same as validation transform
test_transform = val_transform

# 3. Îç∞Ïù¥ÌÑ∞ÏÖã Ïù∏Ïä§ÌÑ¥Ïä§ ÏÉùÏÑ± (‚òÖTrainÏóêÎßå Copy-Paste ÌôúÏÑ±Ìôî‚òÖ)
train_dataset = MotorcycleNightRideDataset(
    coco, train_ids, IMG_DIR, id_to_idx,
    transform=train_transform,
    use_copy_paste=True  # Copy-Paste ON!
)
val_dataset = MotorcycleNightRideDataset(
    coco, val_ids, IMG_DIR, id_to_idx,
    transform=val_transform,
    use_copy_paste=False # ValÏóêÎäî Ï†àÎåÄ Ïì∞Î©¥ Ïïà Îê® (ÏàúÏàò ÌèâÍ∞Ä)
)
test_dataset = MotorcycleNightRideDataset( # New test dataset
    coco, test_ids, IMG_DIR, id_to_idx,
    transform=test_transform,
    use_copy_paste=False # TestÏóêÎäî Ï†àÎåÄ Ïì∞Î©¥ Ïïà Îê® (ÏàúÏàò ÌèâÍ∞Ä)
)

# 4. Îç∞Ïù¥ÌÑ∞ Î°úÎçî ÏÉùÏÑ±
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['batch_size'],
    shuffle=True,
    num_workers=CFG['num_worker'],
    pin_memory=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=CFG['num_worker'],
    pin_memory=True
)
test_loader = DataLoader( # New test loader
    test_dataset,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=CFG['num_worker'],
    pin_memory=True
)

# Define CFG_EVAL for later use
CFG_EVAL = {
    "num_classes": len(id_to_idx)
}

print(f"‚úÖ Data Ready with Copy-Paste Augmentation!")
print(f"   Train={len(train_ids)} (CP Active), Val={len(val_ids)}, Test={len(test_ids)}")

loading annotations into memory...
Done (t=9.68s)
creating index...
index created!
‚úÖ Data Ready with Copy-Paste Augmentation!
   Train=140 (CP Active), Val=40, Test=20


### üìâ ÌÅ¥ÎûòÏä§Î≥Ñ Î∂ÑÌè¨ ÏöîÏïΩ (ÎÇ¥Î¶ºÏ∞®Ïàú)

1. **Undrivable (Ï£ºÌñâ Î∂àÍ∞Ä ÏòÅÏó≠)**: **42.9%** (ÏïïÎèÑÏ†Å 1ÏúÑ)
   - Î∞∞Í≤Ω(ÌïòÎäò, Í±¥Î¨º, ÌíÄÏà≤ Îì±)Ïù¥ Ïù¥ÎØ∏ÏßÄÏùò Ï†àÎ∞ò Í∞ÄÍπåÏù¥ Ï∞®ÏßÄÌï©ÎãàÎã§.
2. **Road (Ï£ºÌñâ Í∞ÄÎä• ÎèÑÎ°ú)**: **27.1%**
   - ÎèÑÎ°ú ÏûêÏ≤¥ÎèÑ ÍΩ§ ÎßéÏùÄ ÏòÅÏó≠ÏùÑ Ï∞®ÏßÄÌï©ÎãàÎã§.
3. **My bike (ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥)**: **15.8%**
   - Ï£ºÌñâÏûê ÏãúÏ†êÏù¥Îùº ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥Í∞Ä Ìï≠ÏÉÅ Î≥¥Ïù¥Í∏∞ ÎïåÎ¨∏Ïóê ÎπÑÏú®Ïù¥ ÎÜíÏäµÎãàÎã§.
4. **Rider (ÌÉëÏäπÏûê)**: **8.1%**
   - Îã§Î•∏ Ïò§ÌÜ†Î∞îÏù¥ Ïö¥Ï†ÑÏûêÎÇò ÎÇ¥ Ïã†Ï≤¥Í∞Ä Ìè¨Ìï®Îêú Í≤ÉÏúºÎ°ú Î≥¥ÏûÖÎãàÎã§.
5. **Moveable (Ïù¥Îèô Î¨ºÏ≤¥)**: **4.7%**
   - Îã§Î•∏ Ï∞®Îüâ, Î≥¥ÌñâÏûê Îì± ÏïàÏ†ÑÏóê Í∞ÄÏû• Ï§ëÏöîÌïú Ïû•Ïï†Î¨ºÏù∏Îç∞ ÎπÑÏú®Ïù¥ Îß§Ïö∞ ÎÇÆÏäµÎãàÎã§.
6. **Lane Mark (Ï∞®ÏÑ†)**: **1.4%**
   - Í∞ÄÏû• Ïã¨Í∞ÅÌïú Î∂àÍ∑†ÌòïÏûÖÎãàÎã§. ÎèÑÎ°ú Ï£ºÌñâÏùò ÌïµÏã¨Ïù∏ Ï∞®ÏÑ†Ïù¥ Í≥†Ïûë 1% ÎÇ®ÏßìÏûÖÎãàÎã§.

In [36]:
id2label = {i: coco.loadCats(cat_id)[0]['name'] for cat_id, i in id_to_idx.items()}
label2id = {v: k for k, v in id2label.items()}

model = SegformerForSemanticSegmentation.from_pretrained(
    CFG['model_name'],
    num_labels=len(id_to_idx),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
).to(CFG['device'])

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=CFG['lr'], # Learning Rate
    weight_decay=0.05 #Í∞ÄÏ§ëÏπòÏùò Í¥ÄÏÑ± Ï†úÏñ¥, ÏòµÌã∞ÎßàÏù¥Ï†ÄÏóêÏÑú 0.01Ïù¥ÎùºÎäî Í∞íÏùÄ Îß§ ÌïôÏäµ Îã®Í≥Ñ(Step)ÎßàÎã§ ÌòÑÏû¨ Í∞ÄÏ§ëÏπò Í∞íÏùÑ ÏñºÎßàÎÇò ÍπéÏùÑÏßÄÎ•º Í≤∞Ï†ïÌïòÎäî ÎπÑÏú®ÏûÖÎãàÎã§.
)

# ‚öñÔ∏è ÌÅ¥ÎûòÏä§Î≥Ñ Í∞ÄÏ§ëÏπò ÏÑ§Ï†ï (Class Weights)
weights = torch.tensor([
    3.0,   # Rider: 5.0 ‚Üí 3.0 (Ï§ëÏöîÌïòÏßÄÎßå Í≥ºÌïòÏßÄ ÏïäÍ≤å)
    1.5,   # My bike: 2.0 ‚Üí 1.5 (ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥Îäî ÎÑàÎ¨¥ Ïûò ÎßûÌûàÎãà Ï°∞Í∏à Îçî ÎÇÆÏ∂§)
    6.0,   # Moveable: 10.0 ‚Üí 6.0 (Ï†àÎ∞òÏúºÎ°ú Ï§ÑÏó¨ÏÑú Î∂ÄÎã¥ ÏôÑÌôî)
    12.0,  # Lane Mark: 20.0 ‚Üí 12 (Ïó¨Ï†ÑÌûà Ï†úÏùº Í∞ïÎ†•ÌïòÏßÄÎßå, 20Î∞∞Îäî ÎÑàÎ¨¥ Í∞ÄÌòπÌñàÏùå)
    1.0,   # Road: 1.0 (Í∏∞Ï§ÄÏ†ê Ïú†ÏßÄ)
    0.8    # Undrivable: 0.5 ‚Üí 0.8 (Î∞∞Í≤ΩÏùÑ ÎÑàÎ¨¥ Î¨¥ÏãúÌï¥ÏÑú ÎèÑÎ°ú Í≤ΩÍ≥ÑÍ∞Ä Î¨¥ÎÑàÏßÄÎäî Í≤É Î∞©ÏßÄ)
], dtype=torch.float).to(CFG['device'])

criterion = nn.CrossEntropyLoss(weight=weights)

scaler = GradScaler('cuda') if CFG['device'] == 'cuda' else None

Loading weights:   0%|          | 0/380 [00:00<?, ?it/s]

SegformerForSemanticSegmentation LOAD REPORT from: nvidia/segformer-b2-finetuned-cityscapes-1024-1024
Key                           | Status   |                                                                                                  
------------------------------+----------+--------------------------------------------------------------------------------------------------
decode_head.classifier.weight | MISMATCH | Reinit due to size mismatch ckpt: torch.Size([19, 768, 1, 1]) vs model:torch.Size([6, 768, 1, 1])
decode_head.classifier.bias   | MISMATCH | Reinit due to size mismatch ckpt: torch.Size([19]) vs model:torch.Size([6])                      

Notes:
- MISMATCH	:ckpt weights were loaded, but they did not match the original empty weight shapes.


In [37]:
import torch
import numpy as np
import cv2
from sklearn.metrics import confusion_matrix

# [1] Í∏∞Î≥∏ IoU Í≥ÑÏÇ∞ Î°úÏßÅ (Category-specific)
def compute_category_iou(preds, targets, num_classes):
    preds_flat = preds.flatten().cpu().numpy()
    targets_flat = targets.flatten().cpu().numpy()
    valid_mask = (targets_flat >= 0) & (targets_flat < num_classes)
    preds_flat = preds_flat[valid_mask]
    targets_flat = targets_flat[valid_mask]
    cm = confusion_matrix(targets_flat, preds_flat, labels=range(num_classes))
    intersection = np.diag(cm)
    ground_truth_set = cm.sum(axis=1)
    predicted_set = cm.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection
    iou = intersection / (union + 1e-6)
    return iou

# [2] Boundary IoU Í≥ÑÏÇ∞ Î°úÏßÅ
def get_boundary(mask, dilation_pixels=2):
    mask = mask.astype(np.uint8)
    kernel = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(mask, kernel, iterations=dilation_pixels)
    boundary = mask - eroded
    return boundary

def compute_boundary_iou(preds, targets, num_classes, dilation_pixels=2):
    preds_np = preds.cpu().numpy()
    targets_np = targets.cpu().numpy()
    b_ious = []
    for c in range(num_classes):
        class_preds = (preds_np == c)
        class_targets = (targets_np == c)
        ious_per_batch = []
        for i in range(preds_np.shape[0]):
            gt_boundary = get_boundary(class_targets[i], dilation_pixels)
            pred_boundary = get_boundary(class_preds[i], dilation_pixels)
            intersection = ((gt_boundary > 0) & (pred_boundary > 0)).sum()
            union = ((gt_boundary > 0) | (pred_boundary > 0)).sum()
            if union == 0:
                ious_per_batch.append(1.0)
            else:
                ious_per_batch.append(intersection / union)
        if not ious_per_batch:
            b_ious.append(0.0)
        else:
            b_ious.append(np.mean(ious_per_batch))
    return np.array(b_ious)

# [3] ÌÜµÌï© ÌèâÍ∞Ä Ìï®Ïàò (Metrics Calculation)
def evaluate_metrics(preds, targets, num_classes):
    """ÏòàÏ∏°Í∞íÍ≥º Ï†ïÎãµÏùÑ Î∞õÏïÑ Î™®Îì† ÏßÄÌëú(Category IoU, Boundary IoU)Î•º Ìïú Î≤àÏóê Í≥ÑÏÇ∞"""
    cat_iou = compute_category_iou(preds, targets, num_classes)
    bound_iou = compute_boundary_iou(preds, targets, num_classes)
    return cat_iou, bound_iou

# [4] ÏÉÅÏÑ∏ Î¶¨Ìè¨Ìä∏ Ï∂úÎ†• Ìï®Ïàò (Report Generation)
def print_evaluation_report(avg_cat_iou, avg_bound_iou, id2label=None):
    """Í≥ÑÏÇ∞Îêú ÌèâÍ∑† IoU Í∞íÎì§ÏùÑ Î∞õÏïÑ Î≥¥Í∏∞ Ï¢ãÍ≤å Ï∂úÎ†•"""
    print("\n[Validation Report]")
    print("  Category-specific IoU:")
    for i, iou in enumerate(avg_cat_iou):
        label = id2label[i] if id2label else f"Class {i}"
        print(f"    - {label}: {iou:.4f}")

    print("\n  Boundary IoU (Details):")
    for i, iou in enumerate(avg_bound_iou):
        label = id2label[i] if id2label else f"Class {i}"
        print(f"    - {label}: {iou:.4f}")

    mIoU = np.nanmean(avg_cat_iou)
    mBoU = np.nanmean(avg_bound_iou)
    print(f"\n  --> mIoU: {mIoU:.4f} | mBoU: {mBoU:.4f}\n")
    return mIoU, mBoU


In [38]:
# [Í≥µÌÜµ Î°úÏßÅ] Îç∞Ïù¥ÌÑ∞ Ï†ÑÏÜ°, Ï∂îÎ°†, ÌôïÎåÄ, ÏÜêÏã§ Í≥ÑÏÇ∞
def forward_step(model, images, masks, criterion, device):
    # Îç∞Ïù¥ÌÑ∞Î•º GPU(CUDA) ÎòêÎäî CPU Ï§ë Ïã§Ï†ú Ïó∞ÏÇ∞Ïù¥ ÏùºÏñ¥ÎÇ† Ïû•ÏπòÎ°ú Î≥¥ÎÇ∏Îã§
    # torch.Tensor ÌòïÌÉúÏù¥Î©∞ contiguous()Î°ú Î©îÎ™®Î¶¨Î•º Ï†ïÎ†¨ÌïúÎã§
    X = images.to(device).contiguous()
    y = masks.to(device).contiguous()

    # Forward Pass
    # logits: Ïö∞Î¶¨Í∞Ä Ï∞æÎäî ÌÅ¥ÎûòÏä§Î≥Ñ Ï†êÏàòÌåê (ÌïÑÏàò!) -10.5ÎÇò 15.2 Í∞ôÏùÄ ÏûêÏú†Î°úÏö¥ Ïà´Ïûê
    outputs = model(X).logits

    # ÌôïÎåÄ (Interpolation)
    # Î™®Îç∏ Í≤∞Í≥ºÎ¨º(outputs)ÏùÄ Ïó∞ÏÇ∞ Ìö®Ïú®ÏùÑ ÏúÑÌï¥ 96x96ÏúºÎ°ú Ï∂ïÏÜåÎêòÏñ¥ ÏûàÏùå
    # Ïù¥Î•º Ï†ïÎãµÏßÄ yÏôÄ ÎòëÍ∞ôÏùÄ ÌÅ¨Í∏∞(384x384)Î°ú Î∂ÄÎìúÎüΩÍ≤å ÌôïÎåÄ(Interpolate)
    # y.shape[-2:] -> (384, 384)
    upsampled_logits = nn.functional.interpolate(
        outputs, size=y.shape[-2:], mode="bilinear", align_corners=False
    )

    # Ïò§Ï∞®(Loss) Í≥ÑÏÇ∞
    loss = criterion(upsampled_logits, y)

    return loss, upsampled_logits, y

# [ÌïôÏäµ Îã®Í≥Ñ] Ìïú ÏóêÌè≠ ÎèôÏïàÏùò ÌïôÏäµ ÏßëÌñâ
def train_one_epoch(model, loader, optimizer, criterion, device, scaler=None, epoch=0):
    model.train()
    train_loss_sum = 0
    pbar = tqdm(loader, desc=f"Epoch {epoch+1} [Train]")

    for images, masks in pbar:
        optimizer.zero_grad()

        # Mixed Precision ÏßÄÏõê (CUDA Ï†ÑÏö©)
        if device == 'cuda' and scaler:
            with torch.amp.autocast('cuda'):
                loss, _, _ = forward_step(model, images, masks, criterion, device)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            # Ïó≠Ï†ÑÌåå Î∞è Í∞ÄÏ§ëÏπò ÏóÖÎç∞Ïù¥Ìä∏
            loss, _, _ = forward_step(model, images, masks, criterion, device)
            loss.backward()
            optimizer.step()

        # Î™®Îç∏Ïù¥ ÏòàÏ∏°Ìïú Í∞íÍ≥º Ï†ïÎãµ ÏÇ¨Ïù¥Ïùò Í±∞Î¶¨ Í∏∞Î°ù
        train_loss_sum += loss.item()
        pbar.set_postfix(Loss=f"{loss.item():.4f}")

    return train_loss_sum / len(loader)

# [Í≤ÄÏ¶ù Îã®Í≥Ñ] Î™®ÎìàÌôîÎêú ÌèâÍ∞Ä Î°úÏßÅ Ï†ÅÏö©
def validate(model, loader, criterion, device, num_classes=6, id2label=None):
    model.eval()
    val_loss_sum = 0

    # ÏßÄÌëú ÎàÑÏ†ÅÏö© Î∞∞Ïó¥ (Total Accumulators)
    total_cat_ious = np.zeros(num_classes)
    total_bound_ious = np.zeros(num_classes)

    with torch.no_grad():
        for images, masks in loader:
            # 1. Í≥µÌÜµ Î°úÏßÅ Ïã§Ìñâ (Loss Í≥ÑÏÇ∞)
            loss, logits, y = forward_step(model, images, masks, criterion, device)
            val_loss_sum += loss.item()

            # 2. ÏòàÏ∏°Í∞í Î≥ÄÌôò
            preds = torch.argmax(logits, dim=1)

            # 3. ÌÜµÌï© ÌèâÍ∞Ä Î™®Îìà Ìò∏Ï∂ú
            cat_iou, bound_iou = evaluate_metrics(preds, y, num_classes)
            total_cat_ious += cat_iou
            total_bound_ious += bound_iou

    # ÏóêÌè≠ ÌèâÍ∑† Í≥ÑÏÇ∞
    avg_loss = val_loss_sum / len(loader)
    avg_cat_iou = total_cat_ious / len(loader)
    avg_bound_iou = total_bound_ious / len(loader)

    # 4. ÏÉÅÏÑ∏ Î¶¨Ìè¨Ìä∏ Ï∂úÎ†• Î™®Îìà Ìò∏Ï∂ú
    mIoU, mBoU = print_evaluation_report(avg_cat_iou, avg_bound_iou, id2label)

    return avg_loss, mIoU, mBoU



In [None]:
# --- üöÄ Î©îÏù∏ ÌïôÏäµ Î£®ÌîÑ (Auto-Save Í∏∞Îä• ÌÉëÏû¨) ---
print("üöÄ SegFormer-B2 Training Start with Dual Auto-Save Strategy...")

if CFG["wndb"]:
    wandb.init(project=CFG['project'], config=CFG)

best_miou = 0.0
best_mbou = 0.0
best_miou_epoch = 0

save_dir = "./checkpoints"
os.makedirs(save_dir, exist_ok=True)

for epoch in range(CFG['epochs']):
    # 1. ÌïôÏäµ Ïã§Ìñâ
    avg_train_loss = train_one_epoch(model, train_loader, optimizer, criterion, CFG['device'], scaler, epoch)

    # 2. Í≤ÄÏ¶ù Ïã§Ìñâ
    avg_val_loss, avg_val_miou, avg_val_mbou = validate(model, val_loader, criterion, CFG['device'], num_classes=6, id2label=id2label)

    wandb.log({'Train Loss': avg_train_loss, 'Val Loss': avg_val_loss, 'epoch': epoch})


    print(f"üìù Epoch [{epoch+1}/{CFG['epochs']}]")
    print(f"   Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    print(f"   ‚ú® Val mIoU: {avg_val_miou:.4f} | mBoU: {avg_val_mbou:.4f}")

    # --------------------------------------------------------------------------
    # üíæ 3. Ïù¥Ï§ë ÏûêÎèô Ï†ÄÏû• (Dual Auto-Save)
    # --------------------------------------------------------------------------
    # [Í∏∞Ï§Ä 1] mIoU Ï±îÌîºÏñ∏ (Í∞ÄÏû• ÎòëÎòëÌïú Î™®Îç∏)
    if avg_val_miou > best_miou:
        print(f"   üèÜ New Best mIoU! ({best_miou:.4f} -> {avg_val_miou:.4f}) Saving...")
        best_miou = avg_val_miou
        best_miou_epoch = epoch + 1
        torch.save(model.state_dict(), os.path.join(save_dir, "segformer_best_miou.pth"))

    # [Í∏∞Ï§Ä 2] mBoU Ï±îÌîºÏñ∏ (ÌÖåÎëêÎ¶¨ Ïû•Ïù∏)
    if avg_val_mbou > best_mbou:
        print(f"   üé® New Best mBoU! ({best_mbou:.4f} -> {avg_val_mbou:.4f}) Saving...")
        best_mbou = avg_val_mbou
        torch.save(model.state_dict(), os.path.join(save_dir, "segformer_best_mbou.pth"))

    # 0.5 ÏàòÏ§Ä: "Í∏∞Î∞òÏùÄ Îã¶ÏïòÏúºÎÇò, Í≥†ÎìùÏ†ê Î¨∏Ï†úÎäî Ìè¨Í∏∞Ìïú ÏÉÅÌÉú"
    # 0.1 ÏàòÏ§Ä: "Ïã§Ï†ÑÏóê Ìà¨ÏûÖÌï¥Î≥º ÎßåÌïú Ïö∞Îì±ÏÉù"
    # 0.01 ÏàòÏ§Ä: "ÏôÑÎ≤Ω, ÌòπÏùÄ ÎÑàÎ¨¥ Í≥ºÌïú Í≥µÎ∂Ä(Í≥ºÏ†ÅÌï©)"
    print("-" * 50)

print(f"\nüéâ Training Complete!")
print(f"   üëë Overall Best mIoU: {best_miou:.4f} (at Epoch {best_miou_epoch})")
print(f"   üñåÔ∏è Overall Best mBoU: {best_mbou:.4f}")
# ÎßàÏßÄÎßâ ÏÉÅÌÉúÎèÑ Ï†ÄÏû•
torch.save(model.state_dict(), os.path.join(save_dir, "segformer_last.pth"))

if CFG["wndb"]:
    wandb.finish()

üöÄ SegFormer-B2 Training Start with Dual Auto-Save Strategy...


Epoch 1 [Train]:   0%|          | 0/35 [00:00<?, ?it/s]

### üõ†Ô∏è Ï£ºÏöî ÌèâÍ∞Ä Ìï≠Î™©
- **mIoU** (Mean Intersection over Union):
  - **Category-specific IoU**
  - **Boundary IoU**
- **Ïã§ÏãúÍ∞ÑÏÑ± Î∞è ÌïòÎìúÏõ®Ïñ¥ ÏßÄÌëú**
  - **Model Parameters**
  - **MACs** Multiply-Accumulate Operations
    - y = wx + b ÏóêÏÑú wx + bÎ•º 1MAC Ïù¥ÎùºÍ≥† ÌïúÎã§.
  - **GFLOPs** Giga Floating Point Operations
    -  Î™®Îç∏ÏùÑ Ìïú Î≤à Ïã§Ìñâ(Forward Pass)Ìï† Îïå ÌïÑÏöîÌïú Ï¥ù Î∂ÄÎèô ÏÜåÏàòÏ†ê Ïó∞ÏÇ∞Îüâ
    - Î≥¥ÌÜµ 1MAC = 2FLOPs
  - **Average Inference Latency**
  - **Frames Per Second (FPS)**
- **Safety-critical Metrics**

- TODO
  - ÌÖåÏä§Ìä∏ Ïù¥ÎØ∏ÏßÄÎ°ú ÌèâÍ∞ÄÌï† Í≤É ( ÌòÑÏû¨Îäî Ï†ÑÏ≤¥ Ïù¥ÎØ∏ÏßÄ SET ÏÇ¨Ïö© )

In [None]:
import time
from thop import profile

def profile_model(model, img_size, device):
    print(f"üöÄ Starting Model Profiling for: {model.__class__.__name__}...")

    # Î™®Îç∏ÏùÑ ÌèâÍ∞Ä Î™®ÎìúÎ°ú ÏÑ§Ï†ï
    model.eval()

    # 2. Î™®Îç∏ÏùÑ ÏúÑÌïú ÎçîÎØ∏ ÏûÖÎ†• ÌÖêÏÑú ÏÉùÏÑ±
    dummy_input = torch.randn(1, 3, img_size[0], img_size[1]).to(device)

    # 3. ÎçîÎØ∏ ÏûÖÎ†•ÏùÑ ÏÇ¨Ïö©ÌïòÏó¨ Î™®Îç∏Ïùò MACs Î∞è Îß§Í∞úÎ≥ÄÏàò(params)Î•º Í≥ÑÏÇ∞ÌïòÍ∏∞ ÏúÑÌï¥ thop.profile ÏÇ¨Ïö©. verbose=FalseÎ°ú ÏÑ§Ï†ï.
    macs, params = profile(model, inputs=(dummy_input,), verbose=False)

    # 4. Í≥ÑÏÇ∞Îêú MACsÎ•º 1e9Î°ú ÎÇòÎàÑÏñ¥ GFLOPsÎ°ú Î≥ÄÌôò.
    gflops = (macs * 2) / 1e9

    print(f"  Model Parameters (M): {params / 1e6:.2f}")
    print(f"  MACs (G): {macs / 1e9:.2f}")
    print(f"  GFLOPs: {gflops:.2f}")

    # 5. ÏßÄÏó∞ ÏãúÍ∞Ñ Î∞è FPS Ï∏°Ï†ïÏùÑ ÏúÑÌïú Î≥ÄÏàò Ï¥àÍ∏∞Ìôî.
    num_warmup_runs = 10
    num_inference_runs = 100
    total_latency = 0.0

    # 6. ÏõåÎ∞çÏóÖ Ïã§Ìñâ ÏàòÌñâ
    # print(f"  Performing {num_warmup_runs} warm-up runs...")
    with torch.no_grad():
        for _ in range(num_warmup_runs):
            _ = model(dummy_input)

    # 7. Ï∂îÎ°† ÏßÄÏó∞ ÏãúÍ∞Ñ Ï∏°Ï†ï
    # print(f"  Measuring latency over {num_inference_runs} inference runs...")
    with torch.no_grad():
        for _ in range(num_inference_runs):
            start_time = time.perf_counter()
            _ = model(dummy_input)
            end_time = time.perf_counter()
            total_latency += (end_time - start_time)

    # 8. ÌèâÍ∑† ÏßÄÏó∞ ÏãúÍ∞Ñ(ms) Í≥ÑÏÇ∞
    average_latency_ms = (total_latency / num_inference_runs) * 1000

    # 9. FPS Í≥ÑÏÇ∞
    fps = 1000 / average_latency_ms

    print(f"  Average Inference Latency: {average_latency_ms:.2f} ms")
    print(f"  Frames Per Second (FPS): {fps:.2f}")

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


class SafetyEvalMetrics:
    def __init__(self, num_classes, class_names):
        self.num_classes = num_classes
        self.class_names = class_names
        self.confusion_matrix = np.zeros((num_classes, num_classes))

    def update(self, preds, gts):
        """
        Ìïú Î∞∞Ïπò(Batch) ÎòêÎäî Ìïú ÌîÑÎ†àÏûÑÏùò Í≤∞Í≥ºÎ•º ÎàÑÏ†ÅÌï©ÎãàÎã§.
        preds, gts: [Batch, H, W] ÌòïÌÉúÏùò Tensor ÎòêÎäî Numpy
        """
        preds = preds.flatten()
        gts = gts.flatten()

        # Ïú†Ìö®ÌïòÏßÄ ÏïäÏùÄ ÌÉÄÍ≤ü Í∞í Ï†úÏô∏ (Ïòà: Ìå®Îî© ÎòêÎäî ignore_indexÍ∞Ä Ìè¨Ìï®Îêú Í≤ΩÏö∞)
        valid_mask = (gts >= 0) & (gts < self.num_classes)
        preds = preds[valid_mask]
        gts = gts[valid_mask]

        # sklearnÏùò confusion_matrixÎ•º ÏÇ¨Ïö©ÌïòÏó¨ Îß§Ìä∏Î¶≠Ïä§ ÏóÖÎç∞Ïù¥Ìä∏
        # labels Ïù∏ÏûêÎ•º Î™ÖÏãúÏ†ÅÏúºÎ°ú Ï†úÍ≥µÌïòÏó¨ Î™®Îì† ÌÅ¥ÎûòÏä§Í∞Ä Ìè¨Ìï®ÎêòÎèÑÎ°ù Î≥¥Ïû•
        new_cm = confusion_matrix(gts, preds, labels=range(self.num_classes))
        self.confusion_matrix += new_cm

    def plot_confusion_matrix(self, normalize=True):
        """
        ÌòºÎèô ÌñâÎ†¨ ÏãúÍ∞ÅÌôî
        """
        cm = self.confusion_matrix
        if normalize:
            # Ìñâ(Actual) Í∏∞Ï§Ä Ï†ïÍ∑úÌôî: Ìï¥Îãπ ÌÅ¥ÎûòÏä§Í∞Ä Ïã§Ï†ú Î¨¥ÏóáÏúºÎ°ú ÏòàÏ∏°ÎêòÏóàÎäîÏßÄ ÎπÑÏú® ÌôïÏù∏
            # 0ÏúºÎ°ú ÎÇòÎàÑÎäî Ïò§Î•ò Î∞©ÏßÄÎ•º ÏúÑÌï¥ 1e-9 Ï∂îÍ∞Ä
            cm = cm.astype('float') / (cm.sum(axis=1)[:, np.newaxis] + 1e-9)

        plt.figure(figsize=(6, 4))
        sns.heatmap(cm, annot=True, fmt=".2f" if normalize else "d",
                    cmap="Blues", xticklabels=self.class_names, yticklabels=self.class_names)

        plt.title('Road Scene Segmentation: Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('Ground Truth Label')
        plt.show()

    def analyze_safety_risks(self):
        """
        ÏπòÎ™ÖÏ†Å Ïò§Î∂ÑÎ•ò(Safety-Critical) ÏßëÏ§ë Î∂ÑÏÑù
        """
        cm_norm = self.confusion_matrix.astype('float') / (self.confusion_matrix.sum(axis=1)[:, np.newaxis] + 1e-9)

        print("\n=== [Safety-Critical Analysis] ===")

        try:
            rider_idx = self.class_names.index('Rider')
            mybike_idx = self.class_names.index('My bike')
            moveable_idx = self.class_names.index('Moveable')
            lanemark_idx = self.class_names.index('Lane Mark')
            road_idx = self.class_names.index('Road')
            undrivable_idx = self.class_names.index('Undrivable')

            print("--- Í≥†ÏúÑÌóò ---")
            undrivable_as_road = cm_norm[undrivable_idx, road_idx]
            print(f"1. Ï¶âÍ∞ÅÏ†ÅÏù∏ Ï∂©Îèå ÏúÑÌóò - Undrivable -> RoadÎ°ú Ïò§Î∂ÑÎ•ò : {undrivable_as_road:.2%}")

            missed_moveable_as_undrivable = cm_norm[moveable_idx, undrivable_idx]
            print(f"2. Ïù¥Îèô Î¨ºÏ≤¥ Ï∂©Îèå ÏúÑÌóò - Moveable -> UndrivableÎ°ú Ïò§Î∂ÑÎ•ò : {missed_moveable_as_undrivable:.2%}")

            missed_moveable_as_road = cm_norm[moveable_idx, road_idx]
            print(f"3. Ïù¥Îèô Î¨ºÏ≤¥ Ï∂©Îèå ÏúÑÌóò - Moveable -> RoadÎ°ú Ïò§Î∂ÑÎ•ò : {missed_moveable_as_road:.2%}")

            rider_as_undrivable = cm_norm[rider_idx, undrivable_idx]
            print(f"4. ÏãúÏä§ÌÖú Ïò§ÏûëÎèô Ïú†Î∞ú - Rider -> UndrivableÎ°ú Ïò§Î∂ÑÎ•ò : {rider_as_undrivable:.2%}")

            print("\n--- Ï§ëÏúÑÌóò ---")
            road_to_undrivable = cm_norm[road_idx, undrivable_idx]
            print(f"5. Í≤ΩÎ°ú Í≥ÑÌöç Ïò§Î•ò - Road -> Undrivable Î°ú Ïò§Î∂ÑÎ•ò : {road_to_undrivable:.2%}")

            lanemark_as_road = cm_norm[lanemark_idx, road_idx]
            print(f"6. Ï∞®ÏÑ† Ïú†ÏßÄ Î≥¥Ï°∞ Ïò§Î•ò - Lane Mark -> RoadÎ°ú Ïò§Î∂ÑÎ•ò : {lanemark_as_road:.2%}")

            print("\n--- ÎÇÆÏùÄ-Ï§ëÍ∞Ñ ÏúÑÌóò ---")
            rider_as_mybike = cm_norm[rider_idx, mybike_idx]
            print(f"7. ÏûêÍ∏∞ Ïù∏Ïãù Ïò§Î•ò - Rider -> My bikeÎ°ú Ïò§Î∂ÑÎ•ò : {rider_as_mybike:.2%}")

        except ValueError as e:
            print(f"Ïò§Î•ò: Îç∞Ïù¥ÌÑ∞ÏÖãÏùò ÌÅ¥ÎûòÏä§ Ïù¥Î¶ÑÏùÑ ÌôïÏù∏ÌïòÏÑ∏Ïöî: {e}")


In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

def run_safety_evaluation(model_to_eval, model_name, val_loader, device, num_classes, id2label):
    print(f"üöÄ Starting Safety Evaluation for {model_name}...")

    # Instantiate SafetyEvalMetrics
    class_names = [name for i, name in sorted(id2label.items())]
    evaluator = SafetyEvalMetrics(num_classes=num_classes, class_names=class_names)

    # Evaluate the model on the validation set to update the confusion matrix
    model_to_eval.eval()
    with torch.no_grad():
        for images, masks in val_loader:
            X = images.to(device).contiguous()
            y = masks.to(device).contiguous()

            outputs = model_to_eval(X).logits
            upsampled_logits = nn.functional.interpolate(
                outputs,
                size=y.shape[-2:],
                mode="bilinear",
                align_corners=False
            )

            preds = upsampled_logits.argmax(dim=1)
            evaluator.update(preds.cpu().numpy(), y.cpu().numpy())

    # Plot the confusion matrix
    evaluator.plot_confusion_matrix(normalize=True)
    evaluator.analyze_safety_risks()

In [None]:
# Define the path to the saved checkpoints
CHECKPOINTS_DIR = "./checkpoints"

# Recreate the model architecture (it must be the same as during training)
# The configuration `CFG['model_name']`, `id2label`, `label2id`, `num_labels` are from previous cells.

# Model for Best mIoU
model_best_miou = SegformerForSemanticSegmentation.from_pretrained(
    CFG['model_name'],
    num_labels=len(id_to_idx),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
).to(CFG['device'])

# Load the state dictionary for the best mIoU model
miou_checkpoint_path = os.path.join(CHECKPOINTS_DIR, "segformer_best_miou.pth")
model_best_miou.load_state_dict(torch.load(miou_checkpoint_path, map_location=CFG['device']))
model_best_miou.eval()

# Model for Best mBoU
model_best_mbou = SegformerForSemanticSegmentation.from_pretrained(
    CFG['model_name'],
    num_labels=len(id_to_idx),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
).to(CFG['device'])

# Load the state dictionary for the best mBoU model
mbou_checkpoint_path = os.path.join(CHECKPOINTS_DIR, "segformer_best_mbou.pth")
model_best_mbou.load_state_dict(torch.load(mbou_checkpoint_path, map_location=CFG['device']))
model_best_mbou.eval()

# Call the profiling function for model_best_miou
profile_model(model_best_miou, CFG['img_size'], CFG['device'])

print("\n" + "="*50 + "\n") # Separator for clarity

# Call the profiling function for model_best_mbou
profile_model(model_best_mbou, CFG['img_size'], CFG['device'])

# Run safety evaluation for model_best_miou
run_safety_evaluation(
    model_best_miou,
    "Best mIoU Model",
    val_loader,
    CFG['device'],
    CFG_EVAL['num_classes'],
    id2label
)

print("\n" + "="*50 + "\n") # Separator for clarity

# Run safety evaluation for model_best_mbou
run_safety_evaluation(
    model_best_mbou,
    "Best mBoU Model",
    val_loader,
    CFG['device'],
    CFG_EVAL['num_classes'],
    id2label
)