# üèçÔ∏è LossZero: Motorcycle Night Ride SegFormer-B2 Optimized

Ïù¥ ÎÖ∏Ìä∏Î∂ÅÏùÄ **SegFormer-B2** Î™®Îç∏ÏùÑ ÏÇ¨Ïö©ÌïòÏó¨ ÏïºÍ∞Ñ Ïò§ÌÜ†Î∞îÏù¥ Ï£ºÌñâ Ïù¥ÎØ∏ÏßÄÏùò ÏãúÎ©òÌã± ÏÑ∏Í∑∏Î©òÌÖåÏù¥ÏÖòÏùÑ ÏàòÌñâÌï©ÎãàÎã§.

### üõ†Ô∏è Ï£ºÏöî ÏãúÎÇòÎ¶¨Ïò§
- **Î™®Îç∏**: SegFormer-B2 (Transformer Í∏∞Î∞ò)
- **Î∞±Î≥∏**: MiT-B2
- **ÏÇ¨Ï†Ñ ÌïôÏäµ**: Cityscapes (ÎèÑÎ°ú ÌôòÍ≤Ω ÌäπÌôî)
- **ÏµúÏ†ÅÌôî**: AdamW + FP16 Mixed Precision
- **ÏÜêÏã§ Ìï®Ïàò**: Weighted CrossEntropy (Ï§ëÏöî Í∞ùÏ≤¥ Í∞ÄÏ§ëÏπò Î∂ÄÏó¨)

In [94]:
import os
import cv2
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig
from torch.amp import autocast, GradScaler
from tqdm.auto import tqdm

print(f"PyTorch version: {torch.__version__}")


PyTorch version: 2.6.0


## Colab Ïó∞Í≤∞

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [109]:
def get_device():
    if torch.cuda.is_available():
        return "cuda"

    return "cpu"

def num_worker():
    if torch.cuda.is_available():
        return os.cpu_count()

    return 0

# ‚öôÔ∏è ÏÑ§Ï†ï (Configuration)
#DATA_DIR = "/content/drive/MyDrive/motor_model"
DATA_DIR = os.path.expanduser("~/Projects/LossZero/data/Motorcycle Night Ride Dataset")
print("Detected Local Environment")

JSON_PATH = os.path.join(DATA_DIR, "COCO_motorcycle (pixel).json")
IMG_DIR = os.path.join(DATA_DIR, "images")

CFG = {
    "project": "LossZero",
    "model_name": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024",
    "img_size": (384, 384),
    "batch_size": 8,
    "lr": 1e-4,
    "epochs": 20,
    "device": get_device(),
    "num_worker": num_worker()
}

print(f"Using device: {CFG['device']}")
print(f"Data directory: {DATA_DIR}")

Detected Local Environment
Using device: cpu
Data directory: /Users/jamesyang/Projects/LossZero/data/Motorcycle Night Ride Dataset


In [110]:
def create_mask_from_json(coco, img_id, img_info, id_to_idx):
    ann_ids = coco.getAnnIds(imgIds=img_id)
    anns = coco.loadAnns(ann_ids)
    mask = np.zeros((img_info['height'], img_info['width']), dtype=np.uint8)

    for ann in anns:
        cat_id = ann['category_id']
        if cat_id in id_to_idx:
            cls_idx = id_to_idx[cat_id]
            pixel_mask = coco.annToMask(ann)
            mask[pixel_mask == 1] = cls_idx

    return mask

def process_single_data(coco, img_id, img_dir, id_to_idx, transform=None):
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(img_dir, img_info['file_name'])

    image = cv2.imread(img_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask = create_mask_from_json(coco, img_id, img_info, id_to_idx)

    if transform:
        augmented = transform(image=image, mask=mask)
        image, mask = augmented['image'], augmented['mask']

    return image, torch.as_tensor(mask).long()

train_transform = A.Compose([
    #  ÏõêÎ≥∏ Ìï¥ÏÉÅÎèÑÏóêÏÑú 384x384 ÌÅ¨Í∏∞Î°ú Î¨¥ÏûëÏúÑ Ï∂îÏ∂ú (ÌôîÏßà Ï†ÄÌïò ÏóÜÏùå)
    A.RandomCrop(height=CFG['img_size'][0], width=CFG['img_size'][1], p=1.0),
    A.PadIfNeeded(min_height=CFG['img_size'][0], min_width=CFG['img_size'][1], p=1.0),

    # --- ÏïºÍ∞Ñ Ï†ÑÏö© Augmentation Ï∂îÍ∞Ä ---
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5), # Ïñ¥ÎëêÏö¥ Ï†ÄÏ°∞ÎèÑ Í∞úÏÑ†
    A.GaussNoise(std_range=(0.02, 0.05), p=0.3), # ÏïºÍ∞Ñ ÎÖ∏Ïù¥Ï¶à ÎåÄÏùë

    # --- Í∏∞ÌïòÌïôÏ†Å Î≥ÄÌòï (Îç∞Ïù¥ÌÑ∞ Ïàò Î≥¥Ï∂©Ïö©) ---
    A.HorizontalFlip(p=0.5), # Ï¢åÏö∞ Î∞òÏ†Ñ
    # 0.0625Îäî Î®∏Ïã†Îü¨Îãù/Îî•Îü¨Îãù Ïª§ÎÆ§ÎãàÌã∞ÏóêÏÑú Ïò§Îû´ÎèôÏïà Í≤ÄÏ¶ùÎêú 'ÏÇ¨Ïã§ÏÉÅ ÌëúÏ§Ä(De Facto Standard)
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5), # Ïù¥Îèô/ÌÅ¨Í∏∞/ÌöåÏ†Ñ

    # ImageNet Îç∞Ïù¥ÌÉÄÏÖãÏùò ÌèâÍ∑†Í∞í ÎÇòÏÅòÏßÄ ÏïäÏùå. SegFormerÍ∞Ä ImageNet/CityscapesÎ°ú Î∞∞Ïõ†ÏúºÎãàÍπå
    # Î™®Îç∏Ïù¥ ÏÉàÎ°úÏö¥ ÏÇ¨ÏßÑÏùÑ Î∞õÏùÑ Îïå: ÏûÖÎ†•_Ïù¥ÎØ∏ÏßÄ = (ÏõêÎ≥∏_Ïù¥ÎØ∏ÏßÄ - ÌèâÍ∑†) / ÌëúÏ§ÄÌé∏Ï∞®
    # Ïù¥Î†áÍ≤å Í≥ÑÏÇ∞Ìï¥Ï£ºÎ©¥, Ïñ¥Îñ§ ÏÇ¨ÏßÑÏù¥ Îì§Ïñ¥ÏôÄÎèÑ "ÌèâÍ∑†Ïù¥ 0Ïù¥Í≥† ÌëúÏ§ÄÌé∏Ï∞®Í∞Ä 1Ïù∏(Standard Normal Distribution)" ÏïÑÏ£º ÏòàÏÅú Îç∞Ïù¥ÌÑ∞Î°ú Î≥ÄÏã†
    # Ï†ÑÏ≤¥ ÏïºÍ∞Ñ Îç∞Ïù¥ÌÑ∞ÏÖãÏùò Mean/StdÎ•º ÏßÅÏ†ë Í≥ÑÏÇ∞Ìïú Í∞í
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2()
])

coco = COCO(JSON_PATH)
img_ids = list(coco.imgs.keys())
cat_ids = coco.getCatIds()
id_to_idx = {cat_id: i for i, cat_id in enumerate(cat_ids)}
print(f"Category Mapping: {id_to_idx}")

loading annotations into memory...
Done (t=0.76s)
creating index...
index created!
Category Mapping: {1329681: 0, 1323885: 1, 1323884: 2, 1323882: 3, 1323881: 4, 1323880: 5}


## Traing / Val Î∂Ñ

In [111]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

class MotorcycleNightRideDataset(Dataset):
    def __init__(self, coco, img_ids, img_dir, id_to_idx, transform=None):
        self.coco = coco
        self.img_ids = img_ids
        self.img_dir = img_dir
        self.id_to_idx = id_to_idx
        self.transform = transform

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]
        image, mask = process_single_data(self.coco, img_id, self.img_dir, self.id_to_idx, self.transform)
        return image, mask

# 1. Îç∞Ïù¥ÌÑ∞ Î°úÎìú Î∞è ID Î∂ÑÌï† (8:2)
coco = COCO(JSON_PATH)
all_ids = list(coco.imgs.keys())
train_ids, val_ids = train_test_split(all_ids, test_size=0.2, random_state=42)

# 2. Transform Ï†ïÏùò (Í∏∞Ï°¥ Ï†ïÏùò ÌôúÏö© Î∞è ValÏö© Ï∂îÍ∞Ä)
val_transform = A.Compose([
    A.Resize(CFG['img_size'][0], CFG['img_size'][1]),
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2()
])

# 3. Îç∞Ïù¥ÌÑ∞ÏÖã Ïù∏Ïä§ÌÑ¥Ïä§ ÏÉùÏÑ±
train_dataset = MotorcycleNightRideDataset(coco, train_ids, IMG_DIR, id_to_idx, train_transform)
val_dataset = MotorcycleNightRideDataset(coco, val_ids, IMG_DIR, id_to_idx, val_transform)

# 4. Îç∞Ïù¥ÌÑ∞ Î°úÎçî ÏÉùÏÑ±
train_loader = DataLoader(
    train_dataset, 
    batch_size=CFG['batch_size'], 
    shuffle=True, 
    num_workers=CFG['num_worker'],
    pin_memory=True
)
val_loader = DataLoader(
    val_dataset, 
    batch_size=CFG['batch_size'], 
    shuffle=False, 
    num_workers=CFG['num_worker'],
    pin_memory=True
)

print(f"‚úÖ Data Ready: Train={len(train_ids)}, Val={len(val_ids)}")

loading annotations into memory...
Done (t=0.68s)
creating index...
index created!
‚úÖ Data Ready: Train=160, Val=40


### üìâ ÌÅ¥ÎûòÏä§Î≥Ñ Î∂ÑÌè¨ ÏöîÏïΩ (ÎÇ¥Î¶ºÏ∞®Ïàú)

1. **Undrivable (Ï£ºÌñâ Î∂àÍ∞Ä ÏòÅÏó≠)**: **42.9%** (ÏïïÎèÑÏ†Å 1ÏúÑ)
   - Î∞∞Í≤Ω(ÌïòÎäò, Í±¥Î¨º, ÌíÄÏà≤ Îì±)Ïù¥ Ïù¥ÎØ∏ÏßÄÏùò Ï†àÎ∞ò Í∞ÄÍπåÏù¥ Ï∞®ÏßÄÌï©ÎãàÎã§.
2. **Road (Ï£ºÌñâ Í∞ÄÎä• ÎèÑÎ°ú)**: **27.1%**
   - ÎèÑÎ°ú ÏûêÏ≤¥ÎèÑ ÍΩ§ ÎßéÏùÄ ÏòÅÏó≠ÏùÑ Ï∞®ÏßÄÌï©ÎãàÎã§.
3. **My bike (ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥)**: **15.8%**
   - Ï£ºÌñâÏûê ÏãúÏ†êÏù¥Îùº ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥Í∞Ä Ìï≠ÏÉÅ Î≥¥Ïù¥Í∏∞ ÎïåÎ¨∏Ïóê ÎπÑÏú®Ïù¥ ÎÜíÏäµÎãàÎã§.
4. **Rider (ÌÉëÏäπÏûê)**: **8.1%**
   - Îã§Î•∏ Ïò§ÌÜ†Î∞îÏù¥ Ïö¥Ï†ÑÏûêÎÇò ÎÇ¥ Ïã†Ï≤¥Í∞Ä Ìè¨Ìï®Îêú Í≤ÉÏúºÎ°ú Î≥¥ÏûÖÎãàÎã§.
5. **Moveable (Ïù¥Îèô Î¨ºÏ≤¥)**: **4.7%**
   - Îã§Î•∏ Ï∞®Îüâ, Î≥¥ÌñâÏûê Îì± ÏïàÏ†ÑÏóê Í∞ÄÏû• Ï§ëÏöîÌïú Ïû•Ïï†Î¨ºÏù∏Îç∞ ÎπÑÏú®Ïù¥ Îß§Ïö∞ ÎÇÆÏäµÎãàÎã§.
6. **Lane Mark (Ï∞®ÏÑ†)**: **1.4%**
   - Í∞ÄÏû• Ïã¨Í∞ÅÌïú Î∂àÍ∑†ÌòïÏûÖÎãàÎã§. ÎèÑÎ°ú Ï£ºÌñâÏùò ÌïµÏã¨Ïù∏ Ï∞®ÏÑ†Ïù¥ Í≥†Ïûë 1% ÎÇ®ÏßìÏûÖÎãàÎã§.

In [112]:
id2label = {i: coco.loadCats(cat_id)[0]['name'] for cat_id, i in id_to_idx.items()}
label2id = {v: k for k, v in id2label.items()}

model = SegformerForSemanticSegmentation.from_pretrained(
    CFG['model_name'],
    num_labels=len(id_to_idx),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True
).to(CFG['device'])

optimizer = torch.optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=0.01)

# ‚öñÔ∏è ÌÅ¥ÎûòÏä§Î≥Ñ Í∞ÄÏ§ëÏπò ÏÑ§Ï†ï (Class Weights)
weights = torch.tensor([
    3.0,   # Rider: 5.0 ‚Üí 3.0 (Ï§ëÏöîÌïòÏßÄÎßå Í≥ºÌïòÏßÄ ÏïäÍ≤å)
    1.5,   # My bike: 2.0 ‚Üí 1.5 (ÎÇ¥ Ïò§ÌÜ†Î∞îÏù¥Îäî ÎÑàÎ¨¥ Ïûò ÎßûÌûàÎãà Ï°∞Í∏à Îçî ÎÇÆÏ∂§)
    5.0,   # Moveable: 10.0 ‚Üí 5.0 (Ï†àÎ∞òÏúºÎ°ú Ï§ÑÏó¨ÏÑú Î∂ÄÎã¥ ÏôÑÌôî)
    10.0,  # Lane Mark: 20.0 ‚Üí 10.0 (Ïó¨Ï†ÑÌûà Ï†úÏùº Í∞ïÎ†•ÌïòÏßÄÎßå, 20Î∞∞Îäî ÎÑàÎ¨¥ Í∞ÄÌòπÌñàÏùå)
    1.0,   # Road: 1.0 (Í∏∞Ï§ÄÏ†ê Ïú†ÏßÄ)
    0.8    # Undrivable: 0.5 ‚Üí 0.8 (Î∞∞Í≤ΩÏùÑ ÎÑàÎ¨¥ Î¨¥ÏãúÌï¥ÏÑú ÎèÑÎ°ú Í≤ΩÍ≥ÑÍ∞Ä Î¨¥ÎÑàÏßÄÎäî Í≤É Î∞©ÏßÄ)
], dtype=torch.float).to(CFG['device'])

criterion = nn.CrossEntropyLoss(weight=weights)

scaler = GradScaler('cuda') if CFG['device'] == 'cuda' else None

Loading weights:   0%|          | 0/380 [00:00<?, ?it/s]

[1mSegformerForSemanticSegmentation LOAD REPORT[0m from: nvidia/segformer-b2-finetuned-cityscapes-1024-1024
Key                           | Status   |                                                                                                    
------------------------------+----------+----------------------------------------------------------------------------------------------------
decode_head.classifier.bias   | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([19]) vs model:torch.Size([6])                      
decode_head.classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([19, 768, 1, 1]) vs model:torch.Size([6, 768, 1, 1])

[3mNotes:
- MISMATCH[3m	:ckpt weights were loaded, but they did not match the original empty weight shapes.[0m


In [None]:
print("üöÄ SegFormer-B2 Training Start...")

for epoch in range(CFG['epochs']):
    # --- Training Phase ---
    model.train()
    train_loss_sum = 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]")
    
    for images, masks in pbar:
        X = images.to(CFG['device']).contiguous()
        y = masks.to(CFG['device']).contiguous()
        
        optimizer.zero_grad()
        
        # Mixed Precision ÏßÄÏõê (CUDA Ï†ÑÏö©)
        if CFG['device'] == 'cuda' and scaler:
            with torch.amp.autocast('cuda'):
                outputs = model(X).logits
                upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
                loss = criterion(upsampled_logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(X).logits
            upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
            loss = criterion(upsampled_logits, y)
            loss.backward()
            optimizer.step()
            
        train_loss_sum += loss.item()
        pbar.set_postfix(Loss=f"{loss.item():.4f}")
    
    avg_train_loss = train_loss_sum / len(train_loader)

    # --- Validation Phase ---
    model.eval()
    val_loss_sum = 0
    with torch.no_grad():
        for images, masks in val_loader:
            X = images.to(CFG['device']).contiguous()
            y = masks.to(CFG['device']).contiguous()
            
            outputs = model(X).logits
            upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
            loss = criterion(upsampled_logits, y)
            val_loss_sum += loss.item()
            
    avg_val_loss = val_loss_sum / len(val_loader)
    print(f"üìù Epoch [{epoch+1}/{CFG['epochs']}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

üöÄ SegFormer-B2 Training Start...


Epoch 1 [Train]:   0%|          | 0/20 [00:00<?, ?it/s]

### üõ†Ô∏è Ï£ºÏöî ÌèâÍ∞Ä Ìï≠Î™©
- **mIoU** (Mean Intersection over Union):
  - **Category-specific IoU**
  - **Boundary IoU**
- **Ïã§ÏãúÍ∞ÑÏÑ± Î∞è ÌïòÎìúÏõ®Ïñ¥ ÏßÄÌëú**
  - **Model Parameters**
  - **MACs** Multiply-Accumulate Operations
    - y = wx + b ÏóêÏÑú wx + bÎ•º 1MAC Ïù¥ÎùºÍ≥† ÌïúÎã§.
  - **GFLOPs** Giga Floating Point Operations
    -  Î™®Îç∏ÏùÑ Ìïú Î≤à Ïã§Ìñâ(Forward Pass)Ìï† Îïå ÌïÑÏöîÌïú Ï¥ù Î∂ÄÎèô ÏÜåÏàòÏ†ê Ïó∞ÏÇ∞Îüâ
    - Î≥¥ÌÜµ 1MAC = 2FLOPs
  - **Average Inference Latency**
  - **Frames Per Second (FPS)**
- **Safety-critical Metrics**

- TODO
  - ÌÖåÏä§Ìä∏ Ïù¥ÎØ∏ÏßÄÎ°ú ÌèâÍ∞ÄÌï† Í≤É ( ÌòÑÏû¨Îäî Ï†ÑÏ≤¥ Ïù¥ÎØ∏ÏßÄ SET ÏÇ¨Ïö© )

In [22]:
CFG_EVAL = {
    "num_classes": 6,
    "img_size": (CFG["img_size"], CFG["img_size"]),
}

In [23]:
import torch
import numpy as np
import cv2
from sklearn.metrics import confusion_matrix

def compute_category_iou(preds, targets, num_classes):
    """
    Confusion MatrixÎ•º ÏÇ¨Ïö©ÌïòÏó¨ ÌÅ¥ÎûòÏä§Î≥Ñ IoU(Intersection over Union)Î•º Í≥ÑÏÇ∞Ìï©ÎãàÎã§.
    Args:
        preds (torch.Tensor): ÏòàÏ∏°Îêú ÎßàÏä§ÌÅ¨ (N, H, W).
        targets (torch.Tensor): Ïã§Ï†ú Ï†ïÎãµ(Ground truth) ÎßàÏä§ÌÅ¨ (N, H, W).
        num_classes (int): ÌÅ¥ÎûòÏä§ Í∞úÏàò.
    Returns:
        np.ndarray: Í∞Å ÌÅ¥ÎûòÏä§Î≥Ñ IoU Í∞í.
    """
    preds_flat = preds.flatten().cpu().numpy()
    targets_flat = targets.flatten().cpu().numpy()

    # Ïú†Ìö®ÌïòÏßÄ ÏïäÏùÄ ÌÉÄÍ≤ü Í∞í Ï†úÏô∏ (Ïòà: Ìå®Îî© ÎòêÎäî ignore_indexÍ∞Ä Ìè¨Ìï®Îêú Í≤ΩÏö∞)
    valid_mask = (targets_flat >= 0) & (targets_flat < num_classes)
    preds_flat = preds_flat[valid_mask]
    targets_flat = targets_flat[valid_mask]

    # ÌòºÎèô ÌñâÎ†¨(Confusion Matrix) Í≥ÑÏÇ∞
    cm = confusion_matrix(targets_flat, preds_flat, labels=range(num_classes))

    # ÌÅ¥ÎûòÏä§Î≥Ñ IoU Í≥ÑÏÇ∞ Í≥µÏãù = TP / (TP + FP + FN)
    intersection = np.diag(cm)
    ground_truth_set = cm.sum(axis=1)
    predicted_set = cm.sum(axis=0)
    union = ground_truth_set + predicted_set - intersection

    # 0ÏúºÎ°ú ÎÇòÎàÑÎäî Ïò§Î•ò(division by zero) Î∞©ÏßÄ
    iou = intersection / (union + 1e-6)
    return iou

def get_boundary(mask, dilation_pixels=2):
    """
    Ïù¥ÏßÑ ÎßàÏä§ÌÅ¨(binary mask)ÏóêÏÑú Í≤ΩÍ≥ÑÏÑ† ÏòÅÏó≠ÏùÑ Ï∂îÏ∂úÌï©ÎãàÎã§.
    """
    mask = mask.astype(np.uint8)
    kernel = np.ones((3, 3), np.uint8)
    # Ïπ®Ïãù Ïó∞ÏÇ∞(Erosion)ÏùÑ ÌÜµÌï¥ ÎÇ¥Î∂ÄÎ•º ÍπéÏïÑÎÉÑ
    eroded = cv2.erode(mask, kernel, iterations=dilation_pixels)
    # ÏõêÎ≥∏ ÎßàÏä§ÌÅ¨ÏóêÏÑú Ïπ®ÏãùÎêú ÎßàÏä§ÌÅ¨Î•º ÎπºÏÑú Í≤ΩÍ≥ÑÏÑ†Îßå ÎÇ®ÍπÄ
    boundary = mask - eroded
    return boundary

def compute_boundary_iou(preds, targets, num_classes, dilation_pixels=2):
    """
    Í∞Å ÌÅ¥ÎûòÏä§Î≥ÑÎ°ú Boundary IoUÎ•º Í≥ÑÏÇ∞Ìï©ÎãàÎã§.
    """
    preds_np = preds.cpu().numpy()
    targets_np = targets.cpu().numpy()
    b_ious = []

    for c in range(num_classes):
        class_preds = (preds_np == c)
        class_targets = (targets_np == c)

        ious_per_batch = []
        for i in range(preds_np.shape[0]): # Î∞∞Ïπò ÎÇ¥ Í∞Å Ïù¥ÎØ∏ÏßÄÎ≥ÑÎ°ú Î∞òÎ≥µ
            gt_boundary = get_boundary(class_targets[i], dilation_pixels)
            pred_boundary = get_boundary(class_preds[i], dilation_pixels)

            intersection = ((gt_boundary > 0) & (pred_boundary > 0)).sum()
            union = ((gt_boundary > 0) | (pred_boundary > 0)).sum()

            if union == 0:
                # Ï†ïÎãµ(GT)Í≥º ÏòàÏ∏° Î™®Îëê Ìï¥Îãπ ÌÅ¥ÎûòÏä§Ïùò Í≤ΩÍ≥ÑÏÑ†Ïù¥ ÏóÜÎäî Í≤ΩÏö∞,
                # ÏôÑÎ≤ΩÌïòÍ≤å ÏùºÏπòÌïòÎäî Í≤ÉÏúºÎ°ú Í∞ÑÏ£ºÌïòÏó¨ IoU 1.0 Î∂ÄÏó¨
                # (ÌäπÏ†ï ÌÅ¥ÎûòÏä§Í∞Ä GTÏóê ÏóÜÏùÑ Îïå Ï†ÑÏ≤¥ ÌèâÍ∑†Ïóê ÎØ∏ÏπòÎäî ÏòÅÌñ•ÏùÑ Í≥†Î†§Ìïú Ï≤òÎ¶¨)
                ious_per_batch.append(1.0)
            else:
                ious_per_batch.append(intersection / union)

        # Î∞∞Ïπò ÎÇ¥Ïùò Ïñ¥Îñ§ Ïù¥ÎØ∏ÏßÄÏóêÏÑúÎèÑ Ìï¥Îãπ ÌÅ¥ÎûòÏä§Ïùò Í≤ΩÍ≥ÑÏÑ†Ïù¥ Î∞úÍ≤¨ÎêòÏßÄ ÏïäÏùÄ Í≤ΩÏö∞
        if not ious_per_batch:
            b_ious.append(0.0) # ÌïÑÏöîÏóê Îî∞Îùº NaN Îì±ÏúºÎ°ú Ï≤òÎ¶¨ Í∞ÄÎä•
        else:
            b_ious.append(np.mean(ious_per_batch))

    return np.array(b_ious)

In [24]:
print("üöÄ SegFormer-B2 Evaluation Start...")

model.eval()

all_preds = []
all_targets = []

total_category_ious = np.zeros(CFG_EVAL['num_classes'])
total_boundary_ious = np.zeros(CFG_EVAL['num_classes'])
num_batches = 0

with torch.no_grad():
    eval_pbar = tqdm(range(0, len(img_ids), CFG['batch_size']), desc="Evaluating")
    for i in eval_pbar:
        batch_ids = img_ids[i : i + CFG['batch_size']]

        images, masks = [], []
        for img_id in batch_ids:
            img, msk = process_single_data(coco, img_id, IMG_DIR, id_to_idx, train_transform)
            images.append(img)
            masks.append(msk)

        X = torch.stack(images).to(CFG['device']).contiguous()
        y = torch.stack(masks).to(CFG['device']).contiguous()

        outputs = model(X).logits.contiguous()
        upsampled_logits = nn.functional.interpolate(
            outputs,
            size=y.shape[-2:],
            mode="bilinear",
            align_corners=False
        ).contiguous()

        preds = upsampled_logits.argmax(dim=1)

        # Ìï¥Îãπ batchÏùò IoU Í≥ÑÏÇ∞
        category_ious = compute_category_iou(preds, y, CFG_EVAL['num_classes'])
        boundary_ious = compute_boundary_iou(preds, y, CFG_EVAL['num_classes'])

        total_category_ious += category_ious
        total_boundary_ious += boundary_ious
        num_batches += 1

# ÌèâÍ∑† IoUs Í≥ÑÏÇ∞
mean_category_ious = total_category_ious / num_batches
mean_boundary_ious = total_boundary_ious / num_batches

print()
print("--- Evaluation Results ---")
print("Category-specific IoU (Mean over batches):")
for i, val in enumerate(mean_category_ious):
    print(f"  Class {i} ({id2label[i]}): {val:.4f}")

print()
print("Boundary IoU (Mean over batches):")
for i, val in enumerate(mean_boundary_ious):
    print(f"  Class {i} ({id2label[i]}): {val:.4f}")

print()
print("Mean Category IoU (mIoU):", np.mean(mean_category_ious))
print("Mean Boundary IoU (mBoU):", np.mean(mean_boundary_ious))

üöÄ SegFormer-B2 Evaluation Start...


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]


--- Evaluation Results ---
Category-specific IoU (Mean over batches):
  Class 0 (Rider): 0.6650
  Class 1 (My bike): 0.7289
  Class 2 (Moveable): 0.4249
  Class 3 (Lane Mark): 0.1606
  Class 4 (Road): 0.6167
  Class 5 (Undrivable): 0.6699

Boundary IoU (Mean over batches):
  Class 0 (Rider): 0.0713
  Class 1 (My bike): 0.4250
  Class 2 (Moveable): 0.4554
  Class 3 (Lane Mark): 0.1616
  Class 4 (Road): 0.1179
  Class 5 (Undrivable): 0.3250

Mean Category IoU (mIoU): 0.5443298231911609
Mean Boundary IoU (mBoU): 0.2593434738526876


In [25]:
# TODO : requirement.txtÎ°ú Î≥¥ÎÇ¥Ïûê
import sys
!{sys.executable} -m pip install thop

print("thop installed successfully.")

Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop
Successfully installed thop-0.1.1.post2209072238
thop installed successfully.


In [26]:
import time
from thop import profile

print("üöÄ Starting Model Profiling...")

# 2. Î™®Îç∏ÏùÑ ÏúÑÌïú ÎçîÎØ∏ ÏûÖÎ†• ÌÖêÏÑú ÏÉùÏÑ±
dummy_input = torch.randn(1, 3, CFG['img_size'][0], CFG['img_size'][1]).to(CFG['device'])

# 3. ÎçîÎØ∏ ÏûÖÎ†•ÏùÑ ÏÇ¨Ïö©ÌïòÏó¨ Î™®Îç∏Ïùò MACs Î∞è Îß§Í∞úÎ≥ÄÏàò(params)Î•º Í≥ÑÏÇ∞ÌïòÍ∏∞ ÏúÑÌï¥ thop.profile ÏÇ¨Ïö©. verbose=FalseÎ°ú ÏÑ§Ï†ï.
macs, params = profile(model, inputs=(dummy_input,), verbose=False)

# 4. Í≥ÑÏÇ∞Îêú MACsÎ•º 1e9Î°ú ÎÇòÎàÑÏñ¥ GFLOPsÎ°ú Î≥ÄÌôò.
gflops = macs / 1e9

print(f"Model Parameters (M): {params / 1e6:.2f}")
print(f"MACs (G): {macs / 1e9:.2f}")
print(f"GFLOPs: {gflops:.2f}")

# 5. ÏßÄÏó∞ ÏãúÍ∞Ñ Î∞è FPS Ï∏°Ï†ïÏùÑ ÏúÑÌïú Î≥ÄÏàò Ï¥àÍ∏∞Ìôî.
num_warmup_runs = 10
num_inference_runs = 100
total_latency = 0.0

# Î™®Îç∏ÏùÑ ÌèâÍ∞Ä Î™®ÎìúÎ°ú ÏÑ§Ï†ï
model.eval()

# 6. ÏõåÎ∞çÏóÖ Ïã§Ìñâ ÏàòÌñâ
print(f"Performing {num_warmup_runs} warm-up runs...")
with torch.no_grad():
    for _ in range(num_warmup_runs):
        _ = model(dummy_input)

# 7. Ï∂îÎ°† ÏßÄÏó∞ ÏãúÍ∞Ñ Ï∏°Ï†ï
print(f"Measuring latency over {num_inference_runs} inference runs...")
with torch.no_grad():
    for _ in range(num_inference_runs):
        start_time = time.perf_counter()
        _ = model(dummy_input)
        end_time = time.perf_counter()
        total_latency += (end_time - start_time)

# 8. ÌèâÍ∑† ÏßÄÏó∞ ÏãúÍ∞Ñ(ms) Í≥ÑÏÇ∞
average_latency_ms = (total_latency / num_inference_runs) * 1000

# 9. FPS Í≥ÑÏÇ∞
fps = 1000 / average_latency_ms

print(f"Average Inference Latency: {average_latency_ms:.2f} ms")
print(f"Frames Per Second (FPS): {fps:.2f}")
print("Model Profiling Complete.")

üöÄ Starting Model Profiling...
Model Parameters (M): 27.35
MACs (G): 26.83
GFLOPs: 26.83
Performing 10 warm-up runs...
Measuring latency over 100 inference runs...
Average Inference Latency: 83.82 ms
Frames Per Second (FPS): 11.93
Model Profiling Complete.
