# 🏍️ LossZero: Motorcycle Night Ride SegFormer-B2 Optimized

이 노트북은 **SegFormer-B2** 모델을 사용하여 야간 오토바이 주행 이미지의 시멘틱 세그멘테이션을 수행합니다.

### 🛠️ 주요 시나리오
- **모델**: SegFormer-B2 (Transformer 기반)
- **백본**: MiT-B2
- **사전 학습**: Cityscapes (도로 환경 특화)
- **최적화**: AdamW + FP16 Mixed Precision
- **손실 함수**: Weighted CrossEntropy (중요 객체 가중치 부여)

In [None]:
import os
import cv2
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from pycocotools.coco import COCO
import albumentations as A
from albumentations.pytorch import ToTensorV2
from transformers import SegformerForSemanticSegmentation, SegformerConfig
from torch.amp import autocast, GradScaler
from tqdm.auto import tqdm

print(f"PyTorch version: {torch.__version__}")


## Colab 연결

In [None]:
# Mount Google Drive when running inside Google Colab. The google.colab
# package is only importable in the Colab runtime, so on any other machine
# the mount is skipped instead of aborting the notebook with an ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab is not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

In [None]:
def get_device():
    """Return the name of the torch device to run on.

    Returns:
        "cuda" when torch.cuda.is_available() reports True, otherwise "cpu".
        Apple MPS is not considered here.
    """
    return "cuda" if torch.cuda.is_available() else "cpu"

# Configuration
# Local dataset root. When running on Colab, point DATA_DIR at
# "/content/drive/MyDrive/motor_model" instead.
DATA_DIR = os.path.expanduser("~/Projects/LossZero/data/Motorcycle Night Ride Dataset")
IMG_DIR = os.path.join(DATA_DIR, "images")
JSON_PATH = os.path.join(DATA_DIR, "COCO_motorcycle (pixel).json")
print("Detected Local Environment")

# Hyperparameters and runtime settings shared by the cells below.
# img_size is (height, width).
CFG = dict(
    project="LossZero",
    model_name="nvidia/segformer-b2-finetuned-cityscapes-1024-1024",
    img_size=(352, 352),
    batch_size=4,
    lr=1e-4,
    epochs=25,
    device=get_device(),
)

print(f"Using device: {CFG['device']}")
print(f"Data directory: {DATA_DIR}")

Using device: cpu


In [None]:
def create_mask_from_json(coco, img_id, img_info, id_to_idx):
    """Rasterize one image's annotations into a class-index mask.

    Args:
        coco: object providing getAnnIds, loadAnns and annToMask.
        img_id: id of the image whose annotations are looked up.
        img_info: mapping holding the image 'height' and 'width'.
        id_to_idx: mapping from category id to class index; annotations
            whose category is absent from it are skipped.

    Returns:
        A uint8 array of shape (height, width). Pixels not covered by any
        kept annotation remain 0, and where annotations overlap the one
        listed later overwrites the earlier one.
    """
    annotations = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
    label_map = np.zeros((img_info['height'], img_info['width']), dtype=np.uint8)

    for annotation in annotations:
        category = annotation['category_id']
        if category not in id_to_idx:
            continue
        class_index = id_to_idx[category]
        covered = coco.annToMask(annotation) == 1
        label_map[covered] = class_index

    return label_map

def process_single_data(coco, img_id, img_dir, id_to_idx, transform=None):
    """Load one image and its class-index mask, optionally augmented.

    Args:
        coco: COCO-style object; loadImgs(img_id)[0] must provide
            'file_name' plus the fields create_mask_from_json reads.
        img_id: id of the image to load.
        img_dir: directory that contains the image files.
        id_to_idx: mapping from category id to class index.
        transform: optional callable invoked as transform(image=..., mask=...)
            that returns a mapping with 'image' and 'mask' entries.

    Returns:
        (image, mask): the RGB image (whatever transform returns, or the raw
        array when transform is falsy) and the mask as an int64 torch tensor.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(img_dir, img_info['file_name'])

    image = cv2.imread(img_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the offending path rather than
    # letting cvtColor raise an opaque error on a None input.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV loads images as BGR; convert to RGB for the rest of the pipeline.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask = create_mask_from_json(coco, img_id, img_info, id_to_idx)

    if transform:
        augmented = transform(image=image, mask=mask)
        image, mask = augmented['image'], augmented['mask']

    return image, torch.as_tensor(mask).long()

# Training augmentation pipeline: crop to CFG['img_size'], apply photometric
# and geometric augmentation, then normalize and convert to tensors.
train_transform = A.Compose([
    # Pad first so that an image smaller than the crop size can still be
    # cropped; for images already at least img_size this step is a no-op.
    A.PadIfNeeded(min_height=CFG['img_size'][0], min_width=CFG['img_size'][1], p=1.0),
    # Take a random img_size crop from the native-resolution image
    # (no resizing, so no loss of detail).
    A.RandomCrop(height=CFG['img_size'][0], width=CFG['img_size'][1], p=1.0),

    # --- Night-specific photometric augmentation ---
    A.CLAHE(clip_limit=2.0, tile_grid_size=(8, 8), p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
    A.RandomGamma(gamma_limit=(80, 120), p=0.5),  # helps with dark, low-light frames
    A.GaussNoise(std_range=(0.02, 0.05), p=0.3),  # robustness to night-time sensor noise

    # --- Geometric augmentation (to compensate for the small dataset) ---
    A.HorizontalFlip(p=0.5),  # left-right flip
    # shift_limit=0.0625 is kept as the commonly used, long-established default.
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5),  # shift/scale/rotate

    # Normalization: input = (image - mean) / std per channel, so inputs are
    # roughly zero-mean and unit-variance. Per the author's note, these
    # mean/std values were computed over the whole night dataset rather than
    # taken from ImageNet.
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2()
])

# Load the COCO annotation file and give every category id a contiguous
# class index, in the order getCatIds() returns the ids.
coco = COCO(JSON_PATH)
img_ids = list(coco.imgs)
cat_ids = coco.getCatIds()
id_to_idx = dict(zip(cat_ids, range(len(cat_ids))))
print(f"Category Mapping: {id_to_idx}")

## Training / Val 분할

In [None]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

class MotorcycleNightRideDataset(Dataset):
    """Map-style dataset yielding (image, mask) pairs for a list of image ids.

    Loading is delegated to process_single_data, which receives the stored
    annotation object, image directory, class mapping and transform.
    """

    def __init__(self, coco, img_ids, img_dir, id_to_idx, transform=None):
        """Store the annotation source, image ids, image dir, mapping and transform."""
        self.coco, self.img_ids, self.img_dir = coco, img_ids, img_dir
        self.id_to_idx, self.transform = id_to_idx, transform

    def __len__(self):
        """Return the number of image ids held by this dataset."""
        return len(self.img_ids)

    def __getitem__(self, idx):
        """Return the (image, mask) pair for the image id at position idx."""
        image, mask = process_single_data(
            self.coco,
            self.img_ids[idx],
            self.img_dir,
            self.id_to_idx,
            self.transform,
        )
        return image, mask

# 1. Load the annotations and split the image ids 80/20 into train/val.
coco = COCO(JSON_PATH)
all_ids = list(coco.imgs)
train_ids, val_ids = train_test_split(all_ids, test_size=0.2, random_state=42)

# 2. Validation transform: deterministic resize to CFG['img_size'] with the
#    same normalization as training (training reuses train_transform).
val_transform = A.Compose([
    A.Resize(height=CFG['img_size'][0], width=CFG['img_size'][1]),
    A.Normalize(mean=(0.281, 0.268, 0.346), std=(0.347, 0.290, 0.292)),
    ToTensorV2(),
])

# 3. Build the dataset instances.
train_dataset = MotorcycleNightRideDataset(
    coco, train_ids, IMG_DIR, id_to_idx, transform=train_transform
)
val_dataset = MotorcycleNightRideDataset(
    coco, val_ids, IMG_DIR, id_to_idx, transform=val_transform
)

# 4. Build the data loaders.
# - num_workers: os.cpu_count() may return None, which DataLoader rejects,
#   so fall back to 0 (load in the main process).
# - pin_memory: only requested when training on CUDA; on CPU it has no
#   benefit and only triggers a warning.
train_loader = DataLoader(
    train_dataset,
    batch_size=CFG['batch_size'],
    shuffle=True,
    num_workers=os.cpu_count() or 0,
    pin_memory=(CFG['device'] == 'cuda'),
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CFG['batch_size'],
    shuffle=False,
    num_workers=os.cpu_count() or 0,
    pin_memory=(CFG['device'] == 'cuda'),
)

print(f"‚úÖ Data Ready: Train={len(train_ids)}, Val={len(val_ids)}")

### 📉 클래스별 분포 요약 (내림차순)

1. **Undrivable (주행 불가 영역)**: **42.9%** (압도적 1위)
   - 배경(하늘, 건물, 풀숲 등)이 이미지의 절반 가까이 차지합니다.
2. **Road (주행 가능 도로)**: **27.1%**
   - 도로 자체도 꽤 많은 영역을 차지합니다.
3. **My bike (내 오토바이)**: **15.8%**
   - 주행자 시점이라 내 오토바이가 항상 보이기 때문에 비율이 높습니다.
4. **Rider (탑승자)**: **8.1%**
   - 다른 오토바이 운전자나 내 신체가 포함된 것으로 보입니다.
5. **Moveable (이동 물체)**: **4.7%**
   - 다른 차량, 보행자 등 안전에 가장 중요한 장애물인데 비율이 매우 낮습니다.
6. **Lane Mark (차선)**: **1.4%**
   - 가장 심각한 불균형입니다. 도로 주행의 핵심인 차선이 고작 1% 남짓입니다.

In [None]:
# Class-index -> category-name mapping (and its inverse) handed to the model
# config so predictions carry readable labels.
id2label = {
    class_index: coco.loadCats(category_id)[0]['name']
    for category_id, class_index in id_to_idx.items()
}
label2id = {name: class_index for class_index, name in id2label.items()}

# Load the pretrained checkpoint with a segmentation head sized for this
# dataset; ignore_mismatched_sizes lets the head differ from the checkpoint.
model = SegformerForSemanticSegmentation.from_pretrained(
    CFG['model_name'],
    num_labels=len(id_to_idx),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
model = model.to(CFG['device'])

optimizer = torch.optim.AdamW(model.parameters(), lr=CFG['lr'], weight_decay=0.01)

# Per-class loss weights: rare classes are weighted up, dominant ones down.
# NOTE(review): the order must match the class indices in id_to_idx; the
# class names below are the author's labels — confirm against the printed
# category mapping.
weights = torch.tensor(
    [
        5.0,   # Rider (8.1%) -> raised moderately
        2.0,   # My bike (15.8%) -> kept low (already plentiful)
        10.0,  # Moveable (4.7%) -> raised strongly
        20.0,  # Lane Mark (1.4%) -> raised very strongly (key class)
        1.0,   # Road (27.1%) -> baseline
        0.5,   # Undrivable (42.9%) -> lowered (so abundant it dominates)
    ],
    dtype=torch.float,
    device=CFG['device'],
)

criterion = nn.CrossEntropyLoss(weight=weights)

# Gradient scaling is only used for CUDA mixed precision; None elsewhere.
scaler = None
if CFG['device'] == 'cuda':
    scaler = GradScaler('cuda')

In [None]:
print("üöÄ SegFormer-B2 Training Start...")

for epoch in range(CFG['epochs']):
    # --- Training Phase ---
    model.train()
    train_loss_sum = 0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]")
    
    for images, masks in pbar:
        X = images.to(CFG['device']).contiguous()
        y = masks.to(CFG['device']).contiguous()
        
        optimizer.zero_grad()
        
        # Mixed Precision ÏßÄÏõê (CUDA Ï†ÑÏö©)
        if CFG['device'] == 'cuda' and scaler:
            with torch.amp.autocast('cuda'):
                outputs = model(X).logits
                upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
                loss = criterion(upsampled_logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(X).logits
            upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
            loss = criterion(upsampled_logits, y)
            loss.backward()
            optimizer.step()
            
        train_loss_sum += loss.item()
        pbar.set_postfix(Loss=f"{loss.item():.4f}")
    
    avg_train_loss = train_loss_sum / len(train_loader)

    # --- Validation Phase ---
    model.eval()
    val_loss_sum = 0
    with torch.no_grad():
        for images, masks in val_loader:
            X = images.to(CFG['device']).contiguous()
            y = masks.to(CFG['device']).contiguous()
            
            outputs = model(X).logits
            upsampled_logits = nn.functional.interpolate(outputs, size=y.shape[-2:], mode="bilinear", align_corners=False)
            loss = criterion(upsampled_logits, y)
            val_loss_sum += loss.item()
            
    avg_val_loss = val_loss_sum / len(val_loader)
    print(f"üìù Epoch [{epoch+1}/{CFG['epochs']}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")