In [1]:
%pip install segmentation-models-pytorch pytorch-lightning albumentations


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Convert YOLOv8 Data to U-Net Segmentation Masks

In [2]:
import os
import numpy as np
from PIL import Image, ImageDraw

# --- Configuration ---
IMG_SIZE = 416 # All images are 416x416 
YOLO_CLASSES = ['Green Light', 'Red Light', 'Speed Limit 10', 'Speed Limit 100', 'Speed Limit 110', 'Speed Limit 120', 'Speed Limit 20', 'Speed Limit 30', 'Speed Limit 40', 'Speed Limit 50', 'Speed Limit 60', 'Speed Limit 70', 'Speed Limit 80', 'Speed Limit 90', 'Stop'] # 15 classes
# Create the map: YOLO Index (0-14) -> U-Net Index (1-15)
YOLO_TO_UNET_MAP = {str(i): i + 1 for i in range(len(YOLO_CLASSES))}

BASE_DIR = 'data/car' # Assuming your dataset is here
OUT_DIR = 'data_processed'

for split in ['train', 'valid', 'test']:
    label_dir = os.path.join(BASE_DIR, split, 'labels')
    mask_output_dir = os.path.join(OUT_DIR, split, 'masks')
    os.makedirs(mask_output_dir, exist_ok=True)

    for label_file in os.listdir(label_dir):
        if label_file.endswith('.txt'):
            mask = Image.new('L', (IMG_SIZE, IMG_SIZE), 0) # 'L' is for grayscale (8-bit)
            draw = ImageDraw.Draw(mask)
            
            with open(os.path.join(label_dir, label_file), 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 5: continue
                        
                    yolo_id, x_c, y_c, w, h = parts[0], *map(float, parts[1:])
                    unet_id = YOLO_TO_UNET_MAP.get(yolo_id)

                    # Denormalize coordinates
                    x_min = int((x_c - w/2) * IMG_SIZE)
                    y_min = int((y_c - h/2) * IMG_SIZE)
                    x_max = int((x_c + w/2) * IMG_SIZE)
                    y_max = int((y_c + h/2) * IMG_SIZE)

                    # Draw filled rectangle with the U-Net class ID
                    draw.rectangle([x_min, y_min, x_max, y_max], fill=unet_id)

            # Save the new mask image
            mask.save(os.path.join(mask_output_dir, label_file.replace('.txt', '.png')))

print("Mask generation complete. New 'masks' folders created.")

Mask generation complete. New 'masks' folders created.


In [3]:
# Install the necessary library if running in a new environment (e.g., Kaggle notebook)
# !pip install segmentation-models-pytorch pytorch-lightning albumentations

import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import segmentation_models_pytorch as smp
# We'll use this for streamlined training:
# import pytorch_lightning as pl

# --- Global Configuration ---
# Your dataset parameters
IMG_SIZE = 416
NUM_CLASSES = 16 # 15 traffic object classes + 1 background (index 0)
BASE_DIR = 'data/car' # Assuming your dataset is here
OUT_DIR = 'data_processed'
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class UNetSegmentationDataset(Dataset):
    def __init__(self, root_dir, working_dir, split, augmentation=None):
        self.image_dir = os.path.join(root_dir, split, 'images')
        self.mask_dir = os.path.join(working_dir, split, 'masks')
        self.file_names = [f for f in os.listdir(self.image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]
        self.augmentation = augmentation

        # Standard ImageNet normalization for pre-trained backbones
        self.normalize = transforms.Compose([
            transforms.ToTensor(), 
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx):
        img_name = self.file_names[idx]
        base_name = os.path.splitext(img_name)[0]
        
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = os.path.join(self.mask_dir, base_name + '.png')

        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.uint8)

        # Apply geometric augmentation (must be applied to image and mask simultaneously)
        if self.augmentation:
             sample = self.augmentation(image=image, mask=mask)
             image, mask = sample['image'], sample['mask']

        # Apply final image normalization and ToTensor
        image = self.normalize(Image.fromarray(image))
        
        # Convert mask to LongTensor (required for cross-entropy loss)
        # Squeeze removes the channel dimension (H, W, 1) -> (H, W) if present
        mask = torch.from_numpy(mask).long().squeeze()
        
        return image, mask

# --- Data Loaders Setup ---
# Simple transform for now (using only the internal normalization)
train_dataset = UNetSegmentationDataset(root_dir=BASE_DIR, working_dir=OUT_DIR, split='train')
valid_dataset = UNetSegmentationDataset(root_dir=BASE_DIR, working_dir=OUT_DIR, split='valid')

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, num_workers=0)

print(f"Train batches: {len(train_loader)}, Validation batches: {len(valid_loader)}")

Train batches: 221, Validation batches: 51


In [5]:
# 1. Model Definition (using pre-trained ResNet34 encoder)
model = smp.Unet(
    encoder_name="resnet34",      # Popular choice for segmentation, highly effective
    encoder_weights="imagenet",   # Use ImageNet weights for faster training
    in_channels=3,
    classes=NUM_CLASSES,
    activation=None,              # Use raw logits; activation is handled in the loss/metrics
)
model.to(DEVICE)

# 2. Loss Function (Crucial for imbalanced data)
# Combine Dice Loss (for better boundary/overlap performance) and Cross-Entropy Loss (for faster convergence).
dice_loss = smp.losses.DiceLoss(mode='multiclass', classes=range(NUM_CLASSES))
ce_loss = nn.CrossEntropyLoss()

def combined_loss(y_pred, y_true):
    # Loss = 50% Dice Loss + 50% Cross-Entropy Loss
    return 0.5 * dice_loss(y_pred, y_true) + 0.5 * ce_loss(y_pred, y_true)

# 3. Optimizer and Metric
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# IoU (Intersection over Union) is the best metric for segmentation quality
# metrics = [
#     smp.metrics.IoU(threshold=0.5, activation='softmax', mode='multiclass', classes=range(NUM_CLASSES)),
# ]
metrics = [ smp.losses.JaccardLoss(mode='multiclass', classes=range(NUM_CLASSES))]

In [6]:
NUM_EPOCHS = 20
best_val_loss = float('inf')

for epoch in range(1, NUM_EPOCHS + 1):
    # --- Training Phase ---
    model.train()
    running_loss = 0.0
    
    for images, masks in train_loader:
        images, masks = images.to(DEVICE), masks.to(DEVICE)

        optimizer.zero_grad()
        
        logits = model(images)
        loss = combined_loss(logits, masks)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)

    train_loss = running_loss / len(train_dataset)

    # --- Validation Phase ---
    model.eval()
    running_val_loss = 0.0
    val_iou = 0.0
    
    with torch.no_grad():
        for images, masks in valid_loader:
            images, masks = images.to(DEVICE), masks.to(DEVICE)
            
            logits = model(images)
            loss = combined_loss(logits, masks)
            running_val_loss += loss.item() * images.size(0)
            
            # Calculate IoU metric
            val_iou += metrics[0](logits, masks).item() * images.size(0)

    val_loss = running_val_loss / len(valid_dataset)
    val_iou_mean = val_iou / len(valid_dataset)

    print(f"Epoch {epoch}/{NUM_EPOCHS}")
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val IoU: {val_iou_mean:.4f}")

    # Save the best model based on validation loss
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), 'best_unet_model.pth')
        print("Model Saved! (New best validation loss)")

print("\nðŸŽ‰ Training Complete!")

Epoch 1/20
Train Loss: 1.0233 | Val Loss: 0.6781 | Val IoU: 0.5948
Model Saved! (New best validation loss)
Epoch 2/20
Train Loss: 0.5480 | Val Loss: 0.4136 | Val IoU: 0.5007
Model Saved! (New best validation loss)
Epoch 3/20
Train Loss: 0.4080 | Val Loss: 0.3452 | Val IoU: 0.4507
Model Saved! (New best validation loss)
Epoch 4/20
Train Loss: 0.3277 | Val Loss: 0.2722 | Val IoU: 0.3887
Model Saved! (New best validation loss)
Epoch 5/20
Train Loss: 0.2830 | Val Loss: 0.2346 | Val IoU: 0.3532
Model Saved! (New best validation loss)


KeyboardInterrupt: 