In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the full dataset CSV, hold out 20% of the rows for validation (fixed
# seed so the split is reproducible) and write both partitions to disk.
df = pd.read_csv('final_binarized_dataset.csv')
train_df, val_df = train_test_split(df, random_state=42, test_size=0.2)
for split_frame, split_path in ((train_df, 'train.csv'), (val_df, 'val.csv')):
    split_frame.to_csv(split_path, index=False)
print(f'Train: {len(train_df)}, Val: {len(val_df)}')


Train: 4980, Val: 1246


In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import pandas as pd
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2

class RoadSegmentationDataset(Dataset):
    """Image/mask pairs for road segmentation, listed in a CSV file.

    The CSV must provide ``image_path`` and ``mask_path`` columns. Every item
    is returned as an ``(image, mask)`` pair of float tensors divided by 255,
    where the mask carries an extra leading axis.

    Args:
        csv_file: Path of the CSV file listing the samples.
        transforms: Optional callable invoked as
            ``transforms(image=..., mask=...)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries (e.g. an albumentations
            ``Compose``). When omitted, the raw arrays are converted to
            tensors directly.
    """

    def __init__(self, csv_file, transforms=None):
        self.data = pd.read_csv(csv_file)
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data)

    def preprocess_image(self, image):
        """Denoise, contrast-equalise and sharpen an RGB image.

        Args:
            image: RGB image array as produced in ``__getitem__``.

        Returns:
            The processed RGB image array.
        """
        # NOTE(review): this runs on every __getitem__ call; non-local-means
        # denoising is presumably the dominant per-sample cost -- consider
        # caching preprocessed images if data loading is the bottleneck.
        # 1. Denoising
        image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

        # 2. Histogram Equalization (CLAHE) for better contrast, applied to
        #    the first LAB channel only so the colour channels are untouched.
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        lab[:, :, 0] = clahe.apply(lab[:, :, 0])
        image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

        # 3. Sharpening to enhance road edges (kernel weights sum to 1)
        kernel = np.array([[-1, -1, -1],
                           [-1,  9, -1],
                           [-1, -1, -1]])
        image = cv2.filter2D(image, -1, kernel)

        return image

    @staticmethod
    def _to_float_tensors(image, mask):
        """Convert an image/mask pair to float tensors divided by 255.

        Accepts either tensors (e.g. already converted by the transform
        pipeline) or numpy arrays. A 3-D numpy image is assumed to be
        height x width x channels and is moved to channel-first layout.
        """
        if not torch.is_tensor(image):
            if image.ndim == 3:
                image = image.transpose(2, 0, 1)
            image = torch.from_numpy(np.ascontiguousarray(image))
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))

        image = image.float() / 255.0
        mask = mask.float() / 255.0
        mask = mask.unsqueeze(0)
        return image, mask

    def __getitem__(self, idx):
        """Load, preprocess and (optionally) augment the sample at ``idx``.

        Raises:
            FileNotFoundError: If the image or mask file cannot be read.
        """
        row = self.data.iloc[idx]
        img_path = row['image_path']
        mask_path = row['mask_path']

        # cv2.imread signals failure by returning None instead of raising.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Apply advanced preprocessing
        image = self.preprocess_image(image)

        # Flag 0 loads the mask as a single-channel grayscale image.
        mask = cv2.imread(mask_path, 0)
        if mask is None:
            raise FileNotFoundError(f'Could not read mask: {mask_path}')

        if self.transforms:
            augmented = self.transforms(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Works both with and without a transform pipeline; previously the
        # default transforms=None crashed on ndarray.float().
        return self._to_float_tensors(image, mask)

# More aggressive augmentations
train_transforms = A.Compose([
    A.Resize(512, 512),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=45, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.3),
    # `var_limit` triggered a warning in this cell's output: current
    # albumentations releases take `std_range` instead, expressed as a
    # fraction of the maximum pixel value. The bounds below are the standard
    # deviations of the former variance range (10, 50) on a 0-255 scale.
    A.GaussNoise(std_range=(10.0 ** 0.5 / 255.0, 50.0 ** 0.5 / 255.0), p=0.3),
    A.GaussianBlur(blur_limit=(3, 7), p=0.2),
    A.ElasticTransform(alpha=120, sigma=120 * 0.05, p=0.2),
    ToTensorV2()
])

# Validation uses no random augmentation: resize and tensor conversion only.
val_transforms = A.Compose([
    A.Resize(512, 512),
    ToTensorV2()
])

train_dataset = RoadSegmentationDataset('train.csv', transforms=train_transforms)
val_dataset = RoadSegmentationDataset('val.csv', transforms=val_transforms)

# Only the training loader shuffles; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0)


  from .autonotebook import tqdm as notebook_tqdm
  original_init(self, **validated_kwargs)
  A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),


In [2]:
import segmentation_models_pytorch as smp
import torch

# DeepLabV3+ on a ResNet-50 encoder initialised from ImageNet weights.
# activation=None: no output activation is attached to the single-class head.
model = smp.DeepLabV3Plus(
    classes=1,
    in_channels=3,
    encoder_name="resnet50",
    encoder_weights="imagenet",
    activation=None,
)

# Device preference order: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
    print("Using MPS (Metal) device")
else:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)
print('DeepLabv3+ with ResNet50 loaded')


Using MPS (Metal) device
DeepLabv3+ with ResNet50 loaded


In [None]:
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

class DiceLoss(nn.Module):
    """Soft Dice loss computed from raw logits.

    Args:
        smooth: Constant added to both numerator and denominator of the Dice
            ratio so that it stays defined when prediction and target are
            both empty. Defaults to ``1e-5``.
    """

    def __init__(self, smooth=1e-5):
        super().__init__()
        self.smooth = smooth

    def forward(self, pred, target):
        """Return ``1 - mean Dice`` for a batch.

        Args:
            pred: Logits; a sigmoid is applied here. Reduced over dims 2 and 3,
                so a 4-D layout is expected.
            target: Tensor broadcastable against ``pred``.

        Returns:
            Scalar tensor: one minus the Dice coefficient averaged over the
            remaining (non-reduced) dimensions.
        """
        probs = torch.sigmoid(pred)
        intersection = (probs * target).sum(dim=(2, 3))
        total = probs.sum(dim=(2, 3)) + target.sum(dim=(2, 3))
        dice = (2.0 * intersection + self.smooth) / (total + self.smooth)
        return 1 - dice.mean()

class CombinedLoss(nn.Module):
    """Unweighted sum of binary cross-entropy (on logits) and Dice loss."""

    def __init__(self):
        super().__init__()
        self.bce = nn.BCEWithLogitsLoss()
        self.dice = DiceLoss()

    def forward(self, pred, target):
        """Return ``BCEWithLogits(pred, target) + Dice(pred, target)``."""
        bce_term = self.bce(pred, target)
        dice_term = self.dice(pred, target)
        return bce_term + dice_term

def calculate_dice(pred, target, threshold=0.5, smooth=1e-5):
    """Mean Dice coefficient of thresholded predictions over a batch.

    Args:
        pred: 4-D tensor of logits; sigmoid is applied before thresholding.
        target: 4-D tensor; values above 0.5 count as foreground.
        threshold: Probability above which a prediction counts as foreground.
        smooth: Constant added to numerator and denominator so the ratio is
            defined when both masks are empty.

    Returns:
        Python float: Dice averaged over the first (batch) dimension.
    """
    # Metrics never need gradients, even when called on training outputs.
    with torch.no_grad():
        pred_mask = torch.sigmoid(pred) > threshold
        target_mask = target > 0.5
        intersection = (pred_mask & target_mask).float().sum((1, 2, 3))
        # Cast before summing so all terms share a float dtype, as in
        # calculate_iou.
        total = pred_mask.float().sum((1, 2, 3)) + target_mask.float().sum((1, 2, 3))
        dice = (2.0 * intersection + smooth) / (total + smooth)
        return dice.mean().item()

def calculate_iou(pred, target, threshold=0.5, smooth=1e-5):
    """Mean intersection-over-union of thresholded predictions over a batch.

    Args:
        pred: 4-D tensor of logits; sigmoid is applied before thresholding.
        target: 4-D tensor; values above 0.5 count as foreground.
        threshold: Probability above which a prediction counts as foreground.
        smooth: Constant added to numerator and denominator so the ratio is
            defined when both masks are empty.

    Returns:
        Python float: IoU averaged over the first (batch) dimension.
    """
    # Metrics never need gradients, even when called on training outputs.
    with torch.no_grad():
        pred_mask = torch.sigmoid(pred) > threshold
        target_mask = target > 0.5
        intersection = (pred_mask & target_mask).float().sum((1, 2, 3))
        union = (pred_mask | target_mask).float().sum((1, 2, 3))
        iou = (intersection + smooth) / (union + smooth)
        return iou.mean().item()

# Loss, optimiser and LR schedule. The scheduler runs in 'max' mode because it
# is stepped with the validation IoU further down (higher is better).
criterion = CombinedLoss()
optimizer = Adam(model.parameters(), lr=1e-3)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=5)

num_epochs = 40
best_val_iou = 0.0
best_val_dice = 0.0

print("\n" + "="*80)
print("TRAINING STARTED - DeepLabv3+ ResNet50 | 512x512 | LR=1e-3 | Batch=4")
print("="*80 + "\n")

for epoch in range(num_epochs):
    # ---- Training pass -----------------------------------------------------
    model.train()
    train_loss = 0.0
    train_iou = 0.0
    train_dice = 0.0

    train_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Train]')
    for images, masks in train_bar:
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # Read the scalar once: every .item() forces a device-to-host sync.
        batch_loss = loss.item()
        train_loss += batch_loss
        # Metrics are for reporting only; detach so no autograd graph is built.
        train_iou += calculate_iou(outputs.detach(), masks)
        train_dice += calculate_dice(outputs.detach(), masks)
        train_bar.set_postfix(loss=batch_loss)

    # Running sums become per-batch means (each batch weighted equally).
    train_loss /= len(train_loader)
    train_iou /= len(train_loader)
    train_dice /= len(train_loader)

    # ---- Validation pass ---------------------------------------------------
    model.eval()
    val_loss = 0.0
    val_iou = 0.0
    val_dice = 0.0

    with torch.no_grad():
        val_bar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{num_epochs} [Val]  ')
        for images, masks in val_bar:
            images = images.to(device)
            masks = masks.to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)

            batch_loss = loss.item()
            val_loss += batch_loss
            val_iou += calculate_iou(outputs, masks)
            val_dice += calculate_dice(outputs, masks)
            val_bar.set_postfix(loss=batch_loss)

    val_loss /= len(val_loader)
    val_iou /= len(val_loader)
    val_dice /= len(val_loader)

    # ---- Epoch report ------------------------------------------------------
    print("\n" + "-"*80)
    print(f"EPOCH {epoch+1}/{num_epochs} RESULTS:")
    print("-"*80)
    print(f"  TRAIN | Loss: {train_loss:.4f} | IoU: {train_iou:.4f} ({train_iou*100:.2f}%) | Dice: {train_dice:.4f} ({train_dice*100:.2f}%)")
    print(f"  VAL   | Loss: {val_loss:.4f} | IoU: {val_iou:.4f} ({val_iou*100:.2f}%) | Dice: {val_dice:.4f} ({val_dice*100:.2f}%)")
    print("-"*80)

    # Compare the LR before and after stepping to report plateau reductions.
    current_lr = optimizer.param_groups[0]['lr']
    print(f"  Learning Rate: {current_lr}")
    scheduler.step(val_iou)
    new_lr = optimizer.param_groups[0]['lr']
    if new_lr < current_lr:
        print(f"  → Learning rate reduced to {new_lr}")

    # Checkpoint only when validation IoU strictly improves; the Dice value
    # recorded alongside is the one from that same epoch.
    if val_iou > best_val_iou:
        best_val_iou = val_iou
        best_val_dice = val_dice
        torch.save(model.state_dict(), 'best_model.pth')
        print(f"  ✓ NEW BEST MODEL SAVED! Val IoU: {val_iou:.4f} ({val_iou*100:.2f}%) | Val Dice: {val_dice:.4f} ({val_dice*100:.2f}%)")
    print("-"*80)
    print()

print("\n" + "="*80)
print("TRAINING COMPLETE")
print("="*80)
print(f"  Best Validation IoU:  {best_val_iou:.4f} ({best_val_iou*100:.2f}%)")
print(f"  Best Validation Dice: {best_val_dice:.4f} ({best_val_dice*100:.2f}%)")
print("="*80 + "\n")



TRAINING STARTED - DeepLabv3+ ResNet50 | 512x512 | LR=1e-3 | Batch=4



Epoch 1/40 [Train]:   0%|          | 0/1245 [00:07<?, ?it/s]


KeyboardInterrupt: 