In [1]:
!pip install albumentations segmentation_models_pytorch

Collecting segmentation_models_pytorch
  Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl.metadata (32 kB)
Collecting efficientnet-pytorch>=0.6.1 (from segmentation_models_pytorch)
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pretrainedmodels>=0.7.1 (from segmentation_models_pytorch)
  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting munch (from pretrainedmodels>=0.7.1->segmentation_models_pytorch)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading segmentation_models_pytorch-0.4.0-py3-none-any.whl (121 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.3/121.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)
B

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
from tqdm import tqdm
from PIL import Image
import numpy as np
import ssl

# SECURITY NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, so TLS certificates are not checked by code that relies on
# that default. Presumably added so the pretrained-weight download succeeds —
# confirm, and prefer fixing the certificate store over keeping this override.
ssl._create_default_https_context = ssl._create_unverified_context

# Select the GPU when CUDA is available, otherwise fall back to the CPU.
# Multi-GPU wrapping is not done here; get_model applies nn.DataParallel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

# ================= Dataset Class =================
class RoadDataset(Dataset):
    """Binary segmentation dataset pairing each image with a same-named mask.

    Args:
        img_dir: Directory containing the input images.
        mask_dir: Directory containing the masks; every mask must carry the
            same file name as its image.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.

    Each item is an ``(image, mask)`` pair of tensors; the mask gets a leading
    channel dimension and contains 1.0 wherever the source mask is non-zero.
    """

    def __init__(self, img_dir, mask_dir, transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        # os.listdir returns entries in arbitrary order and also lists
        # sub-directories. Sort so the index -> sample mapping is reproducible
        # and keep regular files only.
        self.images = sorted(
            name for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
        )
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        file_name = self.images[idx]
        img_path = os.path.join(self.img_dir, file_name)
        mask_path = os.path.join(self.mask_dir, file_name)

        # Context managers close the underlying files as soon as the pixel
        # data has been copied into numpy arrays.
        with Image.open(img_path) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert("L"))

        # Any non-zero mask pixel counts as foreground.
        mask = (mask > 0).astype(np.float32)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # Without a transform (or with one that does not convert to tensors)
        # the sample is still numpy, and mask.unsqueeze would fail. Convert
        # here: image H x W x C -> C x H x W, dtype left unchanged.
        if not torch.is_tensor(image):
            image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if not torch.is_tensor(mask):
            mask = torch.from_numpy(np.ascontiguousarray(mask))

        return image, mask.unsqueeze(0)

# ================= Transformations =================
def get_transforms(img_size=512):
    """Build the albumentations pipelines for training and validation.

    Both pipelines resize to ``img_size`` x ``img_size``, normalize with the
    same mean/std, and finish with ``ToTensorV2``. The training pipeline
    additionally applies horizontal flip, vertical flip and 90-degree rotation,
    each with probability 0.5.

    Returns:
        Tuple ``(train_transform, val_transform)``.
    """
    norm_kwargs = dict(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

    def _finishing_steps():
        # Fresh instances per pipeline so the two Compose objects share nothing.
        return [A.Normalize(**norm_kwargs), ToTensorV2()]

    random_augmentations = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
    ]

    train_pipeline = A.Compose(
        [A.Resize(img_size, img_size), *random_augmentations, *_finishing_steps()]
    )
    eval_pipeline = A.Compose(
        [A.Resize(img_size, img_size), *_finishing_steps()]
    )
    return train_pipeline, eval_pipeline

# ================= Loss Functions =================
def combined_loss(pred, target):
    """Return the equally weighted sum of binary focal loss and binary Dice loss.

    Args:
        pred: Model output; the Dice term is configured with
            ``from_logits=True``, so raw logits are expected.
        target: Ground-truth mask matching ``pred``.

    Returns:
        ``0.5 * focal + 0.5 * dice``.
    """
    criteria = (
        smp.losses.FocalLoss(mode="binary", alpha=0.8, gamma=2.0),
        smp.losses.DiceLoss(mode="binary", from_logits=True),
    )
    focal_term, dice_term = (criterion(pred, target) for criterion in criteria)
    return 0.5 * focal_term + 0.5 * dice_term

def calculate_iou(pred, target):
    """Compute the mean intersection-over-union of thresholded predictions.

    ``pred`` is passed through a sigmoid and binarized at 0.5; ``target`` is
    cast to bool. Intersection and union are summed over dimensions 2 and 3,
    a smoothing term of 1e-4 is added to both, and the mean ratio is returned
    as a Python float.
    """
    smoothing = 1e-4
    spatial_axes = (2, 3)

    predicted_mask = torch.sigmoid(pred).gt(0.5)
    truth_mask = target.to(torch.bool)

    overlap = torch.logical_and(predicted_mask, truth_mask).sum(dim=spatial_axes)
    combined = torch.logical_or(predicted_mask, truth_mask).sum(dim=spatial_axes)

    per_sample = (overlap + smoothing) / (combined + smoothing)
    return per_sample.mean().item()

# ================= Model Initialization =================
def get_model(encoder_name="resnext101_32x8d", num_classes=1, encoder_weights="ssl"):
    """Create a DeepLabV3+ model wrapped in ``nn.DataParallel`` on ``device``.

    Args:
        encoder_name: Name of the segmentation_models_pytorch encoder backbone.
        num_classes: Number of output channels of the segmentation head.
        encoder_weights: Pretrained weight set for the encoder. Defaults to
            ``"ssl"`` (the value previously hard-coded); pass another name, or
            ``None``, for encoders that do not ship that weight set.

    Returns:
        The model wrapped in ``nn.DataParallel`` and moved to ``device``.
        Because of the wrapper, ``state_dict()`` keys carry a ``module.`` prefix.
    """
    model = smp.DeepLabV3Plus(
        encoder_name=encoder_name,
        encoder_weights=encoder_weights,
        in_channels=3,
        classes=num_classes
    )
    return nn.DataParallel(model).to(device)

# ================= Training Function =================
def train_fn(loader, model, optimizer, loss_fn, scaler):
    """Run one training epoch with mixed precision.

    Args:
        loader: Iterable yielding ``(data, targets)`` batches.
        model: Network to train; switched to train mode here.
        optimizer: Optimizer stepped once per batch through ``scaler``.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.
        scaler: Gradient scaler used to scale the loss, step the optimizer and
            update the scale factor.

    Returns:
        Tuple ``(mean_loss, mean_iou)`` averaged over the processed batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    loop = tqdm(loader, leave=True)
    model.train()
    total_loss = 0.0
    total_iou = 0.0
    num_batches = 0

    for data, targets in loop:
        data = data.to(device)
        targets = targets.to(device)

        # torch.cuda.amp.autocast() is deprecated; torch.autocast is the
        # device-agnostic replacement. Mixed precision is only enabled on CUDA.
        with torch.autocast(device_type=device.type, enabled=device.type == "cuda"):
            outputs = model(data)
            loss = loss_fn(outputs, targets)

        # The metric needs neither autocast nor gradients.
        iou = calculate_iou(outputs.detach(), targets)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the loss back once; every .item() forces a device sync.
        loss_value = loss.item()
        total_loss += loss_value
        total_iou += iou
        num_batches += 1
        loop.set_postfix(loss=loss_value, IoU=iou)

    if num_batches == 0:
        raise ValueError("train_fn received a loader that produced no batches")

    return total_loss / num_batches, total_iou / num_batches

# ================= Validation Function =================
def val_fn(loader, model, loss_fn):
    """Evaluate the model on a validation loader without tracking gradients.

    Args:
        loader: Iterable yielding ``(data, targets)`` batches.
        model: Network to evaluate; switched to eval mode here.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar loss.

    Returns:
        Tuple ``(mean_loss, mean_iou)`` averaged over the processed batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    total_iou = 0.0
    num_batches = 0

    with torch.no_grad():
        for data, targets in loader:
            data = data.to(device)
            targets = targets.to(device)
            outputs = model(data)
            loss = loss_fn(outputs, targets)
            iou = calculate_iou(outputs, targets)
            total_loss += loss.item()
            total_iou += iou
            num_batches += 1

    # An empty loader used to surface as a bare ZeroDivisionError.
    if num_batches == 0:
        raise ValueError("val_fn received a loader that produced no batches")

    return total_loss / num_batches, total_iou / num_batches

# ================= Save Model Function =================
def save_model(model, encoder_name, epoch, train_iou, val_iou, best_model_path=None):
    """Save a new best checkpoint and then remove the one it supersedes.

    Args:
        model: Module whose ``state_dict()`` is stored. If the model is wrapped
            in ``nn.DataParallel`` the stored keys carry a ``module.`` prefix.
        encoder_name: Encoder name embedded in the checkpoint file name.
        epoch: Epoch number stored in the checkpoint.
        train_iou: Training IoU stored in the checkpoint.
        val_iou: Validation IoU stored in the checkpoint and embedded in the
            file name with four decimals.
        best_model_path: Path of the previous best checkpoint, or ``None``.

    Returns:
        The file name of the checkpoint that was written.
    """
    checkpoint = {
        'state_dict': model.state_dict(),
        'epoch': epoch,
        'train_iou': train_iou,
        'val_iou': val_iou
    }
    filename = f"DeepLabV3Plus_{encoder_name}_best_model_val_iou{val_iou:.4f}.pth"

    # Write to a temporary name and rename atomically, so an interrupted save
    # can never leave a truncated file under the final name.
    tmp_filename = filename + ".tmp"
    torch.save(checkpoint, tmp_filename)
    os.replace(tmp_filename, filename)

    # Remove the old checkpoint only after the new one is safely on disk.
    # Two IoUs that round to the same four decimals give the same file name;
    # in that case the "old" path is the file just written and must be kept.
    if (
        best_model_path is not None
        and os.path.exists(best_model_path)
        and os.path.abspath(best_model_path) != os.path.abspath(filename)
    ):
        os.remove(best_model_path)

    return filename

# ================= Main Function =================
def main(train_img_dir, train_mask_dir, val_img_dir, val_mask_dir,
         learning_rate=1e-4, batch_size=16, num_epochs=90, img_size=512,
         encoder_name="resnext101_32x8d", num_workers=4):
    """Train DeepLabV3+ and keep the checkpoint with the best validation IoU.

    Args:
        train_img_dir: Directory with training images.
        train_mask_dir: Directory with training masks.
        val_img_dir: Directory with validation images.
        val_mask_dir: Directory with validation masks.
        learning_rate: Adam learning rate.
        batch_size: Batch size for both data loaders.
        num_epochs: Number of training epochs.
        img_size: Side length images and masks are resized to.
        encoder_name: Encoder backbone passed to ``get_model``.
        num_workers: Worker processes per data loader.

    The keyword defaults reproduce the values that were previously hard-coded
    inside this function.
    """
    train_transform, val_transform = get_transforms(img_size=img_size)
    train_dataset = RoadDataset(train_img_dir, train_mask_dir, transform=train_transform)
    val_dataset = RoadDataset(val_img_dir, val_mask_dir, transform=val_transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers)

    model = get_model(encoder_name=encoder_name)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # torch.cuda.amp.GradScaler() is deprecated (it emitted a FutureWarning in
    # this notebook's run); torch.amp.GradScaler("cuda", ...) replaces it.
    # Scaling is disabled when CUDA is not available.
    scaler = torch.amp.GradScaler("cuda", enabled=torch.cuda.is_available())

    # Best validation score so far and the checkpoint file that holds it.
    best_val_iou = 0.0
    best_model_path = None

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        train_loss, train_iou = train_fn(train_loader, model, optimizer, combined_loss, scaler)
        val_loss, val_iou = val_fn(val_loader, model, combined_loss)

        print(f"Train Loss: {train_loss:.4f}, Train IoU: {train_iou:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val IoU: {val_iou:.4f}")

        # Save model if validation IoU improves
        if val_iou > best_val_iou:
            best_val_iou = val_iou
            best_model_path = save_model(model, encoder_name, epoch + 1, train_iou, val_iou, best_model_path)
            print(f"New best model saved! Validation IoU: {val_iou:.4f}")

    print("\nTraining completed!")
    print(f"Best validation IoU: {best_val_iou:.4f}")
    print(f"Best model saved as: {best_model_path}")

if __name__ == '__main__':
    # Dataset locations on Kaggle; change these when running elsewhere.
    VAL_MASK_DIR = '/kaggle/input/r-shanghai/cmp_data(shanghai)/val/mask'
    VAL_IMG_DIR = '/kaggle/input/r-shanghai/cmp_data(shanghai)/val/img'
    TRAIN_MASK_DIR = '/kaggle/input/r-shanghai/cmp_data(shanghai)/train/mask'
    TRAIN_IMG_DIR = '/kaggle/input/r-shanghai/cmp_data(shanghai)/train/img'

    # Keyword arguments make the image/mask pairing explicit at the call site.
    main(
        train_img_dir=TRAIN_IMG_DIR,
        train_mask_dir=TRAIN_MASK_DIR,
        val_img_dir=VAL_IMG_DIR,
        val_mask_dir=VAL_MASK_DIR,
    )

  check_for_updates()


Device: cuda


Downloading: "https://dl.fbaipublicfiles.com/semiweaksupervision/model_files/semi_supervised_resnext101_32x8-2cfe2f8b.pth" to /root/.cache/torch/hub/checkpoints/semi_supervised_resnext101_32x8-2cfe2f8b.pth
100%|██████████| 340M/340M [00:05<00:00, 62.9MB/s] 
  scaler = torch.cuda.amp.GradScaler()



Epoch 1/90


  with torch.cuda.amp.autocast():
100%|██████████| 103/103 [01:17<00:00,  1.32it/s, IoU=0.31, loss=0.337] 


Train Loss: 0.3479, Train IoU: 0.2268
Val Loss: 0.2784, Val IoU: 0.3596
New best model saved! Validation IoU: 0.3596

Epoch 2/90


100%|██████████| 103/103 [01:21<00:00,  1.27it/s, IoU=0.361, loss=0.294]


Train Loss: 0.2697, Train IoU: 0.3673
Val Loss: 0.2278, Val IoU: 0.4173
New best model saved! Validation IoU: 0.4173

Epoch 3/90


100%|██████████| 103/103 [01:20<00:00,  1.28it/s, IoU=0.493, loss=0.174]


Train Loss: 0.2353, Train IoU: 0.4190
Val Loss: 0.2117, Val IoU: 0.4385
New best model saved! Validation IoU: 0.4385

Epoch 4/90


100%|██████████| 103/103 [01:20<00:00,  1.28it/s, IoU=0.535, loss=0.114]


Train Loss: 0.2179, Train IoU: 0.4480
Val Loss: 0.2021, Val IoU: 0.4651
New best model saved! Validation IoU: 0.4651

Epoch 5/90


100%|██████████| 103/103 [01:20<00:00,  1.28it/s, IoU=0.503, loss=0.227]


Train Loss: 0.2009, Train IoU: 0.4787
Val Loss: 0.1951, Val IoU: 0.4898
New best model saved! Validation IoU: 0.4898

Epoch 6/90


100%|██████████| 103/103 [01:20<00:00,  1.28it/s, IoU=0.458, loss=0.235]


Train Loss: 0.2008, Train IoU: 0.4828
Val Loss: 0.1838, Val IoU: 0.5015
New best model saved! Validation IoU: 0.5015

Epoch 7/90


100%|██████████| 103/103 [01:20<00:00,  1.28it/s, IoU=0.54, loss=0.104] 


Train Loss: 0.1814, Train IoU: 0.5059
Val Loss: 0.1924, Val IoU: 0.4925

Epoch 8/90


 96%|█████████▌| 99/103 [01:17<00:03,  1.30it/s, IoU=0.499, loss=0.226] 