In [1]:
import os
import zipfile

# URL for the dataset
url = "https://zenodo.org/records/5706578/files/Train.zip?download=1"

# Download the file using wget
!wget -O /content/Train.zip "$url"

# Define the extraction path
extract_path = '/content/datasets/Train/'

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Extract the ZIP file
with zipfile.ZipFile('/content/Train.zip', 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# List the contents of the extracted folder
extracted_files = os.listdir(extract_path)
print("Extracted files:", extracted_files)


--2025-01-16 09:49:26--  https://zenodo.org/records/5706578/files/Train.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.43.25, 188.185.48.194, 188.185.45.92, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.25|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4021669263 (3.7G) [application/octet-stream]
Saving to: ‘/content/Train.zip’


2025-01-16 09:54:53 (11.8 MB/s) - ‘/content/Train.zip’ saved [4021669263/4021669263]

Extracted files: ['Train']


In [2]:
import os
import zipfile

# URL for the dataset
url = "https://zenodo.org/records/5706578/files/Val.zip?download=1"

# Download the file using wget
!wget -O /content/Val.zip "$url"

# Define the extraction path
extract_path = '/content/datasets/Val/'

# Create the extraction directory if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Extract the ZIP file
with zipfile.ZipFile('/content/Val.zip', 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# List the contents of the extracted folder
extracted_files = os.listdir(extract_path)
print("Extracted files:", extracted_files)


--2025-01-16 09:55:30--  https://zenodo.org/records/5706578/files/Val.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.48.194, 188.185.45.92, 188.185.43.25, ...
Connecting to zenodo.org (zenodo.org)|188.185.48.194|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2425958254 (2.3G) [application/octet-stream]
Saving to: ‘/content/Val.zip’


2025-01-16 10:02:48 (5.29 MB/s) - ‘/content/Val.zip’ saved [2425958254/2425958254]

Extracted files: ['Val']


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict
from torchvision import models

expansion = 4

class ConvBN(nn.Module):  # Convolutional followed by Batch Norm
    def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0, dilation=1):
        super(ConvBN, self).__init__()
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
                              padding=padding, dilation=dilation, bias=False)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=1e-3)

    def forward(self, x):
        return self.bn(self.conv(x))

class Bottleneck(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1, dilation=1, downsample=False, dropout_rate=0.5):
        super(Bottleneck, self).__init__()
        mid_planes = out_planes // expansion
        self.conv1 = ConvBN(in_planes, mid_planes, kernel_size=1, stride=stride)
        self.relu1 = nn.ReLU(inplace=True)

        # Dropout after the first convolution
        self.dropout1 = nn.Dropout2d(p=dropout_rate)

        self.conv2 = ConvBN(mid_planes, mid_planes, kernel_size=3, stride=1, padding=dilation, dilation=dilation)
        self.relu2 = nn.ReLU(inplace=True)

        # Dropout after the second convolution
        self.dropout2 = nn.Dropout2d(p=dropout_rate)

        self.conv3 = ConvBN(mid_planes, out_planes, kernel_size=1)
        self.relu3 = nn.ReLU(inplace=True)

        # Dropout after the third convolution
        self.dropout3 = nn.Dropout2d(p=dropout_rate)

        if downsample:
            self.shortcut = ConvBN(in_planes, out_planes, kernel_size=1, stride=stride)
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        identity = self.shortcut(x)
        out = self.relu1(self.conv1(x))

        # Apply dropout after the first convolution
        out = self.dropout1(out)

        out = self.relu2(self.conv2(out))

        # Apply dropout after the second convolution
        out = self.dropout2(out)

        out = self.conv3(out)

        # Apply dropout after the third convolution
        out = self.dropout3(out)

        out += identity
        return self.relu3(out)

def make_layer(blocks, in_planes, out_planes, stride, dilation, dropout_rate=0.5):
    layers = OrderedDict()
    layers['block1'] = Bottleneck(in_planes, out_planes, stride=stride, dilation=dilation, downsample=True, dropout_rate=dropout_rate)
    for i in range(1, blocks):
        layers[f'block{i+1}'] = Bottleneck(out_planes, out_planes, stride=1, dilation=dilation, dropout_rate=dropout_rate)
    return nn.Sequential(layers)

class ASPP(nn.Module):
    def __init__(self, in_planes, out_planes, atrous_rates):
        super(ASPP, self).__init__()
        self.convs = nn.ModuleList([
            nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1,
                      padding=rate, dilation=rate, bias=True) for rate in atrous_rates
        ])
        self._init_weights()

    def _init_weights(self):
        for conv in self.convs:
            nn.init.normal_(conv.weight, mean=0, std=0.01)
            nn.init.constant_(conv.bias, 0)

    def forward(self, x):
        return sum(conv(x) for conv in self.convs)


# Define DeepLabV2 Model
class DeepLabV2(nn.Module):
    def __init__(self, n_classes):
        super(DeepLabV2, self).__init__()

        from torchvision.models import ResNet101_Weights
        model = models.resnet101(weights=ResNet101_Weights.IMAGENET1K_V1)


        # Keep only layers up to layer4
        self.backbone = nn.Sequential(*(list(model.children())[:-2]))  # Exclude the final FC layer
        self.aspp = nn.ModuleList([
            nn.Conv2d(2048, 256, kernel_size=3, padding=r, dilation=r, bias=True) # list of modules with different dilation rates
            for r in [6, 12, 18, 24]
        ])
        self.classifier = nn.Conv2d(256, n_classes, kernel_size=1)

        # Add upsampling layer
        #self.upsample = nn.Upsample(scale_factor=32, mode='bilinear', align_corners=True) # Upsample by 32 to match input size
        self.upsample = nn.Upsample(size=(720, 720), mode='bilinear', align_corners=True)  # Match target size

    def forward(self, x):
        x = self.backbone(x)
        aspp_out = sum(aspp(x) for aspp in self.aspp) # the outputs of the four convolutions are summed together

        x = self.classifier(aspp_out) # Apply the classifier

        # Upsample the output
        x = self.upsample(x) # Apply upsampling

        return x


In [4]:
class OhemCrossEntropy(nn.Module):
    def __init__(self, ignore_label=-1, thres=0.7,
                 min_kept=100000, weight=None):
        super(OhemCrossEntropy, self).__init__()
        self.thresh = thres
        self.min_kept = max(1, min_kept)
        self.ignore_label = ignore_label
        self.criterion = nn.CrossEntropyLoss(
            weight=weight,
            ignore_index=ignore_label,
            reduction='none'
        )

    def _ce_forward(self, score, target):


        loss = self.criterion(score, target)

        return loss

    def _ohem_forward(self, score, target, **kwargs):

        pred = F.softmax(score, dim=1)
        pixel_losses = self.criterion(score, target).contiguous().view(-1)
        mask = target.contiguous().view(-1) != self.ignore_label
        tmp_target = target.clone()
        tmp_target[tmp_target == self.ignore_label] = 0
        pred = pred.gather(1, tmp_target.unsqueeze(1))
        pred, ind = pred.contiguous().view(-1,)[mask].contiguous().sort()
        min_value = pred[min(self.min_kept, pred.numel() - 1)]
        threshold = max(min_value, self.thresh)

        pixel_losses = pixel_losses[mask][ind]
        pixel_losses = pixel_losses[pred < threshold]
        return pixel_losses.mean()

    def forward(self, score, target):

        if not (isinstance(score, list) or isinstance(score, tuple)):
            score = [score]

        #params from adam_oce_scheduler configuration

        balance_weights = [0.4, 1.0]
        sb_weights = 0.5
        if len(balance_weights) == len(score):
            functions = [self._ce_forward] * \
                (len(balance_weights) - 1) + [self._ohem_forward]
            return sum([
                w * func(x, target)
                for (w, x, func) in zip(balance_weights, score, functions)
            ])

        elif len(score) == 1:
            return sb_weights * self._ohem_forward(score[0], target)

        else:
            raise ValueError("lengths of prediction and target are not identical!")






class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, ignore_label=0, weight=None):

        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_label = ignore_label
        self.criterion = nn.CrossEntropyLoss(weight=weight, ignore_index=ignore_label, reduction='none')

    def forward(self, score, target):

        # Compute standard cross-entropy loss
        ce_loss = self.criterion(score, target)

        # Compute probability of the target class
        probs = F.softmax(score, dim=1)
        target_probs = probs.gather(1, target.unsqueeze(1)).squeeze(1)

        # Apply the focal loss formula
        focal_weight = self.alpha * (1 - target_probs) ** self.gamma
        focal_loss = focal_weight * ce_loss

        # Return the average loss
        return focal_loss.mean()

In [None]:
import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
!pip install thop
from thop import profile
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter


# Replace with your dataset class and helper functions
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from PIL import Image
import numpy as np

import albumentations as A
from albumentations.pytorch import ToTensorV2

class SimpleSegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, preferred_resolution=(720, 720), original_resolution=(1024, 1024), transform=None, augment=False, validation=False):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.preferred_resolution = preferred_resolution
        self.original_resolution = original_resolution
        self.transform = transform
        self.augment = augment
        self.validation = validation
        self.images = sorted(os.listdir(image_dir))
        self.masks = sorted(os.listdir(mask_dir))

        # Training augmentation transforms
        self.aug_transform = A.Compose([
            A.Resize(height=self.preferred_resolution[0], width=self.preferred_resolution[1], p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(limit=30, p=0.5),
            A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])



        # Validation transform (original resolution)
        self.val_transform = A.Compose([
            A.Resize(height=self.original_resolution[0], width=self.original_resolution[1], p=1.0),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = Image.open(os.path.join(self.image_dir, self.images[idx])).convert('RGB')
        mask = Image.open(os.path.join(self.mask_dir, self.masks[idx]))

        # Convert image and mask to numpy arrays
        image = np.array(image)
        mask = np.array(mask)

        if self.validation:
            # Apply validation transform for original resolution
            transformed = self.val_transform(image=image, mask=mask)
        else:
            # Apply augmentation transform for training
            transformed = self.aug_transform(image=image, mask=mask)

        image = transformed["image"]
        mask = transformed["mask"]

        # Ensure mask is a LongTensor for CrossEntropyLoss
        mask = mask.clone().detach().to(dtype=torch.long)

        return image, mask


# Define Transform for Validation
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Dice Loss Implementation
class DiceLossIgnoringIndex0(nn.Module):
    def __init__(self, eps=1e-6):
        super(DiceLossIgnoringIndex0, self).__init__()
        self.eps = eps

    def forward(self, preds, targets):
        if preds.shape[1] > 1:
            preds = F.softmax(preds, dim=1)
        num_classes = preds.shape[1]
        targets_one_hot = F.one_hot(targets, num_classes=num_classes).permute(0, 3, 1, 2).float()
        preds = preds[:, 1:]
        targets_one_hot = targets_one_hot[:, 1:]
        intersection = torch.sum(preds * targets_one_hot, dim=(2, 3))
        union = torch.sum(preds, dim=(2, 3)) + torch.sum(targets_one_hot, dim=(2, 3))
        dice_score = (2.0 * intersection + self.eps) / (union + self.eps)
        loss = 1.0 - dice_score.mean()
        return loss



# Calculate IoU (Intersection over Union) for validation
def calculate_iou(output, target, num_classes):
    output = torch.argmax(output, dim=1)
    iou_list = []
    for i in range(num_classes):
        intersection = ((output == i) & (target == i)).sum().float()
        union = ((output == i) | (target == i)).sum().float()
        iou = intersection / (union + 1e-6)  # Avoid division by zero
        iou_list.append(iou.item())
    return np.mean(iou_list)


def calculate_iou_ignore_index_0(output, target, num_classes):
    """
    Calculate mean IoU (mIoU) for each class, ignoring class index 0.

    Args:
    - output (Tensor): The predicted output (batch_size, height, width)
    - target (Tensor): The ground truth target mask (batch_size, height, width)
    - num_classes (int): The number of classes in the segmentation task

    Returns:
    - (float): The mean IoU over all classes, excluding class index 0.
    """
    output = torch.argmax(output, dim=1)
    iou_list = []

    for i in range(1, num_classes):  # Start from 1 to ignore index 0
        intersection = ((output == i) & (target == i)).sum().float()
        union = ((output == i) | (target == i)).sum().float()
        iou = intersection / (union + 1e-6)  # Avoid division by zero
        iou_list.append(iou.item())

    return np.mean(iou_list) if iou_list else 0.0


import matplotlib.pyplot as plt

def visualize_predictions(images, predictions, ground_truths, num_classes):
    for idx, (image, pred, gt) in enumerate(zip(images, predictions, ground_truths)):
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 3, 1)
        plt.title("Image")
        plt.imshow(image.permute(1, 2, 0).cpu().numpy())  # Original input image
        plt.subplot(1, 3, 2)
        plt.title("Prediction")
        plt.imshow(pred.cpu().numpy(), cmap='tab20', vmin=0, vmax=num_classes-1)  # Prediction mask
        plt.subplot(1, 3, 3)
        plt.title("Ground Truth")
        plt.imshow(gt.cpu().numpy(), cmap='tab20', vmin=0, vmax=num_classes-1)  # Actual ground truth mask

from torch.optim.lr_scheduler import ReduceLROnPlateau


def train(preferred_resolution=(512, 512)):
    # Paths and Hyperparameters
    dataset_dir = "datasets/Train/Train/Rural"
    output_dir = "checkpoints"
    os.makedirs(output_dir, exist_ok=True)
    log_dir = "logs"
    batch_size = 6
    num_classes = 8
    lr = 0.001
    epochs = 20
    save_interval = 5

    print(f"batch_size = {batch_size}")
    print(f"lr = {lr}")


    # Dataset Paths
    train_images = os.path.join(dataset_dir, "images_png")
    train_masks = os.path.join(dataset_dir, "masks_png")
    val_dir = "datasets/Val/Val/Rural"
    val_images = os.path.join(val_dir, "images_png")
    val_masks = os.path.join(val_dir, "masks_png")

    # Datasets and DataLoaders
    train_dataset = SimpleSegmentationDataset(train_images, train_masks, preferred_resolution=preferred_resolution, augment=True)
    val_dataset = SimpleSegmentationDataset(val_images, val_masks, original_resolution=(1024, 1024), augment=False, validation=True)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Model Initialization
    model = DeepLabV2(n_classes=num_classes)
    model = nn.DataParallel(model).cuda()

    # Optimizer and Loss
    #optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)

    #criterion = nn.CrossEntropyLoss()
    #criterion = DiceLossIgnoringIndex0()
    #criterion = OhemCrossEntropy(ignore_label=0)
    criterion = FocalLoss()

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    print(f"Optimizer: {type(optimizer).__name__}")  # Print the optimizer name
    print(f"Criterion: {type(criterion).__name__}")  # Print the name of the criterion class
    print(f"Scheduler: {type(scheduler).__name__}") # Print the name of the scheduler class

    # TensorBoard Setup
    writer = SummaryWriter(log_dir=log_dir)

    # Profiling FLOPs and Parameters (only once before training starts)
    images, _ = next(iter(val_loader))  # Get a single batch
    images = images.cuda()  # Move images to GPU

    # Temporarily move the model to CPU for profiling
    model_cpu = model.module.cpu()  # Extract the model from DataParallel
    flops, params = profile(model_cpu, inputs=(images.cpu(),), verbose=False)  # Use images on CPU

    model = model.cuda()  # Move model back to GPU


    # Training Loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for images, masks in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, masks = images.cuda(), masks.cuda()

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, masks)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        writer.add_scalar("Loss/train", avg_loss, epoch)
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {avg_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0.0
        val_iou = 0.0
        total_latency = 0.0

        with torch.no_grad():
            for images, masks in tqdm(val_loader, desc="Validation"):
                images, masks = images.cuda(), masks.cuda()

                # Measure latency
                start_time = time.time()

                # Forward pass
                outputs = model(images)  # Outputs are at wanted resolution

                end_time = time.time()
                total_latency += (end_time - start_time)


                # Upsample the outputs to match the original mask resolution
                outputs_upsampled = torch.nn.functional.interpolate(outputs, size=(1024, 1024), mode='bilinear', align_corners=False)

                # Compute Loss
                loss = criterion(outputs_upsampled, masks)
                val_loss += loss.item()

                # Compute IoU
                #val_iou += calculate_iou(outputs_upsampled, masks, num_classes)
                val_iou += calculate_iou_ignore_index_0(outputs_upsampled, masks, num_classes)

        avg_val_loss = val_loss / len(val_loader)
        avg_val_iou = val_iou / len(val_loader)
        avg_latency = total_latency / len(val_loader)
        avg_latency_per_image = avg_latency / batch_size


        # Logging Metrics
        writer.add_scalar("Loss/val", avg_val_loss, epoch)
        writer.add_scalar("IoU/val", avg_val_iou, epoch)
        writer.add_scalar("Latency/val", avg_latency, epoch)
        writer.add_scalar("FLOPs", flops, epoch)
        writer.add_scalar("Parameters", params, epoch)

        print(f"Validation - Loss: {avg_val_loss:.4f}, IoU: {avg_val_iou:.4f}, Latency: {avg_latency_per_image:.6f} sec")
        print(f"FLOPs: {flops:.2e}, Params: {params:.2e}")


        # Step Scheduler
        scheduler.step(avg_val_loss)

        # Save Model Checkpoint
        if (epoch + 1) % save_interval == 0:
            checkpoint_path = os.path.join(output_dir, f"model_epoch_{epoch+1}.pth")
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': avg_loss,
            }, checkpoint_path)

    writer.close()

if __name__ == "__main__":
    # Example: Train with a preferred resolution of 512x512
    train(preferred_resolution=(720, 720))

batch_size = 6
lr = 0.001
Optimizer: Adam
Criterion: FocalLoss
Scheduler: CosineAnnealingLR


Epoch 1/20: 100%|██████████| 228/228 [04:38<00:00,  1.22s/it]


Epoch [1/20] - Loss: 1.3027


Validation: 100%|██████████| 166/166 [02:11<00:00,  1.27it/s]


Validation - Loss: 1.0573, IoU: 0.1858, Latency: 0.032477 sec
FLOPs: 1.10e+12, Params: 6.14e+07


Epoch 2/20:  17%|█▋        | 38/228 [00:47<03:54,  1.24s/it]