## <font style="color:blue">Project 4: Kaggle Competition - Semantic Segmentation</font>

#### Maximum Points: 100

<div>
    <table>
        <tr><td><h6>Sr. no.</h6></td> <td><h6>Section</h6></td> <td><h6>Points</h6></td> </tr>
        <tr><td><h6>1</h6></td> <td><h6>1.1. Dataset Class</h6></td> <td><h6>7</h6></td> </tr>
        <tr><td><h6>2</h6></td> <td><h6>1.2. Visualize dataset</h6></td> <td><h6>3</h6></td> </tr>
        <tr><td><h6>3</h6></td> <td><h6>2. Evaluation Metrics</h6></td> <td><h6>10</h6></td> </tr>
        <tr><td><h6>4</h6></td> <td><h6>3. Model</h6></td> <td><h6>10</h6></td> </tr>
        <tr><td><h6>5</h6></td> <td><h6>4.1. Train</h6></td> <td><h6>7</h6></td> </tr>
        <tr><td><h6>6</h6></td> <td><h6>4.2. Inference</h6></td> <td><h6>3</h6></td> </tr>
        <tr><td><h6>7</h6></td> <td><h6>5. Prepare Submission CSV</h6></td><td><h6>10</h6></td> </tr>
        <tr><td><h6>8</h6></td> <td><h6>6. Kaggle Profile Link</h6></td> <td><h6>50</h6></td> </tr>
    </table>
</div>

---

<h2>Dataset Description </h2>
<p>The dataset consists of 3,269 images annotated with 12 classes (including background). All images were captured by drones at a variety of scales. Samples are shown below:
<img src="https://github.com/ishann/aeroscapes/blob/master/assets/data_montage.png?raw=true" width="800" height="800">
<p>The data was split into a public train set and a private test set; the private test set is used to evaluate submissions.

In [None]:
# Root folder of the competition data; train.csv, test.csv, images and masks are read from here.
DATA_PATH = "/kaggle/input/opencv-pytorch-segmentation-project/"
# Folder where the notebook writes its own artifacts (e.g. model checkpoints).
OUTPUT_PATH = "/kaggle/working/"

In [None]:
!pip install --quiet albumentations
!pip install --quiet torch-lr-finder
!pip install --quiet segmentation-models-pytorch
!pip install --quiet pytorch_toolbelt
!pip install --quiet iterative-stratification
!pip install --quiet torcheval

In [None]:
# Standard Library imports
import time
from pathlib import Path
import os
from collections import defaultdict

# External imports
import cv2
import torch
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import torchvision.transforms.functional as F
import torchvision.transforms as T
from torchvision import models
from torchvision.io import read_image
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from tqdm.autonotebook import tqdm
from albumentations.pytorch import ToTensorV2
import albumentations as A
from torch_lr_finder import LRFinder
import segmentation_models_pytorch as smp
from pytorch_toolbelt.utils.rle import rle_encode, rle_to_string
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
from torcheval.metrics import MulticlassConfusionMatrix

plt.style.use('bmh')

In [None]:
def plot(x, ys:list, labels:list):
    """
    Plot one or more curves against a shared x-axis and show the figure.

    Args:
        x: Sequence of x values (epoch numbers).
        ys (list): One sequence of y values per curve.
        labels (list): Legend label of each curve, in the same order as ``ys``.

    Raises:
        ValueError: If there are fewer labels than curves.
    """
    if len(labels) < len(ys):
        raise ValueError(f"Got {len(ys)} curves but only {len(labels)} labels.")

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 3))
    # Draw through the Axes object so the function does not depend on pyplot's "current axes"
    for y, label in zip(ys, labels):
        ax.plot(x, y, label=label)
    ax.legend()
    ax.set_xlabel("Epoch")
    # One major tick per epoch; y grid every 0.1 (major) and 0.05 (minor)
    ax.xaxis.set_major_locator(MultipleLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(0.1))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.grid(which='major', color='black', linestyle='-')
    ax.grid(which='minor', color='gray', linestyle='-', alpha=0.2)
    plt.show()
    # Release the figure, as the other plotting helpers in this notebook do
    plt.close(fig)

In [None]:
class Config:
    """Hyper-parameters and constants shared by the whole notebook."""

    # Samples per forward/backward pass. The original expression selected 2 both with
    # and without a GPU, so the value is stated directly.
    BATCH_SIZE = 2
    # Number of batches whose gradients are summed before one optimizer step
    GRADIENT_ACCUMULATION_STEPS = 16
    EPOCHS = 5
    # Number of segmentation classes, background included
    NUM_CLASSES = 12
    # NOTE(review): the crop size is not referenced by the code visible in this notebook
    CROP_WIDTH = 512
    CROP_HEIGHT = 512

    # Parameters from: https://github.com/ultralytics/yolov5/blob/95ebf68f92196975e53ebc7e971d0130432ad107/data/hyps/hyp.scratch-low.yaml
    # INITIAL_LR is passed to the optimizer, FINAL_LR is used as `max_lr` of the OneCycleLR scheduler
    INITIAL_LR = 0.01
    FINAL_LR = 0.01
    MOMENTUM = 0.937 # SGD momentum/Adam beta1, from Yolo v5
    WEIGHT_DECAY = 0.0005 # optimizer weight decay, from Yolo v5

    NUM_WORKERS = 4  # There are 4 CPUs in Kaggle
    # Fraction of the labelled data used for training; the rest is used for validation
    TRAIN_SPLIT = 0.8

    # This is basically the "background" class
    MASK_FILL_VALUE = 0

# Detect if we have a GPU available; fall back to the CPU otherwise
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Seeding of the torch RNG is disabled here
#torch.manual_seed(42)

# Single shared instance through which the rest of the notebook reads the settings
config = Config()

# <font style="color:green">1. Data Exploration</font>

## <font style="color:green">1.1. Dataset Class [7 Points]</font>

In [None]:
class SemSegDataset(Dataset):
    """
    Generic Dataset class for semantic segmentation datasets.

    Images are read from ``<data_path>/<images_folder>/<image_id>.jpg`` and masks from
    ``<data_path>/<masks_folder>/<image_id>.png``.
    """

    def __init__(
        self,
        data_path,
        images_folder,
        masks_folder,
        image_ids,
        train_val_test,
        transforms=None,
    ):
        """
        Args:
            data_path (string): Path to the dataset folder.
            images_folder (string): Name of the folder containing the images.
            masks_folder (string): Name of the folder containing the masks.
            image_ids (list): List of image IDs to include in the dataset.
            train_val_test (string): 'train', 'val', or 'test'. Stored only; it does not
                change how samples are loaded.
            transforms (callable, optional): Callable invoked as ``transforms(image=..., mask=...)``
                that returns a dict with the transformed ``'image'`` (and ``'mask'``).
        """

        self.data_path = data_path
        self.images_folder = images_folder
        self.masks_folder = masks_folder
        self.image_ids = image_ids
        self.train_val_test = train_val_test
        self.transforms = transforms

    def __len__(self):
        """Return the number of image IDs in the dataset."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """
        Load one sample.

        Returns:
            ``(image, mask)``; the mask is ``None`` when no mask file could be read and no
            transforms are set. When transforms are set and the mask file could not be
            read, only the transformed image is returned.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[idx]

        # Get image and mask paths
        image_path = os.path.join(self.data_path, self.images_folder, f"{image_id}.jpg")
        mask_path = os.path.join(self.data_path, self.masks_folder, f"{image_id}.png")

        # cv2.imread signals an unreadable file by returning None instead of raising;
        # fail here with the offending path rather than later inside cvtColor
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image '{image_path}'")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # The mask stays None when there is no mask file for this image
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

        if self.transforms is None:
            return image, mask

        if mask is None:
            return self.transforms(image=image)['image']

        transformed = self.transforms(image=image, mask=mask)
        return transformed['image'], transformed['mask']

In [None]:
# The CSV files list the image IDs of the labelled (train) and unlabelled (test) images
train_csv_path = Path(DATA_PATH) / "train.csv"
test_csv_path = Path(DATA_PATH) / "test.csv"

train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# The train/validation IDs are derived later from train_df with a stratified split.
# Series.tolist() already returns a flat Python list; the former Series.ravel() call is
# deprecated in recent pandas releases and was not needed.
test_ids = test_df.ImageID.tolist()

In [None]:
# All labelled images without any transforms: samples are the raw RGB image and the
# raw grayscale mask, which is what the class statistics below are computed from.
# NOTE(review): the IDs are passed as a pandas Series here (the other datasets get lists).
whole_dataset = SemSegDataset(
    DATA_PATH,
    "imgs/imgs",
    "masks/masks",
    train_df.ImageID,
    train_val_test="train"
)

def extract_and_onehot_encode_classes_from_multilabel_masks(dataset, num_classes=None):
    """
    Extract the classes present in each mask and encode them as a multi-hot vector.

    Args:
        dataset: Indexable dataset whose items are ``(image, mask)`` pairs; the mask
            holds one integer class index per pixel.
        num_classes (int, optional): Length of each encoded vector. Defaults to
            ``config.NUM_CLASSES``.

    Returns:
        A NumPy int64 array of shape (num_images, num_classes). Each value represents
        the presence (1) or absence (0) of a specific class in the corresponding image.

    Raises:
        ValueError: If a sample has no mask.
    """
    if num_classes is None:
        num_classes = config.NUM_CLASSES

    rows = []
    for i in tqdm(range(len(dataset))):
        _, mask = dataset[i]
        if mask is None:
            raise ValueError(f"Sample {i} has no mask; cannot extract its classes.")

        # Distinct class indices of this mask, kept as integers all the way through
        class_ids = torch.from_numpy(np.unique(mask)).to(torch.int64)
        # (num_present, num_classes) one-hot rows collapsed into one multi-hot vector
        one_hot = torch.nn.functional.one_hot(class_ids, num_classes=num_classes)
        rows.append(one_hot.sum(dim=0).numpy())

    if not rows:
        # An empty dataset yields an empty matrix instead of a concatenation error
        return np.zeros((0, num_classes), dtype=np.int64)

    return np.stack(rows, axis=0)

In [None]:
# X: the image IDs to split; y: per-image class-presence matrix (one row per image)
# used as the stratification labels. Building y loads every mask once.
X = train_df.ImageID
y = extract_and_onehot_encode_classes_from_multilabel_masks(whole_dataset)

In [None]:
# Single multilabel-stratified shuffle split with a fixed random_state; the held-out
# fraction is 1 - TRAIN_SPLIT.
msss = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=(1-config.TRAIN_SPLIT), random_state=0)

# n_splits=1, so the loop body runs once and defines train_ids / valid_ids
for train_index, test_index in msss.split(X, y):
    train_ids, valid_ids = X[train_index], X[test_index]
    # y_train, y_test = y[train_index], y[test_index]

In [None]:
# Training-time augmentation pipeline; it ends with normalization and conversion to tensors.
train_transforms = A.Compose([
    # Random mirror flips, each applied with probability 0.5
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    # Scale-only jitter (shift and rotation limits are 0). Uncovered borders are filled
    # with 0 in the image and MASK_FILL_VALUE in the mask.
    A.ShiftScaleRotate(
        shift_limit=0,
        rotate_limit=0,
        scale_limit=0.1,
        border_mode=cv2.BORDER_CONSTANT,
        value=0,
        mask_value=config.MASK_FILL_VALUE,
        interpolation=cv2.INTER_CUBIC,
        p=0.5,
    ),
    # Very mild photometric jitter
    A.RandomBrightnessContrast(
        brightness_limit=0.02,
        contrast_limit=0.02,
        p=0.5
    ),
    # Elastic deformation, applied with probability 0.5, same border fill values as above
    A.ElasticTransform(
        alpha=120,
        sigma=20,
        value=0,
        mask_value=config.MASK_FILL_VALUE,
        border_mode=cv2.BORDER_CONSTANT,
        alpha_affine=5,
        interpolation=cv2.INTER_CUBIC,
        p=0.5,
    ),
    # Grid distortion is applied to every sample (p=1)
    A.GridDistortion(
        num_steps=10,
        distort_limit=0.03,
        value=0,
        mask_value=config.MASK_FILL_VALUE,
        border_mode=cv2.BORDER_CONSTANT,
        p=1,
        interpolation=cv2.INTER_CUBIC,
    ),
    # Default normalization arguments (presumably ImageNet mean/std; confirm for the
    # installed albumentations version), then conversion to a torch tensor
    A.Normalize(),
    ToTensorV2(),
])

# Validation and test images are only normalized and converted to tensors
valid_transforms = A.Compose([
    A.Normalize(),
    ToTensorV2()
])

test_transforms = A.Compose([
    A.Normalize(),
    ToTensorV2()
])


# Augmented training set built from the stratified train IDs
train_dataset = SemSegDataset(
    DATA_PATH,
    "imgs/imgs",
    "masks/masks",
    train_ids.tolist(),
    train_val_test="train",
    transforms=train_transforms,
)

# Validation set: same folders, held-out IDs, no augmentation
valid_dataset = SemSegDataset(
    DATA_PATH,
    "imgs/imgs",
    "masks/masks",
    valid_ids.tolist(),
    train_val_test="validation",
    transforms=valid_transforms
)

# Test set built from the IDs of test.csv.
# NOTE(review): a masks folder is passed here too; presumably no mask files exist for
# the test IDs, in which case the dataset returns only the image. Confirm.
test_dataset = SemSegDataset(
    DATA_PATH,
    "imgs/imgs",
    "masks/masks",
    test_ids,
    train_val_test="test",
    transforms=test_transforms
)

# Reason for drop_last: https://discuss.pytorch.org/t/error-expected-more-than-1-value-per-channel-when-training/26274/5
# Only the training loader shuffles. Both loaders drop an incomplete last batch, so a
# trailing validation sample would be skipped when the validation set size is not a
# multiple of BATCH_SIZE.
train_dataloader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, drop_last=True, num_workers=config.NUM_WORKERS)
valid_dataloader = DataLoader(valid_dataset, batch_size=config.BATCH_SIZE, shuffle=False, drop_last=True, num_workers=config.NUM_WORKERS)
#test_dataloader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, shuffle=False, drop_last=False, num_workers=config.NUM_WORKERS)

## <font style="color:green">1.2. Visualize dataset [3 Points]</font>

In [None]:
def draw_semantic_segmentation_batch(
    dataset,
    n_samples=3,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
):
    """
    Show the first ``n_samples`` samples of a dataset as image / color-coded mask pairs.

    Args:
        dataset: Dataset whose items are ``(image, mask)`` torch tensors, the image in
            CHW layout and the mask holding one class index per pixel.
        n_samples (int): Number of samples (rows) to draw.
        mean, std: Per-channel statistics used to undo the normalization of
            floating-point images before display. The defaults are the ImageNet
            values that ``A.Normalize()`` uses by default.
    """
    # Fixed RGB color per class index
    colors = np.array([
        [0, 0, 0], # background
        [248, 200, 220], # person
        [0, 255, 0], # bike
        [107, 107, 107], # car
        [144, 12, 63], # drone
        [25, 25, 112], # boat
        [255, 0, 255], # animal
        [255, 0, 0], # obstacle
        [255, 195, 0], # construction
        [9, 121, 105], # vegetation
        [200, 200, 60], # road
        [135, 206, 235], # sky
    ], dtype=np.uint8)

    # squeeze=False keeps `ax` two-dimensional even when n_samples == 1
    fig, ax = plt.subplots(nrows=n_samples, ncols=2, squeeze=False, sharey=True, figsize=(10, 3*n_samples))
    for i in range(n_samples):
        image, mask = dataset[i]

        # CHW -> HWC
        image = image.permute(1, 2, 0).detach().cpu().numpy()
        if np.issubdtype(image.dtype, np.floating):
            # Normalized images contain values outside [0, 1], which imshow would clip;
            # map them back to the displayable range first
            image = np.clip(image * np.asarray(std) + np.asarray(mean), 0.0, 1.0)
        ax[i][0].imshow(image)
        ax[i][0].set_xlabel("Image")
        ax[i][0].set_xticks([])
        ax[i][0].set_yticks([])

        # Integer class indices are used to look up the color of every pixel
        mask = torch.squeeze(mask).detach().cpu().numpy().astype(np.int64)
        rgb_labels = colors[mask]

        ax[i][1].imshow(rgb_labels)
        ax[i][1].set_xlabel("Ground truth mask")
        ax[i][1].set_xticks([])
        ax[i][1].set_yticks([])

    plt.tight_layout()
    plt.show()
    plt.close(fig)

In [None]:
# Preview the first 10 samples of the augmented training set
draw_semantic_segmentation_batch(train_dataset, n_samples=10)

In [None]:
# Preview the first 10 samples of the validation set (no augmentation)
draw_semantic_segmentation_batch(valid_dataset, n_samples=10)

## Visualize each class

In [None]:
def draw_mask_overlay(color_image, mask, class_id, alpha=0.5, color=(0, 0, 255)):
    """
    Highlight the pixels of one class on a copy of an image.

    Args:
        color_image: Image to draw on; it is not modified.
        mask: Array of class indices with the same height and width as the image.
        class_id: Class whose pixels are highlighted.
        alpha (float): Weight of the painted copy in the blend; the original image
            gets ``1 - alpha``.
        color: Color assigned to the highlighted pixels before blending.

    Returns:
        The blended image.
    """
    painted = np.copy(color_image)
    selected = mask == class_id
    painted[selected] = color

    # Blend the painted copy with the untouched image, writing the result into the copy
    original_weight = 1 - alpha
    cv2.addWeighted(painted, alpha, color_image, original_weight, 0, dst=painted)
    return painted

In [None]:
# Take the first raw sample (whole_dataset has no transforms, so the image is already
# an HWC array) and draw one figure per class with that class highlighted.
image, mask = whole_dataset[0]
#image = image.permute(1, 2, 0).detach().cpu().numpy()  # CHW -> HWC

for i in range(config.NUM_CLASSES):
    
    # (255, 0, 0) is red, since the dataset converts images to RGB
    overlay = draw_mask_overlay(image, mask, i, alpha=0.5, color=(255,0,0))

    fig, ax = plt.subplots(nrows=1, ncols=1, sharey=True, figsize=(6, 6))
    ax.imshow(overlay)
    ax.set_xlabel(f"Class {i}")
    ax.set_xticks([])
    ax.set_yticks([])

    plt.tight_layout()
    plt.show()
    plt.close(fig)

In [None]:
def count_images_per_class(dataset):
    """
    Count, for every class, the number of samples whose mask contains it.

    Args:
        dataset: Indexable dataset whose items are ``(image, mask)`` pairs.

    Returns:
        A ``defaultdict(int)`` mapping each class value found in the masks to the
        number of samples containing it; classes that never occur read as 0.
    """
    images_per_class = defaultdict(int)

    sample_indices = tqdm(range(len(dataset)))
    for sample_idx in sample_indices:
        _, sample_mask = dataset[sample_idx]
        # Every class present in this mask counts once for this sample
        for class_value in np.unique(sample_mask):
            images_per_class[class_value] += 1

    return images_per_class

In [None]:
# dataset_count = count_images_per_class(whole_dataset)

In [None]:
# NOTE: the train dataset pipeline applies random geometric augmentations (scaling, elastic and grid distortion)
# that can move objects out of the frame, so an augmented train image may contain fewer classes than the original image
# train_count = count_images_per_class(train_dataset)

In [None]:
# valid_count = count_images_per_class(valid_dataset)

In [None]:
# for i in range(config.NUM_CLASSES):
#     print(f"Class {i:02}: total: {dataset_count[i]:04} | train: {train_count[i]:04} | valid: {valid_count[i]:04}")

# <font style="color:green">2. Evaluation Metrics [10 Points]</font>

<p>This competition is evaluated on the mean <a href='https://en.wikipedia.org/wiki/Sørensen–Dice_coefficient'>Dice coefficient</a
>. The Dice coefficient can be used to compare the pixel-wise agreement between a predicted segmentation and its corresponding ground truth. The formula is given by: </p>

<p>$$DSC =  \frac{2 |X \cap Y|}{|X|+ |Y|}$$
$$ \small \mathrm{where}\ X = Predicted\ Set\ of\ Pixels,\ \ Y = Ground\ Truth $$ </p>
<p>The Dice coefficient is defined to be 1 when both X and Y are empty.</p>

In [None]:
class DiceScore(torch.nn.Module):
    """
    Per-class Dice coefficient computed from a multiclass confusion matrix.

    Arguments:
        num_classes (int): number of classes.
        ignore_index (int): kept for API compatibility. It is stored but NOT applied:
            every class, including this one, is part of the returned scores.
    """

    def __init__(self, num_classes, ignore_index=0):
        super().__init__()
        self.num_classes = num_classes
        self.ignore_index: int = ignore_index
        # Smoothing term: a class absent from both prediction and target scores 1
        self.eps = 1e-6
        self.metric = MulticlassConfusionMatrix(self.num_classes)

    def forward(self, pred, target):
        """
        Compute the Dice coefficient of every class for one batch.

        Implemented as ``forward`` (not ``__call__``) so the standard ``nn.Module``
        call machinery is used; ``scorer(pred, target)`` works as before.

        Arguments:
            pred: predicted class indices, NxHxW.
            target: ground-truth class indices with the same number of elements
                as ``pred`` (NxHxW).
        Returns:
            Tensor with one Dice coefficient per class.
        Raises:
            ValueError: if ``pred`` and ``target`` differ in number of elements.
        """
        if pred.numel() != target.numel():
            raise ValueError(
                f"pred and target must have the same number of elements, "
                f"got {tuple(pred.shape)} and {tuple(target.shape)}"
            )

        # The confusion matrix is rebuilt for every call, so scores are per batch
        self.metric.reset()

        self.metric.update(pred.flatten(), target.flatten())
        conf_matrix = self.metric.compute()

        # Dice only needs FP + FN, so it does not matter which axis holds the predictions
        true_positive = torch.diag(conf_matrix)
        false_positive = torch.sum(conf_matrix, 0) - true_positive
        false_negative = torch.sum(conf_matrix, 1) - true_positive

        DSC = (2 * true_positive + self.eps) / (
            2 * true_positive + false_positive + false_negative + self.eps
        )

        return DSC

# <font style="color:green">3. Model [10 Points]</font>

In [None]:
def make_model():
    """
    Build a DeepLabV3-ResNet101 adapted to ``config.NUM_CLASSES`` output classes.

    The pretrained network gets a new ``DeepLabHead`` as main classifier and a new
    final 1x1 convolution in the auxiliary classifier. Only the last backbone stage
    (``layer4``) and the two classifier heads remain trainable.

    Returns:
        The configured model (not yet moved to a device).
    """
    net = models.segmentation.deeplabv3_resnet101(
        weights='DeepLabV3_ResNet101_Weights.DEFAULT',
        progress=True,
    )

    n_outputs = config.NUM_CLASSES

    # New main head. DeepLabHead(in_channels, num_classes) consists of
    # ASPP(in_channels, [12, 24, 36]) -> Conv2d(256, 256, 3) -> BatchNorm2d -> ReLU -> Conv2d(256, num_classes, 1).
    # An alternative would be to replace only the last layer of each head, e.g. with nn.LazyConv2d(num_classes, 1).
    net.classifier = models.segmentation.deeplabv3.DeepLabHead(2048, n_outputs)
    net.aux_classifier[4] = torch.nn.Conv2d(256, n_outputs, 1)

    # Freeze everything, then re-enable gradients for the parts that are fine-tuned
    net.requires_grad_(False)
    for trainable_part in (net.backbone.layer4, net.classifier, net.aux_classifier):
        trainable_part.requires_grad_(True)

    return net

In [None]:
# Build the network and move it to the selected device (GPU when available)
model = make_model().to(device)

# <font style="color:green">4. Train & Inference</font>
## <font style="color:green">4.1. Train [7 Points]</font>

In [None]:
class SoftDiceLoss(torch.nn.Module):
    """
        Soft-Dice loss: the mean over classes of the negative log Dice coefficient.

        Arguments:
            num_classes (int): number of classes.
            eps (float): smoothing term added to numerator and denominator.
    """
    def __init__(self, num_classes, eps=1e-5):
        super().__init__()
        self.num_classes = num_classes
        self.eps = eps

    def forward(self, preds, targets):
        """
            Compute Soft-Dice Loss.

            Arguments:
                preds (torch.FloatTensor):
                    per-class scores, shape (B, num_classes, H, W).
                targets (torch.LongTensor):
                    ground-truth class indices, shape (B, H, W).
            Returns:
                the negative log Dice coefficient averaged over the classes.
        """

        accumulated = 0
        for class_idx in range(self.num_classes):
            # binary ground truth and predicted scores of this class
            truth = targets.eq(class_idx).float()
            scores = preds[:, class_idx]

            # soft intersection over the whole batch
            overlap = torch.sum(scores * truth)

            numerator = 2 * overlap + self.eps
            denominator = scores.sum() + truth.sum() + self.eps
            dice_coefficient = numerator / denominator

            # a perfect class (dice = 1) contributes 0, a poor one a large positive term
            accumulated = accumulated - torch.log(dice_coefficient)

        return accumulated / self.num_classes

In [None]:
def evaluate_model(model, x, y, loss_fun, metric):
    """
    Run the model on one batch and compute its loss and score.

    Args:
        model: Callable returning a dict whose ``'out'`` entry holds the logits,
            with the classes along dimension 1.
        x: Input batch.
        y: Ground-truth labels of the batch.
        loss_fun: Callable ``loss_fun(logits, y)``.
        metric: Callable ``metric(predicted_classes, y)``.

    Returns:
        Tuple ``(loss, score, predicted_classes)`` where ``predicted_classes`` is the
        argmax over the class dimension.
    """
    logits = model(x)['out']

    # The loss works on the raw logits
    loss = loss_fun(logits, y)

    # Hard class decision per position, used for the metric
    predicted_classes = torch.argmax(torch.softmax(logits, dim=1), dim=1)
    score = metric(predicted_classes, y)

    return loss, score, predicted_classes

In [None]:
class CombinedCrossEntropySoftDice(torch.nn.Module):
    """
    Weighted sum of two losses computed from the same logits.

    Arguments:
        loss_fn1: loss that receives class probabilities (softmax of the logits).
        loss_fn2: loss that receives the raw logits.
        weight1 (float): weight of ``loss_fn1``.
        weight2 (float): weight of ``loss_fn2``.
    """

    def __init__(self, loss_fn1, loss_fn2, weight1=0.5, weight2=0.5):
        super().__init__()
        self.loss_fn1 = loss_fn1
        self.loss_fn2 = loss_fn2
        self.weight1 = weight1
        self.weight2 = weight2

    def forward(self, preds_logits, targets):
        """
        Arguments:
            preds_logits: logits with the classes along dimension 1, or a dict
                holding them under the key ``'out'``.
            targets: ground-truth labels, passed unchanged to both losses.
        Returns:
            ``weight1 * loss_fn1(probabilities, targets) + weight2 * loss_fn2(logits, targets)``.
        """
        # Accept the raw output dict of the segmentation model as well
        logits = preds_logits['out'] if isinstance(preds_logits, dict) else preds_logits
        probabilities = torch.softmax(logits, dim=1)

        probability_term = self.loss_fn1(probabilities, targets)
        logit_term = self.loss_fn2(logits, targets)

        return self.weight1*probability_term + self.weight2*logit_term

In [None]:
def smart_optimizer(model, name="Adam", lr=0.001, momentum=0.9, decay=1e-5):
    """
    Create an optimizer that applies weight decay only to non-normalization weights.
    From: https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py#L330

    Args:
        model: Module whose parameters are optimized.
        name (str): One of "Adam", "AdamW", "RMSProp" or "SGD".
        lr (float): Learning rate.
        momentum (float): Momentum (SGD, RMSProp) or beta1 (Adam, AdamW).
        decay (float): Weight decay of the decayed group.

    Returns:
        The optimizer, with parameter groups in this order: biases (no decay),
        weights with decay, normalization-layer weights (no decay).

    Raises:
        NotImplementedError: If ``name`` is not a supported optimizer.
    """
    # Every class of torch.nn with "Norm" in its name, e.g. BatchNorm2d
    norm_layer_types = tuple(v for k, v in torch.nn.__dict__.items() if "Norm" in k)

    decayed_weights, norm_weights, biases = [], [], []
    for module in model.modules():
        # recurse=False: each parameter is visited once, through its direct owner
        for param_name, param in module.named_parameters(recurse=False):
            if param_name == "bias":
                biases.append(param)
            elif param_name == "weight" and isinstance(module, norm_layer_types):
                norm_weights.append(param)
            else:
                decayed_weights.append(param)

    # The optimizer is created from the bias group; the other groups are added below
    factories = {
        "Adam": lambda params: torch.optim.Adam(params, lr=lr, betas=(momentum, 0.999)),
        "AdamW": lambda params: torch.optim.AdamW(params, lr=lr, betas=(momentum, 0.999), weight_decay=0.0),
        "RMSProp": lambda params: torch.optim.RMSprop(params, lr=lr, momentum=momentum),
        "SGD": lambda params: torch.optim.SGD(params, lr=lr, momentum=momentum, nesterov=True),
    }
    if name not in factories:
        raise NotImplementedError(f"Optimizer {name} not implemented.")
    optimizer = factories[name](biases)

    optimizer.add_param_group({"params": decayed_weights, "weight_decay": decay})
    optimizer.add_param_group({"params": norm_weights, "weight_decay": 0.0})

    summary = (
        f"optimizer: {type(optimizer).__name__}(lr={lr}) with parameter groups "
        f"{len(norm_weights)} weight(decay=0.0), {len(decayed_weights)} weight(decay={decay}), {len(biases)} bias"
    )
    print(summary)
    return optimizer

In [None]:
# Produces a loss around 10
loss_fun1 = SoftDiceLoss(num_classes=config.NUM_CLASSES).to(device)
# Using reduction='mean' produces a loss of around 2, using reduction='sum' produces a total loss in the order of millions
# Receives logits
# NOTE(review): ignore_index is MASK_FILL_VALUE, i.e. the background class (0); confirm that
# excluding background pixels from the focal loss is intended.
loss_fun2 = smp.losses.FocalLoss("multiclass", ignore_index=config.MASK_FILL_VALUE, normalized=False, reduction='mean').to(device)

# Both terms are weighted equally
loss_fun = CombinedCrossEntropySoftDice(loss_fun1, loss_fun2, weight1=1, weight2=1).to(device)

scorer = DiceScore(num_classes=config.NUM_CLASSES).to(device)

# optimizer = smart_optimizer(model, "SGD", lr=config.INITIAL_LR, momentum=config.MOMENTUM, decay=config.WEIGHT_DECAY)
# optimizer = torch.optim.SGD(model.parameters(), lr=config.INITIAL_LR, momentum=config.MOMENTUM, nesterov=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=config.INITIAL_LR)

# From: https://github.com/ultralytics/yolov5/blob/95ebf68f92196975e53ebc7e971d0130432ad107/segment/train.py#L213
# lf = lambda x: (1 - x / config.EPOCHS) * (1.0 - lrf) + lrf  # linear
# scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)  # plot_lr_scheduler(optimizer, scheduler, epochs)

# The training loop steps the scheduler once per optimizer update, i.e. once every
# GRADIENT_ACCUMULATION_STEPS batches plus once for a trailing partial group. The one-cycle
# schedule must be sized in those units (ceiling division), otherwise only a small fraction
# of the cycle is ever traversed.
optimizer_steps_per_epoch = (
    len(train_dataloader) + config.GRADIENT_ACCUMULATION_STEPS - 1
) // config.GRADIENT_ACCUMULATION_STEPS

scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=config.FINAL_LR,
    steps_per_epoch=optimizer_steps_per_epoch,
    epochs=config.EPOCHS,
)

In [None]:
# # Find best initial Learning Rate
# temp_opt = torch.optim.SGD(model.parameters(), lr=config.INITIAL_LR, momentum=config.MOMENTUM, nesterov=True)
# lr_finder = LRFinder(model, temp_opt, loss_fun, device=device)
# # Using it with val_loader=valid_dataloader is super slow
# lr_finder.range_test(train_dataloader, end_lr=1, num_iter=100)
# lr_finder.plot()
# lr_finder.reset()

In [None]:
# Extract the best Learning Rate
# lrs = np.array(lr_finder.history["lr"])
# losses = np.array(lr_finder.history["loss"])
# 
# min_grad_idx = None
# try:
#     min_grad_idx = (np.gradient(np.array(losses))).argmin()
# except ValueError:
#     print("Failed to compute the gradients, there might not be enough points.")
# if min_grad_idx is not None:
#     best_lr = lrs[min_grad_idx]
# 
# print(f"Best lr:", best_lr)

# Setup the optimizer with the new Learning Rate
# optimizer = torch.optim.SGD(model.parameters(), lr=best_lr, momentum=0.9)

In [None]:
# Per-epoch history; 'per_class_score' holds one array of NUM_CLASSES mean Dice values per epoch
H = {"train_loss": [], "train_score": [], "valid_loss": [], "valid_score": [], 'per_class_score': []}

for e in range(0, config.EPOCHS):

    print("\n[INFO] EPOCH: {}/{}".format(e + 1, config.EPOCHS))

    model.train()

    total_epoch_train_loss = 0
    total_epoch_valid_loss = 0

    total_epoch_train_score = 0
    total_epoch_valid_score = 0
    # Running sum of the per-class Dice vectors of all validation batches
    total_epoch_valid_class_score = np.zeros(config.NUM_CLASSES, dtype=np.float64)

    # Number of batches each loader actually yields
    train_steps = len(train_dataloader)
    valid_steps = len(valid_dataloader)

    # Start every epoch from clean gradients, also when the cell is re-run after an interruption
    optimizer.zero_grad()

    train_prog_bar = tqdm(train_dataloader, total=train_steps)
    for batch_index, (x, y) in enumerate(train_prog_bar):

        # Drops singleton dimensions of the mask batch (assumes a batch size above 1; TODO confirm)
        y = y.squeeze()
        (x, y) = (x.to(device, dtype=torch.float32), y.to(device, dtype=torch.long))

        train_loss, train_score, _ = evaluate_model(model, x, y, loss_fun, scorer)

        # Log the unscaled loss so that train and validation losses are comparable
        batch_train_loss = train_loss.item()
        batch_train_score = np.nanmean(train_score)
        total_epoch_train_loss += batch_train_loss
        total_epoch_train_score += batch_train_score

        # Only the gradient is scaled, so that the accumulated gradient is an average over the window.
        # For an explanation of this, see "MLOps Engineering at Scale-Manning (2022), Ch 8.1.3"
        (train_loss / config.GRADIENT_ACCUMULATION_STEPS).backward()

        # Gradient accumulation: update on every full window and on the last batch of the epoch
        if ((batch_index + 1) % config.GRADIENT_ACCUMULATION_STEPS == 0) or (batch_index + 1 == len(train_dataloader)):

            # Weights update
            optimizer.step()
            optimizer.zero_grad()

            # Optimizer Learning Rate update
            scheduler.step()

        train_prog_bar.set_description(desc=f"Training loss: {batch_train_loss:.4f} | Mean Dice score: {batch_train_score:.2f}")

    # Switch off autograd for evaluation on the validation set
    with torch.no_grad():
        model.eval()

        valid_prog_bar = tqdm(valid_dataloader, total=valid_steps)
        for x, y in valid_prog_bar:
            y = y.squeeze()
            (x, y) = (x.to(device, dtype=torch.float32), y.to(device, dtype=torch.long))

            valid_loss, valid_score, _ = evaluate_model(model, x, y, loss_fun, scorer)

            batch_valid_score = np.nanmean(valid_score)
            total_epoch_valid_loss += valid_loss.item()
            total_epoch_valid_score += batch_valid_score
            total_epoch_valid_class_score += np.asarray(valid_score, dtype=np.float64)

            valid_prog_bar.set_description(desc=f"Validation loss: {valid_loss.item():.4f} | Mean Dice score: {batch_valid_score:.2f}")

    avg_train_loss = total_epoch_train_loss / train_steps
    avg_valid_loss = total_epoch_valid_loss / valid_steps

    avg_train_score = total_epoch_train_score / train_steps
    avg_valid_score = total_epoch_valid_score / valid_steps

    H["train_loss"].append(avg_train_loss)
    H["valid_loss"].append(avg_valid_loss)
    H["train_score"].append(avg_train_score)
    H["valid_score"].append(avg_valid_score)
    # Epoch average of the per-class scores (previously only the last batch was stored)
    H["per_class_score"].append(total_epoch_valid_class_score / valid_steps)

    print("Epoch train loss: {:.6f} | Epoch train mean Dice score: {:.4f}".format(avg_train_loss, avg_train_score))
    print("Epoch valid loss: {:.6f} | Epoch valid mean Dice score: {:.4f}".format(avg_valid_loss, avg_valid_score))

    # Serialize the model every 5 epochs
    # NOTE(review): torch.save(model) pickles the whole module; saving model.state_dict() is more portable
    if (e+1)%5 == 0:
        output_file_path = os.path.join(OUTPUT_PATH, f"deeplabv3_model_epoch_{e+1}.pkl")
        torch.save(model, output_file_path)

    # Update learning rate
#     if scheduler is not None:
#         if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
#             scheduler.step(avg_valid_loss)
#             print("Bad Epochs:{}".format(scheduler.num_bad_epochs))
#             print(f"Last Learning Rate = {optimizer.param_groups[0]['lr']}")
#         else:
#             scheduler.step()
#             print(f"LR param group 0: {optimizer.param_groups[0]['lr']}")
#             print(f"LR param group 1: {optimizer.param_groups[1]['lr']}")
#             print(f"LR param group 2: {optimizer.param_groups[2]['lr']}")

## Plotting

In [None]:
# Epoch numbers 1..EPOCHS, shared x-axis of all training curves
x = list(range(1, config.EPOCHS + 1))

### Plot losses

In [None]:
# Train and validation loss of every epoch in one figure
y = [H["train_loss"], H["valid_loss"]]
labels = ["Train loss", "Validation Loss"]
plot(x, y, labels)

### Plot scores

In [None]:
# Train and validation mean Dice score of every epoch in one figure
y = [H["train_score"], H["valid_score"]]
labels = ["Train mean Dice", "Validation mean Dice"]
plot(x, y, labels)

### Plot per-class Dice scores

In [None]:
# Transpose the history: one sequence of per-epoch scores for every class
ys = list(zip(*H["per_class_score"]))

# One subplot per class, all sharing both axes
fig, axes = plt.subplots(nrows=config.NUM_CLASSES, ncols=1, sharey=True, sharex=True, figsize=(8, 24))

for i, (axis, y) in enumerate(zip(axes, ys)):
    axis.plot(x, y, label = f"Class {i}", marker='o')
    axis.legend(loc=2)
    axis.xaxis.set_tick_params(which='major', length=0)
    axis.xaxis.set_major_locator(MultipleLocator(1))
    # Major y ticks every 0.5, minor every 0.1 (an earlier locator of 1 was immediately overridden)
    axis.yaxis.set_major_locator(MultipleLocator(0.5))
    axis.yaxis.set_minor_locator(MultipleLocator(0.1))
    axis.grid(which='major', color='black', linestyle='-')
    axis.grid(which='minor', color='gray', linestyle='-', alpha=0.2)

fig.supxlabel('Epoch')
fig.supylabel('Dice coefficient')

plt.tight_layout()
plt.show()
plt.close(fig)

## <font style="color:green">4.2. Inference [3 Points]</font>

In [None]:
model.eval()

n_samples = 2

# Take the first validation batch and keep at most n_samples of it
images, masks = next(iter(valid_dataloader))
images = images[:n_samples,...].to(device, dtype=torch.float32)
masks = masks[:n_samples,...]

with torch.no_grad():
    preds = model(images)["out"].argmax(dim=1)

# Default statistics of A.Normalize (ImageNet), used to map the images back to [0, 1] for display
norm_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
norm_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)

# Same color scale for ground truth and prediction, so a class has the same color in both panels
class_scale = {"vmin": 0, "vmax": config.NUM_CLASSES - 1}

# The batch can hold fewer than n_samples images; squeeze=False keeps `ax` two-dimensional
n_shown = images.shape[0]
fig, ax = plt.subplots(nrows=n_shown, ncols=3, squeeze=False, sharey=True, figsize=(10, 10))
for i in range(n_shown):

    # CHW -> HWC, then undo the normalization
    image = images[i, ...].permute(1, 2, 0).detach().cpu().numpy()
    image = np.clip(image * norm_std + norm_mean, 0.0, 1.0)

    mask = torch.squeeze(masks[i, ...]).detach().cpu().numpy()

    pred = preds[i, ...].detach().cpu().numpy()

    ax[i][0].imshow(image)
    ax[i][0].set_xlabel("image")
    ax[i][0].set_xticks([])
    ax[i][0].set_yticks([])

    ax[i][1].imshow(mask, **class_scale)
    ax[i][1].set_xlabel("ground-truth mask")
    ax[i][1].set_xticks([])
    ax[i][1].set_yticks([])

    ax[i][2].imshow(pred, **class_scale)
    ax[i][2].set_xlabel("Prediction")
    ax[i][2].set_xticks([])
    ax[i][2].set_yticks([])

plt.tight_layout()
fig.canvas.draw()
plt.show()
plt.close(fig)

# <font style="color:green">5. Prepare Submission CSV [10 Points]</font>

Format:
```
ImageID,EncodedPixels
01_0,1 1 5 1
01_1,2 3 8 1
02_0,1 1
02_1,3 1
03_0,1 1
03_1,4 5
etc.
```

In [None]:
# From:
# https://www.kaggle.com/code/paulorzp/rle-functions-run-lenght-encode-decode

# def mask2rle(img):
#     '''
#     img: numpy array, 1 - mask, 0 - background
#     Returns run length as string formated
#     '''
#     pixels= img.T.flatten()
#     pixels = np.concatenate([[0], pixels, [0]])
#     runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
#     runs[1::2] -= runs[::2]
#     return ' '.join(str(x) for x in runs)


# def rle2mask(mask_rle, shape=(1600,256)):
#     '''
#     mask_rle: run-length as string formated (start length)
#     shape: (width,height) of array to return
#     Returns numpy array, 1 - mask, 0 - background

#     '''
#     s = mask_rle.split()
#     starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
#     starts -= 1
#     ends = starts + lengths
#     img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
#     for lo, hi in zip(starts, ends):
#         img[lo:hi] = 1
#     return img.reshape(shape).T

In [None]:
output_lines = ["ImageID,EncodedPixels"]

# Inference mode for dropout / batch-norm layers, even when this cell is run on its own
model.eval()

for image_id in tqdm(test_ids):
    image_path = os.path.join(DATA_PATH, "imgs/imgs", f"{image_id}.jpg")

    # Load the image; cv2.imread returns None instead of raising when the file is unreadable
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read test image '{image_path}'")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Same preprocessing as validation, plus a batch dimension of 1
    batch = test_transforms(image=image)['image'].to(device, dtype=torch.float32).unsqueeze(0)

    with torch.no_grad():
        # (1, H, W) class indices -> (H, W)
        pred_mask = model(batch)['out'].argmax(dim=1)[0].detach().cpu().numpy()

    for class_id in range(config.NUM_CLASSES):
        # Binary mask of this class. Comparing directly also yields a correct mask for
        # class 0, which the previous copy-then-threshold logic always left empty.
        class_image = (pred_mask == class_id).astype(np.uint8)

        pred_rle = rle_to_string(rle_encode(class_image))

        # One row per image and class, without whitespace after the comma
        output_lines.append(f"{image_id}_{class_id},{pred_rle}")

# Write next to the other artifacts, which is also where the next cell reads the file from
submission_path = os.path.join(OUTPUT_PATH, "submission.csv")
with open(submission_path, "w") as f:
    f.write("\n".join(output_lines))

In [None]:
# Sanity check: load the written submission file back and display it
pd.read_csv("/kaggle/working/submission.csv")

# <font style="color:green">6. Kaggle Profile Link [50 Points]</font>

Share your Kaggle profile link here with us so that we can give points for the competition score.

You should have a minimum IoU of `0.60` on the test data to get all points. If the IoU is less than `0.55`, you will not get any points for the section.

**You must submit `submission.csv` (predictions for the images in `test.csv`) in the `Submit Predictions` tab on Kaggle to receive any evaluation for this section.**