# Architectures

The aim of this notebook is to test different encoder-decoder combinations. Based on the earlier results, we limited the decoders to the two most successful ones:

- [Unet++](https://arxiv.org/pdf/1807.10165.pdf)
- [DeepLabV3](https://arxiv.org/abs/1706.05587)

These will now be combined with different encoders:
- [ResNet](https://arxiv.org/abs/1512.03385)
- [ResNeXt](https://arxiv.org/abs/1611.05431)
- [VGG](https://arxiv.org/abs/1409.1556)
- [EfficientNet](https://arxiv.org/abs/1905.11946)
- [Inception](https://arxiv.org/abs/1409.4842)

## Google Colab

The first cell will only be run in Google Colab, the second one locally as well.

In [2]:
import sys

# Colab detection: True when the 'google.colab' package is already loaded in
# this kernel's sys.modules, False otherwise (e.g. when running locally).
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    # Mount Google Drive at /content/drive; the import is only attempted in
    # Colab because the package is not available elsewhere.
    # noinspection PyUnresolvedReferences
    from google.colab import drive
    drive.mount('/content/drive')

In [3]:
import os
import glob

# let's keep this cell at the beginning for every notebook
# for more convenient training in Google Colab
def get_root_path(filename: str) -> str:
    """Get root path based on notebook's name.

    Searches the current working directory recursively for ``filename`` and
    returns the parent of the directory that contains the first match.

    Args:
        filename: File name of the notebook to look for, e.g. 'backbones.ipynb'.

    Returns:
        The parent directory of the directory containing the matched file.

    Raises:
        FileNotFoundError: If no file called ``filename`` exists anywhere
            below the current working directory.
    """
    cwd = os.getcwd()
    # Escape the working directory so glob metacharacters in it ('[', '*', '?')
    # are matched literally instead of being interpreted as a pattern.
    pattern = os.path.join(glob.escape(cwd), '**', filename)
    matches = glob.glob(pattern, recursive=True)
    if not matches:
        # Fail with an explicit message instead of an opaque IndexError.
        raise FileNotFoundError(f"could not find '{filename}' anywhere below '{cwd}'")
    return os.path.dirname(os.path.dirname(matches[0]))

ROOT_PATH = get_root_path('backbones.ipynb')
# make the project's modules (e.g. `scripts`) importable; the membership check
# keeps sys.path free of duplicates when this cell is re-run
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

# go to the drive directory
if IN_COLAB:
    os.chdir(ROOT_PATH)

## Imports

In [4]:
import os
import cv2
import torch

import albumentations as A
import segmentation_models_pytorch as smp

from sklearn.model_selection import KFold
from scripts.evaluation import EvaluationMonitor, get_best_f1_per_setup
from scripts.preprocessing import RoadDataset, split_data
from scripts.training import train_model, setup_seed
from torch.utils.data import DataLoader, SubsetRandomSampler

In [5]:
# necessary for downloading some of the models
# NOTE(review): this swaps the factory for the default HTTPS context with the
# unverified one for the whole process, so certificate verification is skipped
# for every later HTTPS request that relies on the default context -- not only
# for the model downloads. Prefer fixing the local certificate store if possible.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [6]:
# setup_seed is a project helper (scripts.training); presumably it seeds the
# random number generators used during training -- TODO confirm.
# The literal 16 equals the SEED constant defined in the Hyperparameters
# section; keep the two values in sync.
setup_seed(16)

## Data

In [5]:
# specify train directory: <ROOT_PATH>/data/raw/train
train_directory = os.path.join(ROOT_PATH, 'data', 'raw', 'train')

In [6]:
# image paths so that all the images are used for train dataset (no test set for cv due to small training set)
# split_data returns four values; with test_size=0 the two discarded ones are
# presumably the (empty) test-side image and mask paths -- TODO confirm
image_path_train, _, mask_path_train, _ = split_data(train_directory, test_size=0)

# create train Dataset without transformations for now
# (the cross-validation loop attaches them later through set_tf)
train_dataset = RoadDataset(image_path_train, mask_path_train)

In [7]:
# every sample is resized to a square with this side length (in pixels)
IMAGE_SIZE = 608

# training pipeline: resize first, then the random augmentations
train_tf = A.Compose(
    [
        A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, always_apply=True),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Rotate(
            limit=180,
            border_mode=cv2.BORDER_CONSTANT,
            rotate_method="ellipse",
            p=0.5,
        ),
        A.RandomBrightnessContrast(p=0.5),
        A.RandomSnow(p=0.1),
    ]
)

# validation pipeline: resize only, no augmentation
valid_tf = A.Compose(
    [A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, always_apply=True)]
)

## Encoder-Decoder Combinations



In [8]:
# specify the root path for evaluation json-s
evaluation_dir = os.path.join(ROOT_PATH, 'data', 'results', 'backbone')
monitor = EvaluationMonitor(evaluation_dir)

# (encoder, decoder) pairs the cross-validation loop will iterate over;
# presumably the setups without saved results yet -- confirm in EvaluationMonitor
ENC_DEC_COMBINATIONS = monitor.get_not_updated_models()

In [9]:
# show the (encoder, decoder) pairs that the cross-validation loop will iterate over
ENC_DEC_COMBINATIONS

[('mit_b2', 'Unet'),
 ('resnext50_32x4d', 'DeepLabV3'),
 ('resnext50_32x4d', 'UnetPlusPlus'),
 ('efficientnet-b7', 'DeepLabV3'),
 ('efficientnet-b7', 'UnetPlusPlus')]

## Hyperparameters

Since our aim is to see how different architectures influence training, we keep the rest of the training setup fixed: the loss, the optimizer, the number of epochs and the batch size.

In [10]:
# pre-trained weights passed as `encoder_weights` when the models are created
ENCODER_WEIGHTS = 'imagenet'

# encoder name -> parameter count; presumably in millions -- TODO confirm
PARAMETER_COUNT = {
    'resnet101': 42,
    'resnext50_32x4d': 22,
    'vgg19': 20,
    'efficientnet-b7': 63,
    'inceptionv4': 41
}

SEED = 16
BATCH_SIZE = 4
K_FOLD = 3
# os.cpu_count() returns None when the count cannot be determined; fall back to
# a single worker so that `num_workers` is always a positive int, which
# `persistent_workers=True` below requires
N_CPU = os.cpu_count() or 1
N_EPOCHS = 150

# keyword arguments shared by the training and validation DataLoaders
LOADER_PARAMS = {
    'batch_size': BATCH_SIZE,
    'num_workers': N_CPU,
    'persistent_workers': True
}

## Cross-Validation

In [None]:
# Run K-fold cross-validation for every pending (encoder, decoder) pair and
# persist the per-epoch metrics through the evaluation monitor.
for encoder, decoder in ENC_DEC_COMBINATIONS:

    print(f'encoder-decoder: {encoder}-{decoder}')

    k_fold = KFold(n_splits=K_FOLD, shuffle=True, random_state=SEED)

    # Record K-fold results: one entry per fold, each entry being whatever
    # train_model returns for that metric (presumably one value per epoch)
    training_loss_matrix = []
    validation_loss_matrix = []
    training_f1_matrix = []
    validation_f1_matrix = []

    # Get training and validation indices
    for fold, (train_idx, val_idx) in enumerate(k_fold.split(train_dataset)):

        print(f'fold: {fold}')

        # Create training and validation loaders by providing current K-Fold train/validation indices to Sampler
        # NOTE(review): both loaders are built from the same `train_dataset` object. If `set_tf`
        # mutates the dataset in place and returns it, the second call overrides the first and the
        # training loader would also use `valid_tf` (no augmentation) -- confirm against RoadDataset.set_tf.
        train_loader = DataLoader(train_dataset.set_tf(train_tf), sampler=SubsetRandomSampler(train_idx), **LOADER_PARAMS)
        valid_loader = DataLoader(train_dataset.set_tf(valid_tf), sampler=SubsetRandomSampler(val_idx), **LOADER_PARAMS)

        # Initialize model
        model_ = smp.create_model(decoder, encoder_name=encoder, encoder_weights=ENCODER_WEIGHTS)
        criterion_ = smp.losses.DiceLoss(smp.losses.BINARY_MODE, from_logits=True)
        optimizer_ = torch.optim.Adam(model_.parameters(), lr=0.0005)
        # NOTE(review): `len(train_loader.dataset)` is the size of the full dataset, not of the
        # fold's training subset selected by the sampler, so T_max is larger than the number of
        # batches actually seen -- confirm against how train_model steps the scheduler.
        scheduler_ = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer_,
            T_max=(len(train_loader.dataset) * N_EPOCHS) // train_loader.batch_size,
        )

        # Train model
        train_losses, valid_losses, train_f1s, valid_f1s = train_model(
            model_, (train_loader, valid_loader), criterion_, optimizer_, scheduler_, N_EPOCHS
        )

        # Save epoch results: each matrix gets its own metric (the loss matrices
        # previously received the training F1 scores by mistake)
        training_loss_matrix.append(train_losses)
        validation_loss_matrix.append(valid_losses)
        training_f1_matrix.append(train_f1s)
        validation_f1_matrix.append(valid_f1s)

    # Store the results of all folds for this pair and write them out right away,
    # so finished combinations are saved before the next one starts
    monitor.update_metrics(
        (encoder, decoder),
        training_f1=training_f1_matrix,
        training_loss=training_loss_matrix,
        validation_f1=validation_f1_matrix,
        validation_loss=validation_loss_matrix
    )
    monitor.update_jsons()

## Metrics

Cross-validation was run on Colab and the results were saved to JSON, so we can now start analysing them.

In [16]:
# summarise the validation F1 results held by the monitor: one row per
# (encoder, decoder) setup with its top F1 and standard deviation
stats_df = get_best_f1_per_setup(monitor.metrics['validation_f1'])
stats_df

Unnamed: 0,top_f1,std_dev
"(resnet101, UnetPlusPlus)",0.851941,0.005886
"(resnet101, DeepLabV3)",0.85206,0.020864
"(vgg19, UnetPlusPlus)",0.837727,0.018677
"(inceptionv4, UnetPlusPlus)",0.865252,0.013816
