# Hyperparameter Optimization

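**TODO:** update the title and this introduction. The notebook loads the hyperparameters stored in `data/results/hyperopt/configs.json` for the selected encoder/decoder pair, trains the model on the training set, and saves the resulting state dict.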

## Google Colab

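Run both cells in every environment. The first cell mounts Google Drive only when the notebook is running in Google Colab; the second cell locates the project root and relies on `sys` and `IN_COLAB` from the first.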

In [1]:
import json
import sys

import numpy as np
from segmentation_models_pytorch.encoders import get_preprocessing_fn
from torch.utils.data import DataLoader

# True when the `google.colab` module is already loaded in this kernel
IN_COLAB = 'google.colab' in sys.modules

# Path at which Google Drive is mounted when running in Colab
COLAB_DRIVE_MOUNT_POINT = '/content/drive'

if IN_COLAB:
    # noinspection PyUnresolvedReferences
    from google.colab import drive

    drive.mount(COLAB_DRIVE_MOUNT_POINT)

In [2]:
import os
import glob


# let's keep this cell at the beginning for every notebook
# for more convenient training in Google Colab
def get_root_path(filename: str) -> str:
    """Get root path based on notebook's name.

    Recursively searches the current working directory for ``filename`` and
    returns the grandparent directory of the first match, i.e. the parent of
    the folder that contains the notebook.

    :param filename: name of the notebook file to look for
    :return: path two levels above the first matching file
    :raises FileNotFoundError: if no such file exists below the working directory
    """
    cwd = os.getcwd()
    # escape the cwd so glob metacharacters in it (e.g. '[') are matched literally
    pattern = os.path.join(glob.escape(cwd), '**', filename)
    matches = glob.glob(pattern, recursive=True)
    if not matches:
        raise FileNotFoundError(f"'{filename}' was not found under '{cwd}'")
    return os.path.dirname(os.path.dirname(matches[0]))


# get_root_path returns the directory two levels above the notebook file
ROOT_PATH = get_root_path('final_training.ipynb')

# make the project's `scripts` package importable;
# the guard keeps re-runs of this cell from appending duplicates
if ROOT_PATH not in sys.path:
    sys.path.append(ROOT_PATH)

# go to the drive directory
if IN_COLAB:
    os.chdir(ROOT_PATH)

## Imports

In [3]:
import os
import cv2

import albumentations as A
import segmentation_models_pytorch as smp
import torch

from scripts.preprocessing import RoadDataset, split_data
from scripts.training import setup_seed, train_model

In [4]:
# necessary for downloading some of the models
# NOTE(review): this swaps the default HTTPS context factory for the unverified
# one, so TLS certificate verification is turned off for every later HTTPS
# request that relies on the default context in this kernel, not only for the
# model downloads. Prefer fixing the local CA certificates where possible.
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

In [5]:
# Single source of truth for the random seed; pass the constant (not a repeated
# literal) so that changing SEED actually changes the seeding.
SEED = 16
setup_seed(SEED)

## Data

In [6]:
# training data location: <ROOT_PATH>/data/raw/train
raw_data_directory = os.path.join(ROOT_PATH, 'data', 'raw')
train_directory = os.path.join(raw_data_directory, 'train')

In [7]:
# best transformation based on the benchmarks
IMAGE_SIZE = 608  # height and width passed to every Resize below

# training pipeline: resize, then random rotation and brightness/contrast jitter
train_steps = [
    A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, always_apply=True),
    A.Rotate(p=0.5, limit=180, border_mode=cv2.BORDER_CONSTANT, rotate_method="ellipse"),
    A.RandomBrightnessContrast(p=0.5),
]
train_tf = A.Compose(train_steps)

# validation pipeline: resize only
valid_steps = [A.Resize(height=IMAGE_SIZE, width=IMAGE_SIZE, always_apply=True)]
valid_tf = A.Compose(valid_steps)

In [8]:
# Only the first and third values returned by split_data are kept.
# NOTE(review): the second argument (0) is presumably the validation share, so
# the discarded values would be the empty validation paths - confirm in split_data.
(
    image_path_train,
    _,
    mask_path_train,
    _,
) = split_data(train_directory, 0)

# dataset over the training images and masks, using the training transform
train_dataset = RoadDataset(
    image_path_train,
    mask_path_train,
    train_tf,
)

## Hyperparameters

In [9]:
# encoder / decoder names; combined as "<encoder>+<decoder>" to look up the config
ENCODER, DECODER = 'resnet18', 'unet'

In [10]:
# Hyperparameters are stored per model under an "<encoder>+<decoder>" key.
# A dedicated name is used for the key so that `model` only ever refers to the
# network object; re-running this cell no longer overwrites a trained model.
model_key = "+".join([ENCODER, DECODER])

config_path = os.path.join(ROOT_PATH, 'data', 'results', 'hyperopt', 'configs.json')
with open(config_path, 'r', encoding='utf-8') as file:
    all_configs = json.load(file)

if model_key not in all_configs:
    raise KeyError(f"No hyperparameters stored for '{model_key}' in {config_path}")
config = all_configs[model_key]

## Training

In [18]:
# Loader over the training dataset; batch size comes from the loaded config.
# NOTE(review): assumes set_tf returns the dataset itself - confirm in RoadDataset.
# NOTE(review): shuffle is left at the DataLoader default (False) - confirm this is intended.
loader = DataLoader(train_dataset.set_tf(train_tf), batch_size=config['batch_size'])

# Network, loss and optimizer
model = smp.create_model(DECODER, encoder_name=ENCODER)
criterion = smp.losses.DiceLoss(smp.losses.BINARY_MODE, from_logits=True)
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

# Cosine annealing horizon: (dataset size * number of epochs) // batch size
num_epochs = int(config['num_epochs'])
total_steps = (len(loader.dataset) * num_epochs) // loader.batch_size
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=total_steps)

In [19]:
# Only the training loader is supplied; the second slot of the pair is None
# (presumably the validation loader - confirm in train_model).
dataloaders = (loader, None)
epochs_to_run = int(config['num_epochs'])

# the return value is not needed here
_ = train_model(model, dataloaders, criterion, optimizer, scheduler, epochs_to_run)

Epoch:   1. Train.      Loss: 0.510 | f1: 0.543: 100%|██████████| 100/100 [01:53<00:00,  1.14s/it]


## Save State Dict

In [22]:
# The checkpoint file is named after the "<encoder>+<decoder>" identifier;
# build the identifier once and reuse it instead of formatting it twice.
model_name = "+".join([ENCODER, DECODER])
state_dict_path = os.path.join(ROOT_PATH, 'data', 'results', 'hyperopt', f'{model_name}.pth')

# the weights are stored under the 'state_dict' key of the saved dict
torch.save({'state_dict': model.state_dict()}, state_dict_path)