# Hyperparameter Optimization

TODO: update

## Google Colab

The first cell mounts Google Drive only when the notebook runs in Google Colab; the second cell runs both in Colab and locally.

In [1]:
import json
import sys

# True when the `google.colab` package has already been loaded into this
# interpreter; used below and in later cells to switch on Colab-only steps.
IN_COLAB = "google.colab" in sys.modules

# Mount Google Drive only inside Colab; elsewhere this branch is skipped.
if IN_COLAB:
    # noinspection PyUnresolvedReferences
    from google.colab import drive

    drive.mount("/content/drive")

In [2]:
import os
import glob

# let's keep this cell at the beginning for every notebook
# for more convenient training in Google Colab
def get_root_path(filename: str) -> str:
    """Get the project root path based on the notebook's file name.

    Recursively searches the current working directory for ``filename`` and
    returns the directory two levels above the first match, i.e. the parent
    of the directory that contains the notebook.

    Args:
        filename: File name of the notebook to look for,
            e.g. ``'hyperopt.ipynb'``.

    Returns:
        The grandparent directory of the first matching file.

    Raises:
        FileNotFoundError: If no file named ``filename`` exists under the
            current working directory.
    """
    # Escape the working directory so glob metacharacters in its name
    # (e.g. '[', '*', '?') are matched literally instead of as wildcards.
    pattern = os.path.join(glob.escape(os.getcwd()), '**', filename)
    matches = glob.glob(pattern, recursive=True)
    if not matches:
        raise FileNotFoundError(
            f"Could not find '{filename}' under '{os.getcwd()}'"
        )
    return os.path.dirname(os.path.dirname(matches[0]))

# Resolve the project root from this notebook's location and add it to the
# import search path.
ROOT_PATH = get_root_path('hyperopt.ipynb')
sys.path.append(ROOT_PATH)

# In Colab, switch the working directory to the project root on the drive.
if IN_COLAB:
    os.chdir(ROOT_PATH)

## Imports

In [3]:
import os
import cv2

import albumentations as A
import segmentation_models_pytorch as smp
import flaml

from ray import tune
from scripts.preprocessing import RoadDataset, split_data
from scripts.training import setup_seed, tune_hyperparams

In [4]:
# necessary for downloading some of the models
# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, i.e. TLS certificate verification is turned off for code
# that relies on `ssl`'s default HTTPS context in this process, not only for
# the model downloads. Confirm this is acceptable outside of experimentation,
# or fix the local certificate store instead.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [5]:
# Seed for reproducibility. The constant itself is passed to `setup_seed` so
# the stored value and the value actually applied cannot drift apart.
SEED = 16
setup_seed(SEED)

## Data

In [6]:
# directory with the training data: <ROOT_PATH>/data/raw/train
train_directory = os.path.join(ROOT_PATH, 'data', 'raw', 'train')

In [7]:
# Training pipeline: resize to 608x608, then two augmentations that are each
# applied with probability 0.5 (rotation with limit=180 and a random
# brightness/contrast change).
train_tf = A.Compose(
    [
        A.Resize(height=608, width=608, always_apply=True),
        A.Rotate(
            p=0.5,
            limit=180,
            border_mode=cv2.BORDER_CONSTANT,
            rotate_method="ellipse",
        ),
        A.RandomBrightnessContrast(p=0.5),
    ]
)

# Validation pipeline: the same resize, no augmentation.
valid_tf = A.Compose(
    [
        A.Resize(height=608, width=608, always_apply=True),
    ]
)

In [8]:
# Split the contents of the train directory into train/validation image and
# mask paths. The second argument (0.2) is handed to `split_data` —
# presumably the validation fraction; confirm against its definition.
(
    image_path_train,
    image_path_val,
    mask_path_train,
    mask_path_val,
) = split_data(train_directory, 0.2)

# Each dataset gets its own transform pipeline (`train_tf` / `valid_tf`).
train_dataset = RoadDataset(image_path_train, mask_path_train, train_tf)
val_dataset = RoadDataset(image_path_val, mask_path_val, valid_tf)

# (train, validation) pair passed to the tuning function as `datasets`
ds = train_dataset, val_dataset

## Hyperparameters

In [14]:
# Architecture being tuned: encoder name and decoder name. Both are passed to
# the tuning function and joined with '+' to key the saved configuration.
ENCODER, DECODER = 'efficientnet-b4', 'UnetPlusPlus'

In [15]:
# Search budget.
max_num_epoch = 150
time_budget_s = int(2.5 * 60 * 60)  # 2.5 hours = 9000 seconds
num_samples = 500

# Search space sampled for every trial.
config = dict(
    # learning rate, log-uniform in [1e-4, 1e-1]
    lr=tune.loguniform(1e-4, 1e-1),
    # number of epochs, log-uniform between 1 and `max_num_epoch`
    # (sampled as a float)
    num_epochs=tune.loguniform(1, max_num_epoch),
    # integer batch size drawn from randint(1, 9)
    batch_size=tune.randint(1, 9),
    # name of the loss to train with
    criterion=tune.choice(["dice_loss", "focal_loss"]),
)

## Tuning

In [None]:
result = flaml.tune.run(
    # trainable with the fixed (non-tuned) arguments bound to it
    tune.with_parameters(
        tune_hyperparams,
        encoder=ENCODER,
        decoder=DECODER,
        datasets=ds,
    ),
    # search space and the cheap starting point for the search
    config=config,
    low_cost_partial_config={"num_epochs": 1},
    # objective: maximise the reported "f1"
    metric="f1",
    mode="max",
    # budget
    num_samples=num_samples,
    time_budget_s=time_budget_s,
    max_resource=max_num_epoch,
    # execution: run through Ray, one GPU per trial, logs under logs/
    use_ray=True,
    resources_per_trial={"gpu": 1},
    local_dir="logs/",
)

## Save Best Config

In [25]:
# Take a copy of the configuration of the trial with the highest "f1",
# compared with scope "all".
best_config = result.get_best_trial(
    metric="f1",
    mode="max",
    scope="all",
).config.copy()

In [26]:
# show the selected configuration as the cell output
best_config

{'num_epochs': 1.0, 'lr': 0.00019601811957324142, 'batch_size': 1}

In [27]:
# JSON file that collects the best configuration per model
config_path = os.path.join(ROOT_PATH, 'data', 'results', 'hyperopt', 'configs.json')

# Load the previously saved configurations. On the first run the file does
# not exist yet, so start from an empty mapping instead of failing.
try:
    with open(config_path, 'r') as file:
        data = json.load(file)
except FileNotFoundError:
    data = {}

# Register this run's result under the "<encoder>+<decoder>" key. No copy is
# made: `data[model]` and `best_config` are the same dict object.
model = "+".join([ENCODER, DECODER])
data[model] = best_config

In [20]:
# show all stored configurations as the cell output
data

{'resnet18+unet': {'num_epochs': 1.0,
  'lr': 0.00019601811957324142,
  'batch_size': 1}}

In [29]:
# Drop 'criterion' before saving. `best_config` is the same object that was
# stored in `data`, so this also removes the key from what gets written.
# The default avoids a KeyError when the key is absent (e.g. on a re-run).
best_config.pop('criterion', None)

# Make sure the target directory exists, then write all configurations back.
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, 'w') as file:
    json.dump(data, file)