# Optuna study

> Combine it with papermill and wandb for seamless hyperparameter tuning

In [1]:
import sys
sys.path.append('..')
import os
import optuna
from tsai.optuna import *
from tsai.basics import load_object
import papermill as pm
from tsai.optuna import run_optuna_study
from fastcore.basics import *
from optuna.distributions import *
from optuna.samplers import TPESampler
import wandb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
config = AttrDict(
    study_name='general_study_extended',  # name of the Optuna study
    study_type='bayesian',  # 'bayesian' or 'gridsearch' or 'random'
    n_trials=30,  # number of trials
    train_nb=f'{os.getcwd()}/geodstap_train.ipynb',  # path to the notebook to be executed
    search_space={
        "arch.attn_dropout": DiscreteUniformDistribution(0.0, 0.5, 0.1),
        # "arch.d_model": IntUniformDistribution(32, 2048, 64),
        # "arch.d_ff": IntUniformDistribution(32, 4096, 128), # Not used in TSiT
        # "arch.decomposition": CategoricalDistribution([True, False]), # Not used in TSiT
        "arch.dropout": DiscreteUniformDistribution(0.0, 0.5, 0.1),
        # "arch.individual": CategoricalDistribution([True, False]), # Not used in TSiT
        # "arch.n_layers": IntUniformDistribution(1, 6, 1),
        # "arch.n_heads": CategoricalDistribution([2, 4, 8, 16, 32]),
        # "arch.patch_len": CategoricalDistribution([4, 8, 16, 32, 64, 128]), # Not used in TSiT
        # "init_weights": CategoricalDistribution([True, False]), # Not used in TSiT
        "lookback": CategoricalDistribution([144, 192, 240, 288, 336]), # Not used in TSiT
        "deltaHL": FloatDistribution(1., 15., step=1.), # Not used in TSiT
    },
    # Add extra parameters that are fixed, but not part of the search space
    extra_params={
        "n_epoch": 30,
        "bs": 32,
        "is_optuna_study": True,
        "arch_name": 'TSiTPlus',   
        "arch.d_model": 128,
        "arch.n_heads": 8
    },
    use_wandb=True,
    wandb_mode='offline'
)

%store -d best_valid_loss                                             

config

```json
{ 'extra_params': { 'arch.d_model': 128,
                    'arch.n_heads': 8,
                    'arch_name': 'TSiTPlus',
                    'bs': 32,
                    'is_optuna_study': True,
                    'n_epoch': 30},
  'n_trials': 30,
  'search_space': { 'arch.attn_dropout': DiscreteUniformDistribution(high=0.5, low=0.0, q=0.1),
                    'arch.dropout': DiscreteUniformDistribution(high=0.5, low=0.0, q=0.1),
                    'deltaHL': FloatDistribution(high=15.0, log=False, low=1.0, step=1.0),
                    'lookback': CategoricalDistribution(choices=(144, 192, 240, 288, 336))},
  'study_name': 'general_study_extended',
  'study_type': 'bayesian',
  'train_nb': '/workspaces/sw-driver-forecaster/dev_nbs/geodstap_train.ipynb',
  'use_wandb': True,
  'wandb_mode': 'offline'}
```

In [3]:
def create_objective(train_nb, search_space, extra_params=None, use_wandb=False):
    """
        Create objective function to be minimized by Optuna.
        Inputs:
            trial: Optuna trial object
            train_nb: path to the training notebook
            search_vars: keys of the search space to be used
            wandb_group: name of the wandb group to be used
        Output:
            valid_loss: validation loss
    """
    def objective(trial:optuna.Trial):
        # Define the parameters to be passed to the training notebook through papermill
        pm_parameters = {}
        for k,v in search_space.items():
            pm_parameters['config.' + k] = trial._suggest(k, v)

        # Add the extra parameters to the dictionary. The key of every parameter 
        # must be 'config.<param_name>'
        if extra_params is not None:
            for k,v in extra_params.items():
                pm_parameters['config.' + k] = v
                
        # If using wandb, enable that in the training runs, all of them gathered
        # into a group (NOTE: The train nb must have and use these config arguments)
        if use_wandb:
            pm_parameters['config.use_wandb'] = True
            pm_parameters['config.wandb_group'] = config.study_name + '_runs'

        # Call the training notebook using papermill (don't print the output)
        stdout_file = open('tmp/pm_stdout.txt', 'w')
        stderr_file = open('tmp/pm_stderr.txt', 'w')

        pm.execute_notebook(
            train_nb,
            './tmp/pm_output.ipynb',
            parameters = pm_parameters,
            stdout_file = stdout_file,
            stderr_file = stderr_file,
            
        )

        # Close the output files
        stdout_file.close()
        stderr_file.close()

        # Get the output value of interest from the source notebook
        loss = None
        %store -r valid_loss
        return valid_loss

    return objective

In [4]:
import nbformat

def filter_nb(path:str, skip_tags:list):
    """
        Load a notebook and drop the cells that carry any of the given tags.
        Inputs:
            path: path of the notebook to be read (as nbformat version 4)
            skip_tags: tags that mark a cell to be removed
        Output:
            nb: the loaded notebook, keeping only the cells without those tags
    """
    # Load the notebook
    nb = nbformat.read(path, as_version=4)

    # Build the tag set once instead of once per cell
    banned_tags = set(skip_tags)

    # Keep only the cells that share no tag with the banned set; cells without
    # a 'tags' entry in their metadata are always kept
    nb.cells = [cell for cell in nb.cells
                if banned_tags.isdisjoint(cell.metadata.get('tags', []))]

    return nb

In [None]:
# Skip the cells that are unnecessary for the study, such as diagram plots or data updates
train_nb = filter_nb(config.train_nb, ['skip'])

# Take the wandb switch and the study type from `config` instead of
# hard-coding them, so that editing the configuration actually has an effect
obj = create_objective(train_nb, config.search_space,
                       extra_params=config.extra_params, use_wandb=config.use_wandb)
study = run_optuna_study(obj, study_type=config.study_type, direction='minimize', path='./tmp',
                         study_name=config.study_name, n_trials=config.n_trials)

In [None]:
%store

In [None]:
# Tell wandb which notebook this run belongs to
os.environ['WANDB_NOTEBOOK_NAME'] = '/workspaces/sw-driver-forecaster/dev_nbs/optuna_study_geodstap.ipynb'

# Open a wandb run for the study only when wandb is enabled in the config;
# otherwise `run` stays None and the cells below do nothing
run = None
if config['use_wandb']:
    run = wandb.init(
        config=config,
        mode=config['wandb_mode'],
        job_type='optuna-study',
    )

In [None]:
if run is not None:
    # Best hyperparameters together with the best value and its trial number
    best_summary = {
        **study.best_params,
        'best_value': study.best_value,
        'best_trial_number': study.best_trial.number,
    }
    run.log(best_summary)

    # Upload the pickled study as an artifact
    run.log_artifact(f'./tmp/{config.study_name}.pkl', type='optuna_study')

    # Every logged key matches an `optuna.visualization.plot_<key>` function
    plot_names = (
        'contour',
        'edf',
        'intermediate_values',
        'optimization_history',
        'parallel_coordinate',
        'param_importances',
        'slice',
    )
    run.log({
        plot_name: getattr(optuna.visualization, 'plot_' + plot_name)(study)
        for plot_name in plot_names
    })

[W 2024-08-13 13:59:37,863] Param arch.individual unique value length is less than 2.
[W 2024-08-13 13:59:37,865] Param arch.n_layers unique value length is less than 2.
[W 2024-08-13 13:59:37,866] Param init_weights unique value length is less than 2.
[W 2024-08-13 13:59:37,867] Param lookback unique value length is less than 2.
[W 2024-08-13 13:59:37,869] Param arch.individual unique value length is less than 2.
[W 2024-08-13 13:59:37,870] Param arch.n_layers unique value length is less than 2.
[W 2024-08-13 13:59:37,872] Param init_weights unique value length is less than 2.
[W 2024-08-13 13:59:37,874] Param lookback unique value length is less than 2.
[W 2024-08-13 13:59:37,875] Param arch.individual unique value length is less than 2.
[W 2024-08-13 13:59:37,876] Param arch.n_layers unique value length is less than 2.
[W 2024-08-13 13:59:37,877] Param init_weights unique value length is less than 2.
[W 2024-08-13 13:59:37,879] Param lookback unique value length is less than 2.
[W 2

In [None]:
# Close the wandb run; `run` is None when wandb is disabled in the config
if run is not None:
    run.finish()

0,1
arch.attn_dropout,▁
arch.d_ff,▁
arch.d_model,▁
arch.decomposition,▁
arch.dropout,▁
arch.individual,▁
arch.n_heads,▁
arch.n_layers,▁
best_trial_number,▁
best_value,▁

0,1
arch.attn_dropout,0.4
arch.d_ff,352
arch.d_model,416
arch.decomposition,False
arch.dropout,0.2
arch.individual,False
arch.n_heads,8
arch.n_layers,1
best_trial_number,0
best_value,58.26485
