# Optuna study

> Combine Optuna with papermill and wandb for seamless hyperparameter tuning

In [1]:
import sys
sys.path.append('..')
import os
import optuna
from tsai.optuna import *
from tsai.basics import load_object
import papermill as pm
from fastcore.basics import *
from optuna.distributions import *
from optuna.samplers import TPESampler
import wandb
from swdf.metrics import ValidationMetricsHandler
from swdf.losses import LossFactory
from swdf.utils import run_optuna_study, filter_nb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Losses that wrap another (primary) loss, so they cannot be used as primary losses themselves.
composite_losses = ('Classification', 'Trended')

losses = list(LossFactory.losses.keys())
# Primary losses are the ones that do not need another loss to function.
# They are selected by name rather than by position, so the result does not
# depend on the order in which the losses are registered in LossFactory.
primary_losses = [loss for loss in losses if loss not in composite_losses]

In [3]:
# Names of the validation metrics tracked during the study
metric_names = [
    'R_Correlation',
    'Kurtosis_Difference',
    'Skewness_Difference',
    'MSA',
    'SSPB',
]
val_metrics = ValidationMetricsHandler(metric_names)

# The order of the metrics will be the one that appears when listing them
# (not necessarily the order in which they were passed above).
val_metrics.list()

Metric Name,Description
Skewness_Difference,"Calculate the absolute difference in skewness between the actual and predicted values, which measures the asymmetry of the data distribution. Parameters: y_true (torch.Tensor): Actual values tensor of shape (batch_size, variables, horizon) y_pred (torch.Tensor): Predicted values tensor of the same shape as y_true Returns: torch.Tensor: Absolute difference in skewness between y_true and y_pred"
Kurtosis_Difference,"Calculate the absolute difference in kurtosis between the actual and predicted values, which measures the tailedness of the data distribution. Parameters: y_true (torch.Tensor): Actual values tensor of shape (batch_size, variables, horizon) y_pred (torch.Tensor): Predicted values tensor of the same shape as y_true Returns: torch.Tensor: Absolute difference in kurtosis between y_true and y_pred"
R_Correlation,"Calculate the Pearson Correlation Coefficient (R Correlation) between true and predicted values. Parameters: y_true (torch.Tensor): Actual values tensor of shape (batch_size, variables, horizon) y_pred (torch.Tensor): Predicted values tensor of the same shape as y_true Returns: torch.Tensor: R Correlation coefficient"
MSA,"Calculate the Median Symmetric Accuracy (MSA). Parameters: y_true (torch.Tensor): Actual values tensor of shape (batch_size, variables, horizon) y_pred (torch.Tensor): Predicted values tensor of the same shape as y_true Returns: torch.Tensor: MSA value"
SSPB,"Calculate the Symmetric Signed Percentage Bias (SSPB), which measures the percentage bias with consideration for the direction of the bias. Parameters: y_true (torch.Tensor): Actual values tensor of shape (batch_size, variables, horizon) y_pred (torch.Tensor): Predicted values tensor of the same shape as y_true Returns: torch.Tensor: SSPB value"


In [4]:
config = AttrDict(
    study_name = 'general_study_extended', # name of the Optuna study
    study_type = 'bayesian', # 'bayesian' or 'gridsearch' or 'random'
    n_trials = 100, # number of trials
    train_nb = f'{os.getcwd()}/solfsmy_train_losses.ipynb', # path to the notebook to be executed
    search_space = {
        "arch.attn_dropout": DiscreteUniformDistribution(0.0, 0.5, 0.1),
        "arch.d_model": IntUniformDistribution(32, 1024, 32),
        "arch.d_ff": IntUniformDistribution(32, 4096, 32),
        "arch.decomposition": CategoricalDistribution([True, False]),
        "arch.dropout": DiscreteUniformDistribution(0.0, 0.5, 0.1), 
        "arch.individual": CategoricalDistribution([True, False]),
        "arch.n_layers": IntUniformDistribution(1, 6, 1),
        "arch.n_heads": CategoricalDistribution([2, 4, 8, 16, 32]),
        "init_weights": CategoricalDistribution([True, False]), # true = kaiming
        "lookback": CategoricalDistribution([18, 24, 36, 128, 192]),
        "loss_func": CategoricalDistribution(losses),
    },
    loss_config = {
        "delta": FloatDistribution(0.2, 10., step=0.2),
        "quantile": FloatDistribution(0.1, 0.9, step=0.05),
        "alpha": FloatDistribution(0.1, 0.9, step=0.05),
        "primary_loss": CategoricalDistribution(primary_losses),
    },
    # Add extra parameters that are fixed, but not part of the search space
    extra_params = {
        "n_epoch": 100,
        "bs": 64,
        "is_optuna_study": True,
        "metrics_handler_path": val_metrics.save(),
        "main_metric": 'R_Correlation',
    },
    use_wandb = True,
    wandb_mode = 'offline'
)

%store -d best_valid_metrics                                            

config

tmp directory already exists.
ValidationMetricsHandler saved as tmp/metrics.pkl


```json
{ 'extra_params': { 'bs': 64,
                    'is_optuna_study': True,
                    'main_metric': 'R_Correlation',
                    'metrics_handler_path': 'tmp/metrics.pkl',
                    'n_epoch': 1},
  'loss_config': { 'alpha': FloatDistribution(high=0.9, log=False, low=0.1, step=0.05),
                   'delta': FloatDistribution(high=10.0, log=False, low=0.2, step=0.2),
                   'primary_loss': CategoricalDistribution(choices=('MSE', 'MAE', 'MSLE', 'RMSLE', 'Hubber', 'Quantile', 'wMSE', 'wMAE', 'wMSLE', 'wRMSLE', 'wHubber', 'wQuantile')),
                   'quantile': FloatDistribution(high=0.9, log=False, low=0.1, step=0.05)},
  'n_trials': 100,
  'search_space': { 'arch.attn_dropout': DiscreteUniformDistribution(high=0.5, low=0.0, q=0.1),
                    'arch.d_ff': IntUniformDistribution(high=512, low=32, step=32),
                    'arch.d_model': IntUniformDistribution(high=512, low=32, step=32),
                    'arch.decomposition': CategoricalDistribution(choices=(True, False)),
                    'arch.dropout': DiscreteUniformDistribution(high=0.5, low=0.0, q=0.1),
                    'arch.individual': CategoricalDistribution(choices=(True, False)),
                    'arch.n_heads': CategoricalDistribution(choices=(2, 4, 8, 16, 32)),
                    'arch.n_layers': IntUniformDistribution(high=6, low=1, step=1),
                    'init_weights': CategoricalDistribution(choices=(True, False)),
                    'lookback': CategoricalDistribution(choices=(18, 24, 36, 128, 192)),
                    'loss_func': CategoricalDistribution(choices=('MSE', 'MAE', 'MSLE', 'RMSLE', 'Hubber', 'Quantile', 'wMSE', 'wMAE', 'wMSLE', 'wRMSLE', 'wHubber', 'wQuantile', 'Classification', 'Trended'))},
  'study_name': 'general_study_extended',
  'study_type': 'bayesian',
  'train_nb': '/workspaces/sw-driver-forecaster/dev_nbs/solfsmy_train_losses.ipynb',
  'use_wandb': True,
  'wandb_mode': 'offline'}
```

In [5]:
def create_objective(train_nb, search_space, extra_params=None, use_wandb=False):
    """
        Create objective function to be minimized by Optuna.
        Inputs:
            trial: Optuna trial object
            train_nb: path to the training notebook
            search_vars: keys of the search space to be used
            wandb_group: name of the wandb group to be used
        Output:
            valid_loss: validation loss
    """
    def objective(trial:optuna.Trial):
        # Define the parameters to be passed to the training notebook through papermill
        pm_parameters = {}
        for k,v in search_space.items():
            pm_parameters['config.' + k] = trial._suggest(k, v)
            if k == 'loss_func':
                pm_parameters['config.loss_config'] = {}

                if pm_parameters['config.loss_func'] in ['Hubber', 'wHubber']:
                    pm_parameters['config.loss_config'] = {
                        'delta': trial._suggest('delta', config.loss_config['delta'])
                    }

                if pm_parameters['config.loss_func'] in ['Quantile', 'wQuantile']:
                    pm_parameters['config.loss_config'] = {
                        'quantile': trial._suggest('quantile', config.loss_config['quantile'])
                    }

                if pm_parameters['config.loss_func'] in ['Classification', 'Trended']:
                    pm_parameters['config.loss_config'] = {}

                    primary_loss = trial._suggest('primary_loss', config.loss_config['primary_loss'])
                    pm_parameters['config.loss_config']['primary_loss'] = primary_loss

                    if primary_loss in ['Hubber', 'wHubber']:
                        pm_parameters['config.loss_config']['delta'] = trial._suggest('delta', config.loss_config['delta'])

                    if primary_loss in ['Quantile', 'wQuantile']:
                        pm_parameters['config.loss_config']['quantile'] = trial._suggest('quantile', config.loss_config['quantile'])
                    
                    if pm_parameters['config.loss_func'] == 'Classification':
                        pm_parameters['config.loss_config']['alpha'] = trial._suggest('alpha', config.loss_config['alpha'])
                
                print(f"Loss function values: {pm_parameters['config.loss_config']}")

        # Add the extra parameters to the dictionary. The key of every parameter 
        # must be 'config.<param_name>'
        if extra_params is not None:
            for k,v in extra_params.items():
                pm_parameters['config.' + k] = v
                
        # If using wandb, enable that in the training runs, all of them gathered
        # into a group (NOTE: The train nb must have and use these config arguments)
        if use_wandb:
            pm_parameters['config.use_wandb'] = True
            pm_parameters['config.wandb_group'] = config.study_name + '_runs'

        # Call the training notebook using papermill (don't print the output)
        stdout_file = open('tmp/pm_stdout.txt', 'w')
        stderr_file = open('tmp/pm_stderr.txt', 'w')

        pm.execute_notebook(
            train_nb,
            './tmp/pm_output.ipynb',
            parameters = pm_parameters,
            stdout_file = stdout_file,
            stderr_file = stderr_file
        )

        # Close the output files
        stdout_file.close()
        stderr_file.close()

        # Get the output value of interest from the source notebook

        %store -r valid_metrics
        objective_values = val_metrics.get_objective_values(valid_metrics, show_metrics=True)

        
        return *objective_values,

    return objective

In [None]:
# Skip the cells that are unnecessary for the study, such as diagram plots or data updates
train_nb = filter_nb(config.train_nb, ['skip'])

# Take the wandb flag and the study type from `config` instead of hard-coding
# them, so that changing the configuration cell actually affects the study.
obj = create_objective(train_nb, config.search_space, 
                       extra_params=config.extra_params, use_wandb=config.use_wandb)
study = run_optuna_study(obj, study_type=config.study_type, direction=val_metrics.get_study_directions(), path='./tmp',
                 study_name=config.study_name, n_trials=config.n_trials)

In [None]:
# Start a wandb run for the study itself only when wandb is enabled in the config;
# otherwise `run` stays None and the logging cells below are skipped.
if config.use_wandb:
    run = wandb.init(
        config=config,
        mode=config.wandb_mode,
        job_type='optuna-study',
    )
else:
    run = None

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


In [None]:
if run is not None:
    # Gather the parameters, objective values and number of every best trial
    best_trials_info = {}
    for i, trial in enumerate(study.best_trials):
        best_trials_info.update({
            f'best_params_{i}': trial.params,
            f'best_value_{i}': trial.values,
            f'best_trial_number_{i}': trial.number,
        })

    run.log(best_trials_info)
    run.log_artifact(f'./tmp/{config.study_name}.pkl', type='optuna_study')

    # Optuna plots to be logged for each objective, keyed by the prefix of the
    # name under which they are logged
    plot_functions = {
        'contour': optuna.visualization.plot_contour,
        'edf': optuna.visualization.plot_edf,
        'optimization_history': optuna.visualization.plot_optimization_history,
        'parallel_coordinate': optuna.visualization.plot_parallel_coordinate,
        'param_importances': optuna.visualization.plot_param_importances,
        'slice': optuna.visualization.plot_slice,
    }

    # Log visualizations, one set per objective (each objective has a direction)
    for i in range(len(study.directions)):
        target_name = f'Objective {i}'
        objective_value = lambda t, idx=i: t.values[idx]
        run.log({
            f'{plot_name}_obj_{i}': plot(study, target=objective_value, target_name=target_name)
            for plot_name, plot in plot_functions.items()
        })



In [None]:
# Finish the wandb run of the study, if one was started
if run is not None:
    run.finish()

0,1
best_trial_number_0,▁
best_trial_number_1,▁
best_trial_number_10,▁
best_trial_number_11,▁
best_trial_number_12,▁
best_trial_number_13,▁
best_trial_number_14,▁
best_trial_number_15,▁
best_trial_number_16,▁
best_trial_number_17,▁

0,1
best_trial_number_0,2
best_trial_number_1,3
best_trial_number_10,22
best_trial_number_11,24
best_trial_number_12,25
best_trial_number_13,26
best_trial_number_14,32
best_trial_number_15,34
best_trial_number_16,35
best_trial_number_17,36
