# Parameter search with optuna - ETNN

## Importing section

In [1]:
import optuna

import numpy as np

import torch
from sklearn.metrics import r2_score
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.routines.run_config import choice_dataset, choice_trainloader, choice_loss, choice_optim
from etnn.tools.training import train_epoch, eval_epoch
from etnn.tools.training_tools import ConfigStore, seeding_all

## Definition of objective function for ETNN

In [3]:
def objective(trial):
    # init default config
    config = ConfigStore(
        in_dim=15,
        hidden_dim=trial.suggest_int("hidden_dim", 16, 512, step=16),
        out_dim=1,
        k=trial.suggest_int("k", 1, 5),
        dataset=-1,
        ds_size=10_000,
        num_gondolas=10,
        num_part_pg=5,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30, # real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=1/1000
    )

    # loading dataset
    dataset, df_index = choice_dataset(config, dataset_path)
    # splitting off test dataset
    generator = torch.Generator().manual_seed(config.seed)
    train_ds, val_ds, _ = random_split(
        dataset,
        [1 - test_perc - val_perc, val_perc, test_perc],
        generator=generator
    )

    # loaders
    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(val_ds, batch_size=4 * config.batch_size, shuffle=False)

    # build tree
    tree_structure = TreeNode(
        node_type="C",
        children=[
            TreeNode("P", [TreeNode("E", config.num_part_pg)])
            for _ in range(config.num_gondolas)
        ]
    )

    # define device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # set seed for reproducability
    seeding_all(config.seed)

    # define model
    model = LayerManagementFramework(
        in_dim=config.in_dim,
        tree=tree_structure,
        hidden_dim=config.hidden_dim,
        out_dim=config.out_dim,
        k=config.k
    ).to(device)

    # learning tools
    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # init score list
    score_list = []

    # train for specified number of epochs
    for epoch in range(config.num_max_epochs):
        _, _, _ = train_epoch(
            model,
            train_loader,
            optimizer,
            device,
            criterion
        )

        _, val_true_y, val_pred_y = eval_epoch(
            model,
            val_loader,
            device,
            criterion
        )

        # calc r2 score and append
        score = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        score_list += [score]
        trial.report(score, epoch)

    # calculate objective
    # display(score_list)
    # idea: last x r2 scores (why not last one? for stability purposes)
    obj = np.array(score_list)[-stability_count:]
    return np.mean(obj)

## Tree advanced label

In [2]:
# setting global parameters
dataset_path = "../datasets/"
label = "tree_advanced" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [4]:
study_tree_advanced = optuna.create_study(study_name="Best tree advanced label config", directions=['maximize'])
study_tree_advanced.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-06 14:14:41,063] A new study created in memory with name: Best tree advanced label config


  0%|          | 0/200 [00:00<?, ?it/s]

[I 2023-12-06 14:16:16,163] Trial 0 finished with value: 0.977835170258216 and parameters: {'hidden_dim': 512, 'k': 1, 'learning_rate': 0.00011521278895657964, 'batcher': False}. Best is trial 0 with value: 0.977835170258216.
[I 2023-12-06 14:17:46,148] Trial 1 finished with value: -1.2182537060438932 and parameters: {'hidden_dim': 512, 'k': 4, 'learning_rate': 1.3043904324244627e-05, 'batcher': False}. Best is trial 0 with value: 0.977835170258216.
[I 2023-12-06 14:19:11,420] Trial 2 finished with value: -45.13049526861947 and parameters: {'hidden_dim': 448, 'k': 1, 'learning_rate': 0.0001684222214709156, 'batcher': True}. Best is trial 0 with value: 0.977835170258216.
[I 2023-12-06 14:20:37,669] Trial 3 finished with value: 0.7065345424868887 and parameters: {'hidden_dim': 432, 'k': 3, 'learning_rate': 8.67418895440181e-05, 'batcher': True}. Best is trial 0 with value: 0.977835170258216.
[I 2023-12-06 14:22:01,657] Trial 4 finished with value: -0.18986505702947146 and parameters: {'h

In [5]:
best_par_tree_advanced = study_tree_advanced.best_params
print(best_par_tree_advanced)

{'hidden_dim': 16, 'k': 2, 'learning_rate': 0.0027798626982959837, 'batcher': False}


In [6]:
study_tree_advanced.sampler.__class__.__name__

'TPESampler'

In [18]:
df_tree_advanced = study_tree_advanced.trials_dataframe()
df_tree_advanced.to_csv("study_label-tree-advanced_normalized.csv")
df_tree_advanced

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.977835,2023-12-06 14:14:41.071680,2023-12-06 14:16:16.162481,0 days 00:01:35.090801,False,512,1,0.000115,COMPLETE
1,1,-1.218254,2023-12-06 14:16:16.164481,2023-12-06 14:17:46.148148,0 days 00:01:29.983667,False,512,4,0.000013,COMPLETE
2,2,-45.130495,2023-12-06 14:17:46.150148,2023-12-06 14:19:11.420434,0 days 00:01:25.270286,True,448,1,0.000168,COMPLETE
3,3,0.706535,2023-12-06 14:19:11.421433,2023-12-06 14:20:37.669756,0 days 00:01:26.248323,True,432,3,0.000087,COMPLETE
4,4,-0.189865,2023-12-06 14:20:37.671758,2023-12-06 14:22:01.657309,0 days 00:01:23.985551,False,304,4,0.000116,COMPLETE
...,...,...,...,...,...,...,...,...,...,...
195,195,0.741964,2023-12-06 19:45:39.547731,2023-12-06 19:47:03.510786,0 days 00:01:23.963055,False,64,3,0.007251,COMPLETE
196,196,-0.097855,2023-12-06 19:47:03.512787,2023-12-06 19:48:30.312245,0 days 00:01:26.799458,False,192,1,0.002807,COMPLETE
197,197,0.933515,2023-12-06 19:48:30.313750,2023-12-06 19:50:10.557533,0 days 00:01:40.243783,False,240,1,0.003866,COMPLETE
198,198,0.990716,2023-12-06 19:50:10.560536,2023-12-06 19:51:59.055739,0 days 00:01:48.495203,True,32,1,0.001768,COMPLETE


## Tree label

In [21]:
# setting global parameters
dataset_path = "../datasets/"
label = "tree" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [22]:
study_tree = optuna.create_study(study_name="Best tree label config", directions=['maximize'])
study_tree.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-06 20:20:50,394] A new study created in memory with name: Best tree label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-06 20:22:22,405] Trial 0 finished with value: -585595776802.6683 and parameters: {'hidden_dim': 288, 'k': 4, 'learning_rate': 0.007237783576337556, 'batcher': False}. Best is trial 0 with value: -585595776802.6683.
[I 2023-12-06 20:24:17,455] Trial 1 finished with value: -0.330745084412542 and parameters: {'hidden_dim': 320, 'k': 5, 'learning_rate': 0.004693019989987263, 'batcher': False}. Best is trial 1 with value: -0.330745084412542.
[I 2023-12-06 20:26:02,719] Trial 2 finished with value: -146.40988886406535 and parameters: {'hidden_dim': 96, 'k': 2, 'learning_rate': 0.0022928629972609584, 'batcher': False}. Best is trial 1 with value: -0.330745084412542.
[I 2023-12-06 20:27:55,419] Trial 3 finished with value: 0.1203045834559908 and parameters: {'hidden_dim': 144, 'k': 3, 'learning_rate': 0.00020781941321065765, 'batcher': True}. Best is trial 3 with value: 0.1203045834559908.
[I 2023-12-06 20:29:45,510] Trial 4 finished with value: -1354.9940499355241 and parameters: {

In [23]:
best_par_tree = study_tree.best_params
print(best_par_tree)

{'hidden_dim': 64, 'k': 1, 'learning_rate': 5.583080222377766e-05, 'batcher': True}


In [24]:
study_tree.sampler.__class__.__name__

'TPESampler'

In [25]:
df_tree = study_tree.trials_dataframe()
df_tree.to_csv("study_label-tree_normalized.csv")
df_tree

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,-585595800000.0,2023-12-06 20:20:50.399556,2023-12-06 20:22:22.404894,0 days 00:01:32.005338,False,288,4,0.007238,COMPLETE
1,1,-0.3307451,2023-12-06 20:22:22.406894,2023-12-06 20:24:17.455261,0 days 00:01:55.048367,False,320,5,0.004693,COMPLETE
2,2,-146.4099,2023-12-06 20:24:17.458105,2023-12-06 20:26:02.719426,0 days 00:01:45.261321,False,96,2,0.002293,COMPLETE
3,3,0.1203046,2023-12-06 20:26:02.720426,2023-12-06 20:27:55.419247,0 days 00:01:52.698821,True,144,3,0.000208,COMPLETE
4,4,-1354.994,2023-12-06 20:27:55.421246,2023-12-06 20:29:45.510860,0 days 00:01:50.089614,True,352,4,0.000243,COMPLETE
5,5,0.6542202,2023-12-06 20:29:45.512859,2023-12-06 20:31:41.722930,0 days 00:01:56.210071,False,368,1,0.000426,COMPLETE
6,6,-1.75948,2023-12-06 20:31:41.724929,2023-12-06 20:33:35.013115,0 days 00:01:53.288186,False,304,5,1.8e-05,COMPLETE
7,7,-0.7006107,2023-12-06 20:33:35.014116,2023-12-06 20:35:30.919386,0 days 00:01:55.905270,False,432,3,0.001973,COMPLETE
8,8,-0.04605021,2023-12-06 20:35:30.921434,2023-12-06 20:37:08.417338,0 days 00:01:37.495904,True,352,2,0.00128,COMPLETE
9,9,0.1920236,2023-12-06 20:37:08.419337,2023-12-06 20:38:37.776797,0 days 00:01:29.357460,False,464,4,0.00226,COMPLETE


## Default

In [36]:
# setting global parameters
dataset_path = "../datasets/"
label = "default" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [37]:
study_default = optuna.create_study(study_name="Best default label config", directions=['maximize'])
study_default.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-06 21:45:57,607] A new study created in memory with name: Best default label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-06 21:47:32,798] Trial 0 finished with value: -1627.4309482814422 and parameters: {'hidden_dim': 512, 'k': 4, 'learning_rate': 0.002581102885919996, 'batcher': False}. Best is trial 0 with value: -1627.4309482814422.
[I 2023-12-06 21:49:16,186] Trial 1 finished with value: -0.7147257352398951 and parameters: {'hidden_dim': 208, 'k': 5, 'learning_rate': 0.001439126591338573, 'batcher': True}. Best is trial 1 with value: -0.7147257352398951.
[I 2023-12-06 21:51:18,730] Trial 2 finished with value: -0.2880695958798013 and parameters: {'hidden_dim': 416, 'k': 2, 'learning_rate': 0.0027850275047091484, 'batcher': True}. Best is trial 2 with value: -0.2880695958798013.
[I 2023-12-06 21:53:03,541] Trial 3 finished with value: -0.5305804920359419 and parameters: {'hidden_dim': 16, 'k': 5, 'learning_rate': 0.00023930619590446567, 'batcher': True}. Best is trial 2 with value: -0.2880695958798013.
[I 2023-12-06 21:54:59,892] Trial 4 finished with value: -0.6143622402672674 and paramete

In [38]:
best_par_default = study_default.best_params
print(best_par_default)

{'hidden_dim': 240, 'k': 5, 'learning_rate': 2.9248136362925073e-05, 'batcher': False}


In [39]:
study_default.sampler.__class__.__name__

'TPESampler'

In [40]:
df_default = study_default.trials_dataframe()
df_default.to_csv("study_label-default_normalized.csv")
df_default

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,-1627.430948,2023-12-06 21:45:57.612615,2023-12-06 21:47:32.798901,0 days 00:01:35.186286,False,512,4,0.002581,COMPLETE
1,1,-0.714726,2023-12-06 21:47:32.799899,2023-12-06 21:49:16.185228,0 days 00:01:43.385329,True,208,5,0.001439,COMPLETE
2,2,-0.28807,2023-12-06 21:49:16.189229,2023-12-06 21:51:18.730740,0 days 00:02:02.541511,True,416,2,0.002785,COMPLETE
3,3,-0.53058,2023-12-06 21:51:18.733255,2023-12-06 21:53:03.541146,0 days 00:01:44.807891,True,16,5,0.000239,COMPLETE
4,4,-0.614362,2023-12-06 21:53:03.543785,2023-12-06 21:54:59.892620,0 days 00:01:56.348835,True,448,4,0.001205,COMPLETE
5,5,-0.635852,2023-12-06 21:54:59.894128,2023-12-06 21:57:02.480646,0 days 00:02:02.586518,True,384,5,2.4e-05,COMPLETE
6,6,-1.530061,2023-12-06 21:57:02.481643,2023-12-06 21:58:59.256473,0 days 00:01:56.774830,False,176,3,0.001561,COMPLETE
7,7,-0.602636,2023-12-06 21:58:59.259461,2023-12-06 22:00:52.620925,0 days 00:01:53.361464,True,112,1,1.4e-05,COMPLETE
8,8,-0.615209,2023-12-06 22:00:52.622926,2023-12-06 22:02:25.605928,0 days 00:01:32.983002,True,128,3,0.001122,COMPLETE
9,9,-0.622172,2023-12-06 22:02:25.607929,2023-12-06 22:04:13.670491,0 days 00:01:48.062562,True,336,3,0.002352,COMPLETE
