# Parameter search with optuna - ETNN

## Importing section

In [1]:
import optuna

import numpy as np

import torch
from sklearn.metrics import r2_score
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.routines.run_config import choice_dataset, choice_trainloader, choice_loss, choice_optim
from etnn.tools.training import train_epoch, eval_epoch
from etnn.tools.training_tools import ConfigStore, seeding_all

## Definition of objective function for ETNN

In [2]:
def objective(trial):
    # init default config
    config = ConfigStore(
        in_dim=15,
        hidden_dim=trial.suggest_int("hidden_dim", 16, 512, step=16),
        out_dim=1,
        k=trial.suggest_int("k", 1, 5),
        dataset=-1,
        ds_size=10_000,
        num_gondolas=10,
        num_part_pg=5,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30, # real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=1/1000
    )

    # loading dataset
    dataset, df_index = choice_dataset(config, dataset_path)
    # splitting off test dataset
    generator = torch.Generator().manual_seed(config.seed)
    train_ds, val_ds, _ = random_split(
        dataset,
        [1 - test_perc - val_perc, val_perc, test_perc],
        generator=generator
    )

    # loaders
    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(val_ds, batch_size=4 * config.batch_size, shuffle=False)

    # build tree
    tree_structure = TreeNode(
        node_type="C",
        children=[
            TreeNode("P", [TreeNode("E", config.num_part_pg)])
            for _ in range(config.num_gondolas)
        ]
    )

    # define device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # set seed for reproducability
    seeding_all(config.seed)

    # define model
    model = LayerManagementFramework(
        in_dim=config.in_dim,
        tree=tree_structure,
        hidden_dim=config.hidden_dim,
        out_dim=config.out_dim,
        k=config.k
    ).to(device)

    # learning tools
    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # init score list
    score_list = []

    # train for specified number of epochs
    for epoch in range(config.num_max_epochs):
        _, _, _ = train_epoch(
            model,
            train_loader,
            optimizer,
            device,
            criterion
        )

        _, val_true_y, val_pred_y = eval_epoch(
            model,
            val_loader,
            device,
            criterion
        )

        # calc r2 score and append
        score = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        score_list += [score]
        trial.report(score, epoch)

    # calculate objective
    # display(score_list)
    # idea: last x r2 scores (why not last one? for stability purposes)
    obj = np.array(score_list)[-stability_count:]
    return np.mean(obj)

## Tree advanced label

In [3]:
# setting global parameters
dataset_path = "../datasets/"
label = "tree_advanced" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [4]:
study_tree_advanced = optuna.create_study(study_name="Best tree advanced label config", directions=['maximize'])
study_tree_advanced.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 16:13:05,767] A new study created in memory with name: Best tree advanced label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 16:14:16,353] Trial 0 finished with value: 0.6197993166565362 and parameters: {'hidden_dim': 48, 'k': 2, 'learning_rate': 0.001404760921273804, 'batcher': True}. Best is trial 0 with value: 0.6197993166565362.
[I 2023-12-08 16:15:25,168] Trial 1 finished with value: 0.5448271922035102 and parameters: {'hidden_dim': 48, 'k': 3, 'learning_rate': 0.005479435647565049, 'batcher': False}. Best is trial 0 with value: 0.6197993166565362.
[I 2023-12-08 16:16:39,430] Trial 2 finished with value: 0.8597172263184693 and parameters: {'hidden_dim': 208, 'k': 2, 'learning_rate': 0.0005585792205323666, 'batcher': True}. Best is trial 2 with value: 0.8597172263184693.
[I 2023-12-08 16:17:53,717] Trial 3 finished with value: 0.7148301882001483 and parameters: {'hidden_dim': 240, 'k': 2, 'learning_rate': 0.001160436782958456, 'batcher': False}. Best is trial 2 with value: 0.8597172263184693.
[I 2023-12-08 16:19:07,696] Trial 4 finished with value: 0.8753248679082833 and parameters: {'hidde

In [5]:
best_par_tree_advanced = study_tree_advanced.best_params
print(best_par_tree_advanced)

{'hidden_dim': 176, 'k': 3, 'learning_rate': 0.005816868281625584, 'batcher': True}


In [6]:
study_tree_advanced.sampler.__class__.__name__

'TPESampler'

In [7]:
df_tree_advanced = study_tree_advanced.trials_dataframe()
df_tree_advanced.to_csv("study_label-tree-advanced_normalized.csv")
df_tree_advanced

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.619799,2023-12-08 16:13:05.774042,2023-12-08 16:14:16.352318,0 days 00:01:10.578276,True,48,2,0.001405,COMPLETE
1,1,0.544827,2023-12-08 16:14:16.354317,2023-12-08 16:15:25.167210,0 days 00:01:08.812893,False,48,3,0.005479,COMPLETE
2,2,0.859717,2023-12-08 16:15:25.169219,2023-12-08 16:16:39.430427,0 days 00:01:14.261208,True,208,2,0.000559,COMPLETE
3,3,0.71483,2023-12-08 16:16:39.432426,2023-12-08 16:17:53.716282,0 days 00:01:14.283856,False,240,2,0.00116,COMPLETE
4,4,0.875325,2023-12-08 16:17:53.718281,2023-12-08 16:19:07.696402,0 days 00:01:13.978121,True,144,5,0.001576,COMPLETE
5,5,0.909923,2023-12-08 16:19:07.697402,2023-12-08 16:20:26.513709,0 days 00:01:18.816307,True,512,2,0.000696,COMPLETE
6,6,0.780209,2023-12-08 16:20:26.514709,2023-12-08 16:21:44.923145,0 days 00:01:18.408436,True,464,3,8.2e-05,COMPLETE
7,7,0.756667,2023-12-08 16:21:44.925146,2023-12-08 16:23:01.388662,0 days 00:01:16.463516,False,384,4,4.1e-05,COMPLETE
8,8,0.790952,2023-12-08 16:23:01.389659,2023-12-08 16:24:17.673083,0 days 00:01:16.283424,False,384,3,6.8e-05,COMPLETE
9,9,0.959574,2023-12-08 16:24:17.675080,2023-12-08 16:25:35.402398,0 days 00:01:17.727318,True,448,3,0.000661,COMPLETE


## Tree label

In [8]:
# setting global parameters
dataset_path = "../datasets/"
label = "tree" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [9]:
study_tree = optuna.create_study(study_name="Best tree label config", directions=['maximize'])
study_tree.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 17:18:24,802] A new study created in memory with name: Best tree label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 17:19:35,815] Trial 0 finished with value: 0.7143654093002982 and parameters: {'hidden_dim': 64, 'k': 1, 'learning_rate': 0.00808897486316116, 'batcher': True}. Best is trial 0 with value: 0.7143654093002982.
[I 2023-12-08 17:20:52,797] Trial 1 finished with value: 0.6576425911463057 and parameters: {'hidden_dim': 144, 'k': 2, 'learning_rate': 0.0005869126605231803, 'batcher': False}. Best is trial 0 with value: 0.7143654093002982.
[I 2023-12-08 17:22:04,168] Trial 2 finished with value: 0.05168457880629276 and parameters: {'hidden_dim': 80, 'k': 2, 'learning_rate': 5.571384041869843e-05, 'batcher': True}. Best is trial 0 with value: 0.7143654093002982.
[I 2023-12-08 17:23:20,408] Trial 3 finished with value: 0.5012220903667883 and parameters: {'hidden_dim': 272, 'k': 1, 'learning_rate': 1.9399601152500533e-05, 'batcher': True}. Best is trial 0 with value: 0.7143654093002982.
[I 2023-12-08 17:24:38,644] Trial 4 finished with value: 0.6867287496832178 and parameters: {'hid

In [10]:
best_par_tree = study_tree.best_params
print(best_par_tree)

{'hidden_dim': 384, 'k': 4, 'learning_rate': 0.0027798074059227373, 'batcher': True}


In [11]:
study_tree.sampler.__class__.__name__

'TPESampler'

In [12]:
df_tree = study_tree.trials_dataframe()
df_tree.to_csv("study_label-tree_normalized.csv")
df_tree

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.714365,2023-12-08 17:18:24.808651,2023-12-08 17:19:35.815875,0 days 00:01:11.007224,True,64,1,0.008089,COMPLETE
1,1,0.657643,2023-12-08 17:19:35.817877,2023-12-08 17:20:52.797704,0 days 00:01:16.979827,False,144,2,0.000587,COMPLETE
2,2,0.051685,2023-12-08 17:20:52.798675,2023-12-08 17:22:04.168601,0 days 00:01:11.369926,True,80,2,5.6e-05,COMPLETE
3,3,0.501222,2023-12-08 17:22:04.170601,2023-12-08 17:23:20.408833,0 days 00:01:16.238232,True,272,1,1.9e-05,COMPLETE
4,4,0.686729,2023-12-08 17:23:20.409831,2023-12-08 17:24:38.644113,0 days 00:01:18.234282,True,400,4,5.1e-05,COMPLETE
5,5,0.580587,2023-12-08 17:24:38.645564,2023-12-08 17:25:49.259188,0 days 00:01:10.613624,False,112,5,0.000341,COMPLETE
6,6,0.673524,2023-12-08 17:25:49.260188,2023-12-08 17:27:03.408039,0 days 00:01:14.147851,False,224,5,0.000416,COMPLETE
7,7,0.586289,2023-12-08 17:27:03.410040,2023-12-08 17:28:15.027396,0 days 00:01:11.617356,True,160,2,0.000197,COMPLETE
8,8,0.107703,2023-12-08 17:28:15.029400,2023-12-08 17:29:22.553419,0 days 00:01:07.524019,True,32,4,0.000166,COMPLETE
9,9,0.670356,2023-12-08 17:29:22.554419,2023-12-08 17:30:33.623341,0 days 00:01:11.068922,False,176,1,0.000217,COMPLETE


## Default

In [13]:
# setting global parameters
dataset_path = "../datasets/"
label = "default" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [14]:
study_default = optuna.create_study(study_name="Best default label config", directions=['maximize'])
study_default.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 18:24:10,900] A new study created in memory with name: Best default label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 18:25:30,076] Trial 0 finished with value: -3884.963782892913 and parameters: {'hidden_dim': 96, 'k': 5, 'learning_rate': 1.678216786299938e-05, 'batcher': True}. Best is trial 0 with value: -3884.963782892913.
[I 2023-12-08 18:26:45,263] Trial 1 finished with value: -1.7640259345411073 and parameters: {'hidden_dim': 192, 'k': 2, 'learning_rate': 8.361871302656154e-05, 'batcher': False}. Best is trial 1 with value: -1.7640259345411073.
[I 2023-12-08 18:28:06,499] Trial 2 finished with value: -1.404838149145542 and parameters: {'hidden_dim': 304, 'k': 1, 'learning_rate': 6.427285053806185e-05, 'batcher': False}. Best is trial 2 with value: -1.404838149145542.
[I 2023-12-08 18:29:25,426] Trial 3 finished with value: -2.299086348234805 and parameters: {'hidden_dim': 272, 'k': 1, 'learning_rate': 5.497795303523248e-05, 'batcher': True}. Best is trial 2 with value: -1.404838149145542.
[I 2023-12-08 18:31:09,199] Trial 4 finished with value: -0.8973224186016925 and parameters: 

In [15]:
best_par_default = study_default.best_params
print(best_par_default)

{'hidden_dim': 288, 'k': 4, 'learning_rate': 0.0025371750159581485, 'batcher': False}


In [16]:
study_default.sampler.__class__.__name__

'TPESampler'

In [17]:
df_default = study_default.trials_dataframe()
df_default.to_csv("study_label-default_normalized.csv")
df_default

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,-3884.963783,2023-12-08 18:24:10.905655,2023-12-08 18:25:30.076900,0 days 00:01:19.171245,True,96,5,1.7e-05,COMPLETE
1,1,-1.764026,2023-12-08 18:25:30.077900,2023-12-08 18:26:45.263649,0 days 00:01:15.185749,False,192,2,8.4e-05,COMPLETE
2,2,-1.404838,2023-12-08 18:26:45.264648,2023-12-08 18:28:06.499404,0 days 00:01:21.234756,False,304,1,6.4e-05,COMPLETE
3,3,-2.299086,2023-12-08 18:28:06.501404,2023-12-08 18:29:25.426486,0 days 00:01:18.925082,True,272,1,5.5e-05,COMPLETE
4,4,-0.897322,2023-12-08 18:29:25.427487,2023-12-08 18:31:09.198669,0 days 00:01:43.771182,True,336,2,0.001021,COMPLETE
5,5,-0.402948,2023-12-08 18:31:09.200668,2023-12-08 18:33:00.465134,0 days 00:01:51.264466,False,496,5,0.000399,COMPLETE
6,6,-0.83762,2023-12-08 18:33:00.467203,2023-12-08 18:34:24.375501,0 days 00:01:23.908298,True,48,3,0.009487,COMPLETE
7,7,-0.110795,2023-12-08 18:34:24.377501,2023-12-08 18:35:55.553725,0 days 00:01:31.176224,False,320,5,0.001922,COMPLETE
8,8,-0.741166,2023-12-08 18:35:55.556238,2023-12-08 18:37:30.119135,0 days 00:01:34.562897,False,400,2,0.000349,COMPLETE
9,9,-0.688807,2023-12-08 18:37:30.120134,2023-12-08 18:38:56.012079,0 days 00:01:25.891945,False,96,4,0.002227,COMPLETE
