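# Hyperparameter optimization

Tunes the hyperparameters of a PTGNN model with Ray Tune (HyperOpt search, ASHA scheduler), driven by a benchmark instruction file, and stores the results.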

## Import section

In [1]:
import sys
import os
from functools import partial

import pandas as pd
import ray
from ray.tune import CLIReporter, JupyterNotebookReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.hyperopt import HyperOptSearch

sys.path.append("../../")

from ptgnn.runtime_config.config_helpers import run_config_adapter, load_and_merge_default_configs
from ptgnn.runtime_config.config import import_as, export_as

In [2]:
from ray import tune, train
import os

## Load default config

In [3]:
# Read the benchmark instruction file; as the displayed output shows, it
# provides the output directory, the list of config files to merge, the
# search space and the hyper-optimization settings.
benchmark_config = import_as(
    "configs/benchmarking/rs/benchmark_instructions_rs_ptree_vertex_default_e_linred.yaml"
)
display(benchmark_config)

{'output_dir': 'results/rs_ptree/vertex_default_e_linred',
 'config_files': ['configs/hyper_param_opt/subsetting.yaml',
  'configs/hyper_param_opt/epoch_reduction.yaml',
  'configs/linear_reduction.yaml',
  'configs/vertex_mode.yaml',
  'configs/datasets/rs_dataset.yaml',
  'configs/models/ptree_e.yaml',
  'configs/general.yaml'],
 'search_space': {'model': {'modules': {1: {'times': 'tune.randint(1,6)'}}}},
 'hyper_settings': {'scheduler': {'grace_period': 2,
   'reduction_factor': 3,
   'brackets': 1},
  'max_concurrent_trials': 2,
  'num_samples': 6,
  'stopper': {'num_results': 5, 'metric_threshold': 0.95}}}

In [4]:
# Combine all config files listed in the benchmark instructions into one
# default configuration that every trial starts from.
config_file_list = benchmark_config['config_files']
default_config = load_and_merge_default_configs(config_file_list)
display(default_config)

{'data': {'subset_size': 10000,
  'dataset': {'graph_mode': 'vertex',
   'type': 'rs',
   'mask_chiral_tags': True,
   'transformation_mode': 'permutation_tree',
   'transformation_parameters': {'k': 3}},
  'loader': {'general': {'n_neighbors_in_circle': 3,
    'batch_size': 32,
    'num_workers': 0},
   'train': {'sampler': 'single_conformer_sampler'},
   'val': {'sampler': 'full_batch'},
   'test': {'sampler': 'full_batch'}}},
 'training': {'n_max_epochs': 10,
  'loss_function': 'cross_entropy',
  'optimization_metric': 'accuracy',
  'optimization_metric_mode': 'max',
  'task_type': 'classification',
  'clip_grad_norm': True},
 'model': {'modules': {1: {'parameter': {'local_model_params': {'edge_mode': 'linear_reduction',
      'k': 3,
      'apply_p_elu': True},
     'local_model': 'permutation_tree_e'},
    'type': 'gps_layer',
    'times': 5},
   0: {'type': 'graph_embedding',
    'parameter': {'node_type': 'linear', 'edge_type': 'linear'}}},
  'out_dim': 1,
  'mode': 'custom',
  

In [5]:
# Store an absolute path to the dataset root directory ("src/<dataset type>"
# resolved against the current working directory) so that the path is not
# specific to a single hyper-opt run.
dataset_settings = default_config['data']['dataset']
dataset_root = os.path.join("src", dataset_settings['type'])
dataset_settings['root'] = os.path.abspath(dataset_root)

In [6]:
# Temporarily cap the number of training epochs: overrides the value loaded
# from the config files; the same entry is later used as max_t of the scheduler.
training_settings = default_config['training']
training_settings['n_max_epochs'] = 3

In [7]:
# show the default config again after the manual overrides (dataset root, epoch limit)
display(default_config)

{'data': {'subset_size': 10000,
  'dataset': {'graph_mode': 'vertex',
   'type': 'rs',
   'mask_chiral_tags': True,
   'transformation_mode': 'permutation_tree',
   'transformation_parameters': {'k': 3},
   'root': 'D:\\DATEN\\Masterarbeit_PTGNN\\notebooks\\hyperoptimization\\src\\rs'},
  'loader': {'general': {'n_neighbors_in_circle': 3,
    'batch_size': 32,
    'num_workers': 0},
   'train': {'sampler': 'single_conformer_sampler'},
   'val': {'sampler': 'full_batch'},
   'test': {'sampler': 'full_batch'}}},
 'training': {'n_max_epochs': 3,
  'loss_function': 'cross_entropy',
  'optimization_metric': 'accuracy',
  'optimization_metric_mode': 'max',
  'task_type': 'classification',
  'clip_grad_norm': True},
 'model': {'modules': {1: {'parameter': {'local_model_params': {'edge_mode': 'linear_reduction',
      'k': 3,
      'apply_p_elu': True},
     'local_model': 'permutation_tree_e'},
    'type': 'gps_layer',
    'times': 5},
   0: {'type': 'graph_embedding',
    'parameter': {'node

## Define search space

In [8]:
def eval_search_space(d):
    """
    Recursively evaluate the string expressions of a (nested) search space.

    Every leaf of ``d`` must be a string containing a Python expression
    (e.g. ``'tune.randint(1,6)'``). Nested dicts are processed recursively.
    The input is left untouched and a new dict with the same keys is
    returned, so calling this function repeatedly on the same config (for
    example when re-running a notebook cell) keeps working.

    :param d: (nested) dict whose leaf values are expression strings
    :return: new dict of the same structure with every leaf replaced by the
        result of evaluating its expression
    :raises TypeError: if a leaf is neither a dict nor a str
    """
    evaluated = {}
    for key, value in d.items():
        if isinstance(value, dict):
            evaluated[key] = eval_search_space(value)
        elif isinstance(value, str):
            # SECURITY: eval executes arbitrary code taken from the config
            # file - only use this with config files from a trusted source.
            evaluated[key] = eval(value)
        else:
            # TypeError is still an Exception, so existing handlers keep working
            raise TypeError("unknown type in search space, only use str as values")
    return evaluated

In [9]:
# turn the string expressions of the benchmark file's search space into evaluated objects
search_space = eval_search_space(benchmark_config['search_space'])

## Define trainable function

In [10]:
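# NOTE: earlier partial-based variant, kept for reference only; the def-based
# trainable_function defined in the next cell is the one handed to the tuner.
#trainable_function = partial(
#    run_config_adapter,
#    default_config=default_config,
#    report=True,
#    verbose=False
#)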

In [11]:
def trainable_function(config):
    """
    Trainable used for tuning: runs a single configuration through
    ``run_config_adapter``.

    :param config: configuration of the trial, forwarded as first positional
        argument; the notebook-level ``default_config`` is passed alongside it
    :return: None, the return value of ``run_config_adapter`` is discarded
    """
    adapter_options = {
        "default_config": default_config,
        "report": True,
        "verbose": False,
        "device": None,
    }
    run_config_adapter(config, **adapter_options)

## Short test

In [12]:
# Hand-written configuration for a quick manual run without the tuner.
test_config = {
    'training': {'n_max_epochs': 10},
    'model': {
        'modules': {
            # module 1: five repetitions (10 was noted as an alternative), no dropout
            1: {'times': 5, 'parameter': {'dropout': 0.0}},
        },
    },
}

In [13]:
# Manual run of test_config on the GPU (verbose, report switched off).
test_fn = partial(
    run_config_adapter, default_config=default_config, report=False, verbose=True, device='cuda'
)
gpu_test_result = test_fn(test_config)
display(gpu_test_result)


Epoch: 0


16it [01:10,  4.40s/it]


KeyboardInterrupt: 

In [14]:
# Same manual run of test_config, but on the CPU.
cpu_test_result = run_config_adapter(
    test_config, default_config=default_config, report=False, verbose=True, device='cpu'
)
display(cpu_test_result)


Epoch: 0


10it [00:15,  1.52s/it]


KeyboardInterrupt: 

In [13]:
# Manual run of test_config on the GPU (verbose, report switched off);
# the call is the last expression, so its return value becomes the cell output.
test_fn = partial(
    run_config_adapter, default_config=default_config, report=False, verbose=True, device='cuda'
)
test_fn(test_config)


Epoch: 0


312it [25:51,  4.97s/it]
 59%|█████▉    | 184/313 [07:45<05:26,  2.53s/it]


KeyboardInterrupt: 

In [14]:
# Same manual run of test_config, but on the CPU.
cpu_test_result = run_config_adapter(
    test_config, default_config=default_config, report=False, verbose=True, device='cpu'
)
display(cpu_test_result)


Epoch: 0


312it [11:19,  2.18s/it]
100%|██████████| 313/313 [02:48<00:00,  1.85it/s]



Epoch: 1


162it [05:40,  2.10s/it]


KeyboardInterrupt: 

## Set up tuner

In [None]:
# Name and direction of the score to optimize: the configured optimization
# metric prefixed with "val_", together with its mode from the same section.
metric_settings = default_config['training']
optimization_score = f"val_{metric_settings['optimization_metric']}"
score_mode = metric_settings['optimization_metric_mode']

In [None]:
# fetch the hyper-optimization settings of the benchmark file; per the displayed
# config they hold 'scheduler', 'max_concurrent_trials', 'num_samples' and 'stopper'
# NOTE(review): the 'stopper' entry is not used by any of the visible cells - confirm intent
hyper_settings = benchmark_config['hyper_settings']

In [None]:
# Start Ray with the directory two levels up as working_dir (the same
# directory that the import cell appends to sys.path - presumably so that the
# workers can import ptgnn; confirm).
# ignore_reinit_error makes the cell safe to re-run in a live kernel: without
# it a second ray.init() call raises because Ray is already initialised.
ray.init(
    runtime_env={
        "working_dir": "../../"
    },
    ignore_reinit_error=True,
)

In [None]:
# Trainable wrapped with the resources requested per trial (4 CPUs, half a GPU).
trainable_with_resources = tune.with_resources(trainable_function, {"cpu": 4, "gpu": 0.5})

# HyperOpt proposes the configurations; its seed is fixed to 13.
hyperopt_search = HyperOptSearch(
    metric=optimization_score, mode=score_mode, random_state_seed=13
)

# ASHA scheduler: max_t is the epoch limit of the default config, the
# remaining settings come from the benchmark file.
scheduler_settings = hyper_settings['scheduler']
asha_scheduler = ASHAScheduler(
    max_t=default_config['training']['n_max_epochs'],
    grace_period=scheduler_settings['grace_period'],
    reduction_factor=scheduler_settings['reduction_factor'],
    brackets=scheduler_settings['brackets'],
)

tuning_settings = tune.TuneConfig(
    metric=optimization_score,
    mode=score_mode,
    search_alg=hyperopt_search,
    scheduler=asha_scheduler,
    num_samples=hyper_settings['num_samples'],
    max_concurrent_trials=hyper_settings['max_concurrent_trials'],
)

# Only the optimized score is listed as metric column of the progress reporter.
run_settings = train.RunConfig(
    progress_reporter=CLIReporter(metric_columns=[optimization_score])
)

# define tuner and execute
tuner = tune.Tuner(
    trainable_with_resources,
    param_space=search_space,
    tune_config=tuning_settings,
    run_config=run_settings,
)
results = tuner.fit()

In [None]:
# show the object returned by tuner.fit()
results

In [None]:
# tabular overview of the tuning results
results.get_dataframe()

In [None]:
# Metric history per trial, keyed by the trial's result path. Trials that never
# reported a result have no metrics dataframe (None) and are left out.
dfs = {
    result.path: result.metrics_dataframe
    for result in results
    if result.metrics_dataframe is not None
}
# Plot by epoch
ax = None  # This plots everything on the same plot
for d in dfs.values():
    # use the configured score column instead of a hard-coded 'val_accuracy'
    # so the plot follows the optimization metric of the config
    if optimization_score not in d.columns:
        continue
    ax = d[optimization_score].plot(ax=ax, legend=False)

# label the axes so the figure can be read on its own
if ax is not None:
    ax.set_xlabel("epoch")
    ax.set_ylabel(optimization_score)

## Storing results

In [None]:
output_path = benchmark_config['output_dir']

# make sure that output_dir exists: the configured path is nested
# (e.g. 'results/rs_ptree/...'), so missing parent directories have to be
# created as well; exist_ok keeps the cell re-runnable
os.makedirs(output_path, exist_ok=True)

In [None]:
# save the merged default config (including the overrides made in this notebook:
# dataset root and epoch limit) as YAML next to the results
export_as(default_config, os.path.join(output_path, "general_config.yaml"), save_type='yaml')

In [None]:
# save results dataframe without the row index; index=False is the documented
# boolean flag (index=None only worked because None is falsy)
results_overview = results.get_dataframe()
results_overview.to_csv(os.path.join(output_path, "results.csv"), index=False)

In [None]:
# for each trial save its metric history (CSV) and its config (YAML),
# both named after the trial id
for result in results:
    # get metrics
    trial_metrics = result.metrics_dataframe

    # a trial that never reported a result has no metric history; without it
    # there is no trial id to name the files after, so skip it with a notice
    # instead of failing on the attribute access below
    if trial_metrics is None or trial_metrics.empty:
        print(f"skipping trial without reported metrics: {result.path}")
        continue

    # get trial id (positional access, independent of the index labels)
    trial_id = trial_metrics['trial_id'].iloc[0]

    # get config
    trial_config = result.config

    # saving
    trial_metrics.to_csv(os.path.join(output_path, f"{trial_id}.csv"), index=False)
    export_as(trial_config, os.path.join(output_path, f"{trial_id}.yaml"), save_type='yaml')