# Optuna

In [None]:
import torch, argparse, os, sys, optuna, itertools, pickle, warnings, logging, time
import numpy as np, pytorch_lightning as pl
from tqdm.notebook import tqdm
from operator import itemgetter
from Optuna import *

# Keep the notebook output clean: ignore every Python warning and disable
# all logging calls, whatever their severity.
warnings.filterwarnings('ignore')
logging.disable(sys.maxsize)

# The notebook refuses to continue without a CUDA device.
if not torch.cuda.is_available():
    raise RuntimeError('No GPU found.')
print('GPU available: ' + torch.cuda.get_device_name())

In the following cell, the architecture search space is specified. <br>
`NUM_TRIALS_PER_TRIAL`: how many times each combination of parameters is trained (Optuna then optimizes the average loss of these trained models). <br>
`MAX_EPOCHS`: maximum number of training epochs for each model. <br>
`unique_trials`: number of unique trials per study; Optuna is not allowed to evaluate the same parameter combination, i.e. the same architecture, more than once. <br>
`name_string_helper`: a prefix for the names of the generated .pickle files that helps to keep track of them.

In [None]:
# Directory that holds the dataset files.
path_data = "../datasets"

# --- Architecture search space -------------------------------------------
# Convolutional part: bounds on the number of convolutional layers and the
# channel values handed to the objective (presumably the per-layer choices).
min_conv_layers, max_conv_layers = 1, 3
channels = [10, 13, 16, 19, 22]

# Dense layer: values handed to the objective; left empty for this run.
units = []

# --- Study settings --------------------------------------------------------
# Number of trainings per parameter combination.
NUM_TRIALS_PER_TRIAL = 3
# Upper bound on the number of training epochs.
MAX_EPOCHS = 200
# Number of distinct parameter combinations to try in each study.
unique_trials = 50

# Prefix of the generated .pickle file names.
name_string_helper = 'test_run'

In [None]:
# Dimensions encoded in the dataset file names.
nt, nx = 60, 4

# Locate and load the full training and validation datasets.
train_path = os.path.join(path_data, "dataset-train-{:d}-{:d}.pt".format(nt, nx))
val_path = os.path.join(path_data, "dataset-val-{:d}-{:d}.pt".format(nt, nx))

train_data = torch.load(train_path)
val_data = torch.load(val_path)

print("Total training examples: {}".format(len(train_data)))
print("Total validation examples: {}".format(len(val_data)))

# Training-set sizes to study; each validation set is a tenth of the
# corresponding training-set size.
train_sample_numbers = [50, 200, 2000, 20000]
#train_sample_numbers = list(itertools.chain(range(100, 250, 50), range(250, 1000, 250), range(1000, 3000, 500), range(3000, 20001, 1000)))
val_sample_numbers = [n_train // 10 for n_train in train_sample_numbers]

# Index ranges selecting the first n samples of each dataset.
train_subsets = [range(n_train) for n_train in train_sample_numbers]
val_subsets = [range(n_val) for n_val in val_sample_numbers]

# Default hyperparameters shared by all studies (batch_size is added later,
# per training-set size).
hparams_defaults = argparse.Namespace(
    num_workers=0,
    lr=1e-2,
    weight_decay=0.,
    name=name_string_helper,
)

In [None]:
# Run one Optuna study per training-set size and pickle each study's results.
with tqdm(total=len(train_sample_numbers)) as pbar_samples:

    for train_subset, val_subset in zip(train_subsets, val_subsets):

        # Every training-set size gets its own study, trained and validated on
        # the corresponding subsets of the full datasets.
        train_data_subset = torch.utils.data.Subset(train_data, train_subset)
        val_data_subset = torch.utils.data.Subset(val_data, val_subset)
        print("Training examples used: {}".format(len(train_data_subset)))
        print("Validation examples used: {}".format(len(val_data_subset)))

        MODEL_DIR = 'optuna/eq_{}_train_samples/'.format(len(train_data_subset))

        # The number of validation samples has to be divisible by the batch
        # size so that the loss and the MSE losses are averaged correctly at
        # validation_epoch_end.
        #
        # The usual batch size is 100. Training sets with fewer than 500
        # samples use 50 instead, to keep mini-batch (rather than full-batch)
        # training and to add stochasticity for the small sets.
        #
        # NOTE(review): with the active train_sample_numbers the smallest
        # subset has 50 training samples (one batch of 50), and the matching
        # validation subsets hold 5 and 20 samples, which are not multiples
        # of 50 - confirm this is intended.
        hparams_defaults.batch_size = 50 if len(train_data_subset) < 500 else 100

        # Create the study.
        study = optuna.create_study(direction='minimize')

        with tqdm(total=unique_trials) as pbar_unique_trials:

            # Run one trial at a time until unique_trials DISTINCT parameter
            # combinations have been tried; repeated combinations do not count.
            # NOTE(review): this never terminates if the search space offers
            # fewer than unique_trials distinct combinations.
            while len({str(t.params) for t in study.trials}) < unique_trials:
                study.optimize(
                    lambda trial: objective(
                        trial,
                        hparams_defaults,
                        train_data_subset,
                        val_data_subset,
                        min_conv_layers,
                        max_conv_layers,
                        channels,
                        units,
                        model_dir=MODEL_DIR,
                        num_trials_per_trial=NUM_TRIALS_PER_TRIAL,
                        max_epochs=MAX_EPOCHS,
                    ),
                    n_trials=1,
                )
                # Only trials that produced a value advance the progress bar.
                if study.trials[-1].value is not None:
                    pbar_unique_trials.update(1)

        # Summarize the study and its best trial.
        print('Number of training data:', len(train_data_subset))
        print('Number of finished trials:', len(study.trials))
        print('Best trial:')
        trial = study.best_trial
        print('vloss:', trial.value)
        print('Params:')
        for key, value in trial.params.items():
            print('{}: {}'.format(key, value))
        print('\n')

        # Trials pruned because their parameter combination had already been
        # used carry None as their value. They are dropped, and the remaining
        # [value, params] pairs are ordered by ascending mean loss.
        model_results = sorted(
            ([t.value, t.params] for t in study.trials if t.value is not None),
            key=itemgetter(0),
        )

        # Make sure the output directory exists.
        pickle_path = 'optuna_pickles/'
        os.makedirs(pickle_path, exist_ok=True)

        # Never overwrite an existing result file: prepend a timestamp instead.
        filename = name_string_helper + '_{}_training_samples.pickle'.format(len(train_data_subset))
        if os.path.isfile(os.path.join(pickle_path, filename)):
            filename = str(time.time()) + filename
            print('File already existed, timestamp was prepended to filename.')
        with open(os.path.join(pickle_path, filename), 'wb') as file:
            pickle.dump(model_results, file)
        pbar_samples.update(1)