### Numerical Experiments

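Several OpenML datasets are used; the tasks that are actually evaluated are the ones in the `ids` list defined further below (this note originally announced 6 datasets but listed only the five ids [31, 1464, 334, 333, 1504]).
The different optimizers available for sparse grid optimization are compared on each dataset.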

In [None]:
import openml

from openml import tasks

import HPO

import pysgpp

import sys

import math
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import sklearn.metrics

from sklearn.model_selection import cross_val_score

import numpy as np
import keras
from keras import backend as K

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from scikeras.wrappers import KerasRegressor, KerasClassifier


def to_standard(lower, upper, value):
    """Rescale ``value`` from the interval [lower, upper] to the unit interval.

    ``lower`` maps to 0 and ``upper`` maps to 1. No check is made that
    ``upper`` differs from ``lower``.
    """
    width = upper - lower
    offset = value - lower
    return offset / width


def from_standard(lower, upper, value):
    """Rescale ``value`` from the unit interval to the interval [lower, upper].

    This is the inverse of ``to_standard``: 0 maps to ``lower`` and 1 maps
    to ``upper``.
    """
    width = upper - lower
    scaled = value * width
    return scaled + lower

# Passed as `budget` to HPO.SparseGridSearchOptimization
# (presumably the number of allowed objective evaluations -- TODO confirm).
BUDGET = 50
# Passed as `verbosity` to HPO.SparseGridSearchOptimization.
VERBOSE = 0
# NOTE(review): CV and SCORING are not referenced in the visible cells.
CV = 2
SCORING = 'neg_mean_squared_error'
# Read by the objective function: True -> score on the test split,
# False -> score on the validation split.
TESTING = False

# String form of every task id the experiment loop has visited.
DATASETS = []


### Hyperparameter space definition

In [None]:
# Search space. Each entry is [kind, *values]: for "list" the values are the
# categorical choices, for "interval-int" / "interval" they are the lower and
# upper bound of the range.
hyperparameterspace = {
    'loss': ["list", 'mean_absolute_error', 'mean_squared_error'],
    'epochs': ["interval-int", 1, 50],
    'batch_size': ["interval-int", 40, 160],
    'optimizer': ["list", 'SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam'],
    'optimizer__learning_rate': ["interval", 0.0000001, 0.01]
}

# The same space with the leading kind tag stripped, so that only the
# choices / bounds remain for each hyperparameter.
hyperparameterspace_special = {
    name: spec[1:] for name, spec in hyperparameterspace.items()
}

In [None]:
# OpenML task ids to run the experiment on; the ids in the trailing comment
# are currently disabled.
ids = [233214, 233211, 359935]#, 359952, 359940, 359931, 317614, 359949, 359934, 359946, 359938, 359932, 359943]

def relu_advanced(x):
    """Apply the Keras backend ReLU to ``x`` with ``max_value`` set to 250."""
    capped = K.relu(x, max_value=250)
    return capped

# Activation used by every Dense layer of the network built in the objective.
ACTIVATION_FUNCTION = relu_advanced


# One (initially empty) result list per task id.
RESULTS_datasets = [[] for _ in ids]

valid_datasets = 0
for i, task_id in enumerate(ids):

    DATASETS.append(str(task_id))

    task = tasks.get_task(task_id)

    # Get dataset by ID
    dataset = task.get_dataset()

    print("Current dataset:", i, "of", len(ids), "with name:", dataset.name)

    # Get the data itself as arrays (features and target)
    data, target, _, _ = dataset.get_data(dataset.default_target_attribute, dataset_format="array")
    has_nan = np.isnan(data).any() or np.isnan(target).any()
    if has_nan:
        # The result list of this dataset stays empty.
        print("NaN detected, skipping dataset")
        continue

    # Only the first 1000 samples are used.
    X = torch.Tensor(data[:1000])
    Y = torch.Tensor(target[:1000])

    # From here on `dataset` refers to the HPO wrapper, not the OpenML dataset.
    dataset = HPO.Dataset(X, Y)

    class ExampleFunction(pysgpp.ScalarFunction):
        """Objective function for the sparse grid search.

        A point ``x`` holds one coordinate per entry of ``hyperparameterspace``
        in the order: loss, epochs, batch_size, optimizer, learning rate.
        For each evaluation a small Keras regressor is trained on the training
        split of the enclosing ``dataset`` and its mean squared error is
        returned.
        """

        def __init__(self):
            super().__init__(len(hyperparameterspace))

        def eval(self, x):
            """Train a model for the hyperparameters encoded in ``x`` and score it.

            Returns the mean squared error on the test split when the global
            ``TESTING`` flag is true and on the validation split otherwise.
            Returns 1000 if any prediction is NaN.

            Raises:
                ValueError: if the selected optimizer name is not supported.
            """

            def pick(name, coordinate):
                # Map a coordinate to one of the categorical choices.
                # NOTE(review): with this mapping the last choice is only
                # selected for coordinate == 1.0 -- confirm this matches the
                # decoding used inside the HPO package before changing it.
                options = hyperparameterspace_special[name]
                return options[int(coordinate * (len(options) - 1))]

            def rescale(name, coordinate):
                # Map a coordinate to the [lower, upper] range of `name`.
                bounds = hyperparameterspace_special[name]
                return from_standard(bounds[0], bounds[1], coordinate)

            loss = pick("loss", x[0])
            epochs = int(rescale("epochs", x[1]))
            batch_size = int(rescale("batch_size", x[2]))
            model_optimizer = pick("optimizer", x[3])
            model_learning_rate = rescale("optimizer__learning_rate", x[4])

            # Function to create the model, required by KerasRegressor
            def create_model():
                # Two hidden layers with batch normalization, one output unit.
                model = Sequential()
                model.add(Dense(20, input_shape=(len(dataset.get_X()[0]),), activation=ACTIVATION_FUNCTION))
                model.add(BatchNormalization())
                model.add(Dense(20, activation=ACTIVATION_FUNCTION))
                model.add(BatchNormalization())
                model.add(Dense(1, activation=ACTIVATION_FUNCTION))

                # The names equal the class names in keras.optimizers; fail
                # loudly on anything else instead of hitting an unbound local.
                supported = ('SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam')
                if model_optimizer not in supported:
                    raise ValueError(f"Unsupported optimizer: {model_optimizer!r}")
                optimizer_class = getattr(keras.optimizers, model_optimizer)
                optimizer = optimizer_class(learning_rate=model_learning_rate)

                model.compile(loss=loss, optimizer=optimizer)
                return model

            model = KerasRegressor(model=create_model, verbose=0)

            model.fit(dataset.get_X_train(), dataset.get_Y_train(), epochs=epochs, batch_size=batch_size)

            # TESTING is a module-level flag toggled by the experiment loop.
            if TESTING:
                get_inputs, get_targets = dataset.get_X_test, dataset.get_Y_test
            else:
                get_inputs, get_targets = dataset.get_X_validation, dataset.get_Y_validation

            Y_predicted = model.predict(get_inputs())
            if np.any(np.isnan(Y_predicted)):
                # Fixed penalty for a diverged model.
                return 1000
            return sklearn.metrics.mean_squared_error(get_targets().tolist(), Y_predicted)
              

    # Optimizers compared on every dataset ("cmaes" is currently disabled).
    available_optimizers = ["adaptive_gradient_descent", "adaptive_newton", "bfgs", "differential_evolution", "gradient_descent", "nlcg", "nelder_mead", "newton", "rprop"] # "cmaes", 

    results_temp = []
    results_opt_temp = []

    for j, optimizer_name in enumerate(available_optimizers):
        print("Current optimizer:", j, optimizer_name)

        # [degree, adaptivity, optimizer] for the sparse grid search
        sparse_params = [2, 0.95, optimizer_name]

        f = ExampleFunction()

        optimization = HPO.SparseGridSearchOptimization(dataset, f, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, degree=sparse_params[0], adaptivity=sparse_params[1], optimizer=sparse_params[2])

        result = optimization.fit()

        # Score both returned points on the test split. The flag is reset in
        # `finally` so that an error or a kernel interrupt cannot leave
        # TESTING switched on, which would make a later search optimize
        # against the test data.
        # NOTE(review): result[0] / result[1] are plotted as "k" and "k(o.)"
        # -- presumably the best grid point and the locally optimized point;
        # confirm against the HPO package.
        TESTING = True
        try:
            results_temp.append(f.eval(result[0]))
            results_temp.append(f.eval(result[1]))
        finally:
            TESTING = False

    # Two entries per optimizer, in the order of `available_optimizers`.
    RESULTS_datasets[i].extend(results_temp)


### Plotting

In [None]:

print(RESULTS_datasets)

# Bar labels: for optimizer number k there is one bar "k" and one bar
# "k(o.)", matching the two results stored per optimizer.
opts = []
for k in range(len(available_optimizers)):
    opts.append(str(k))
    opts.append(str(k) + "(o.)")

for dataset_id, dataset_results in zip(ids, RESULTS_datasets):
    print("Dataset with id", dataset_id)

    # Datasets skipped because of NaN values have no results; plotting them
    # would fail in min() and in ax.bar() (label/height length mismatch).
    if len(dataset_results) != len(opts):
        print("No complete results for this dataset, skipping plot")
        continue

    fig = plt.figure()
    ax = fig.add_axes([0,0,1,1])

    ax.bar(opts, dataset_results)
    # Start the y axis slightly below the smallest value so that the
    # differences between the bars stay visible.
    plt.ylim(0.9*min(dataset_results), None)
    plt.show()

