### Numerical Experiments

First experiment: the effect of the sparse grid's adaptivity parameter on the optimization result is tested.

In [None]:
import HPO

import pysgpp

import matplotlib.pyplot as plt

import tensorflow as tf

import sklearn.metrics

from sklearn.model_selection import KFold

import numpy as np
import keras
from keras import backend as K

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor

from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2

from sklearn.preprocessing import OneHotEncoder, StandardScaler


# Verbosity level forwarded to the sparse grid optimizer.
VERBOSE = 1
# Number of cross-validation folds (used as KFold's n_splits).
# Commented-out alternative left by the author: [(slice(None), slice(None))]
CV = 3

DATASETS = []

# One accumulator per adaptivity setting (suffix 0-4). Each receives one
# list per processed dataset.

# Best scores found by the sparse search.
RESULT0, RESULT1, RESULT2, RESULT3, RESULT4 = ([] for _ in range(5))

# Best scores after the additional optimization step.
RESULT0_OPT, RESULT1_OPT, RESULT2_OPT, RESULT3_OPT, RESULT4_OPT = (
    [] for _ in range(5))

# Cost values belonging to the RESULT* entries.
COST0, COST1, COST2, COST3, COST4 = ([] for _ in range(5))

# Cost values belonging to the RESULT*_OPT entries.
COST0_OPT, COST1_OPT, COST2_OPT, COST3_OPT, COST4_OPT = (
    [] for _ in range(5))

### Hyperparameter space definition

In [None]:
# Number of budget steps evaluated per dataset.
ITER = 5

# Search space: hyperparameter name -> [kind tag, interval bound, interval bound].
hyperparameterspace = {
    'epochs': ["interval-int", 1, 40],
    'batch_size': ["interval-int", 1, 200],
    'learning_rate': ["interval-log", 0.000000001, 0.1],
    'number_layers': ["interval-int", 1, 20],
    'neurons_per_layer': ["interval-int", 1, 50]
}

# Same space with the leading kind tag stripped: name -> [bound, bound].
hyperparameterspace_special = {
    name: list(spec[1:]) for name, spec in hyperparameterspace.items()
}



### Model definition

In [None]:
def relu_advanced(x):
    """Apply the Keras backend ReLU to ``x`` and return the result."""
    activated = K.relu(x)
    return activated


# Seeded normal initializer; create_model passes this one instance as both
# kernel and bias initializer of its hidden layers.
INITIALIZER = tf.keras.initializers.RandomNormal(seed=42, stddev=0.05)

# Activation used by the hidden Dense layers that create_model builds.
ACTIVATION_FUNCTION = relu_advanced


def create_model(learning_rate=0.0001, input_dim=10, number_layers=1, neurons_per_layer=20):
    """Build and compile a fully connected regression network.

    Args:
        learning_rate: Learning rate handed to the Adam optimizer.
        input_dim: Number of input features expected by the first layer.
        number_layers: Number of hidden layers stacked on top of the
            input-facing hidden layer.
        neurons_per_layer: Width of every hidden layer.

    Returns:
        The compiled ``Sequential`` model with one linear output unit and
        mean-squared-error loss.
    """
    model = Sequential()

    # Only the first layer declares the input shape; later layers infer theirs.
    model.add(Dense(neurons_per_layer, input_shape=(input_dim,), activation=ACTIVATION_FUNCTION,
                    kernel_initializer=INITIALIZER, bias_initializer=INITIALIZER))

    # NOTE(review): together with the layer above the network ends up with
    # number_layers + 1 hidden layers. Kept as-is so results stay comparable;
    # confirm whether range(number_layers - 1) was intended.
    for _ in range(number_layers):
        model.add(Dense(neurons_per_layer, activation=ACTIVATION_FUNCTION,
                        kernel_initializer=INITIALIZER, bias_initializer=INITIALIZER))

    model.add(Dense(1, activation=None))

    # Take the optimizer from the same tf.keras namespace as Sequential/Dense;
    # mixing standalone keras objects into a tf.keras model can fail depending
    # on the installed versions.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


### Optimization

In [None]:
# Task ids to process. Further ids left disabled by the author:
#   359952, 359931, 359949, 359938
#   359940, 317614, 359934, 359946, 359932, 233214, 359943
ids = [233211]

# Adaptivity values that are compared against each other.
ADAPTIVITIES = [0.0, 0.25, 0.5, 0.75, 1.0]

valid_datasets = 0

for i, task_id in enumerate(ids):
    print("######################################################################################################################################################")
    print("Current Dataset:", i + 1, "of", len(ids), "with id:", task_id)

    dataset = HPO.Dataset(task_id=task_id)

    # Quick summary of the target column before any optimization starts.
    targets = dataset.get_Y()
    print("The average value for target is:", sum(targets / len(targets)))
    print("Min target:", min(targets), "Max target:", max(targets))

    # Fresh per-dataset accumulators, one per adaptivity setting (suffix 0-4):
    # scores, optimized scores, costs and costs of the optimized results.
    (current_dataset_0, current_dataset_1, current_dataset_2,
     current_dataset_3, current_dataset_4) = ([] for _ in range(5))

    (current_dataset_0_opt, current_dataset_1_opt, current_dataset_2_opt,
     current_dataset_3_opt, current_dataset_4_opt) = ([] for _ in range(5))

    (current_dataset_cost_0, current_dataset_cost_1, current_dataset_cost_2,
     current_dataset_cost_3, current_dataset_cost_4) = ([] for _ in range(5))

    (current_dataset_cost_0_opt, current_dataset_cost_1_opt, current_dataset_cost_2_opt,
     current_dataset_cost_3_opt, current_dataset_cost_4_opt) = ([] for _ in range(5))

    ################## MODEL AND FUNCTION DEFINITION ####################

    def evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer):
        """Cross-validate one network configuration on the current dataset.

        For each of the ``CV`` folds the preprocessing is fitted on the
        training part only, a network is trained on it, and the mean absolute
        percentage error (MAPE) on the held-out part is recorded.

        Args:
            epochs: Number of training epochs per fold.
            batch_size: Batch size used during training.
            learning_rate: Forwarded to ``create_model``.
            number_of_layers: Forwarded to ``create_model`` as ``number_layers``.
            neurons_per_layer: Forwarded to ``create_model``.

        Returns:
            The MAPE averaged over all folds.
        """
        features = dataset.get_X()
        targets = dataset.get_Y()
        categorical_mask = dataset.get_categorical_indicator()
        numeric_mask = [not is_categorical for is_categorical in categorical_mask]

        numeric_transformer = Pipeline(
            steps=[("imputer", SimpleImputer(strategy="median")),
                   ("scaler", StandardScaler())]
        )

        categorical_transformer = Pipeline(
            steps=[
                ("encoder", OneHotEncoder(
                    handle_unknown="infrequent_if_exist", sparse_output=False)),
            ]
        )

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_mask),
                ("cat", categorical_transformer, categorical_mask),
            ]
        )

        fold_errors = []

        for train_index, test_index in KFold(n_splits=CV).split(features, targets):
            X_train, Y_train = features[train_index], targets[train_index]
            X_val, Y_val = features[test_index], targets[test_index]

            # Fit the preprocessing on the training fold only so that no
            # statistics of the validation fold leak into training.
            preprocessor.fit(X_train, Y_train)

            X_train = preprocessor.transform(X_train)
            X_val = preprocessor.transform(X_val)

            regressor = TransformedTargetRegressor(
                regressor=KerasRegressor(model=create_model,
                                         learning_rate=learning_rate,
                                         input_dim=X_train.shape[1],
                                         number_layers=number_of_layers,
                                         neurons_per_layer=neurons_per_layer,
                                         verbose=0),
                transformer=StandardScaler())

            regressor.fit(X_train, Y_train, epochs=epochs,
                          batch_size=batch_size)

            Y_predicted = regressor.predict(X_val)

            # The true values go first: MAPE divides by y_true, so swapping
            # the arguments would normalise by the predictions instead.
            error = sklearn.metrics.mean_absolute_percentage_error(
                Y_val, Y_predicted)
            fold_errors.append(error)

            # Drop the Keras graph/state of this fold before the next one.
            K.clear_session()

        return sum(fold_errors)/len(fold_errors)
    
    ##################### Function for sparse grid search #####################

    class ExampleFunction(pysgpp.ScalarFunction):
        """Objective for the sparse grid search.

        Translates a point of the optimizer's domain into concrete
        hyperparameter values and scores them with ``evaluate_model``.
        """

        def __init__(self):
            # One input dimension per entry of the hyperparameter space.
            dimension = len(hyperparameterspace.keys())
            super(ExampleFunction, self).__init__(dimension)

        def eval(self, x):
            """Return the cross-validation score for the point ``x``.

            ``x[0]`` .. ``x[4]`` are mapped to epochs, batch size, learning
            rate, number of layers and neurons per layer, in that order.
            Presumably each component lies in [0, 1]; confirm against
            ``HPO.from_standard`` / ``HPO.from_standard_log``.
            """
            space = hyperparameterspace_special

            epochs_range = space["epochs"]
            epochs = int(HPO.from_standard(
                epochs_range[0], epochs_range[1], x[0]))

            batch_range = space["batch_size"]
            batch_size = int(HPO.from_standard(
                batch_range[0], batch_range[1], x[1]))

            # The learning rate uses the logarithmic mapping and stays a float.
            rate_range = space["learning_rate"]
            model_learning_rate = HPO.from_standard_log(
                rate_range[0], rate_range[1], x[2])

            layers_range = space["number_layers"]
            number_of_layers = int(HPO.from_standard(
                layers_range[0], layers_range[1], x[3]))

            neurons_range = space["neurons_per_layer"]
            neurons_per_layer = int(HPO.from_standard(
                neurons_range[0], neurons_range[1], x[4]))

            return evaluate_model(epochs, batch_size, model_learning_rate,
                                  number_of_layers, neurons_per_layer)

    ##### For each dataset: run models with different budget #####
    # Initial value only; reassigned at the start of every budget step below.
    BUDGET = 1

    # Bundle the accumulators of each adaptivity setting so that all settings
    # share one code path instead of five copy-pasted sections.
    # Tuple layout: (scores, optimized scores, costs, optimized costs).
    per_adaptivity = [
        (current_dataset_0, current_dataset_0_opt,
         current_dataset_cost_0, current_dataset_cost_0_opt),
        (current_dataset_1, current_dataset_1_opt,
         current_dataset_cost_1, current_dataset_cost_1_opt),
        (current_dataset_2, current_dataset_2_opt,
         current_dataset_cost_2, current_dataset_cost_2_opt),
        (current_dataset_3, current_dataset_3_opt,
         current_dataset_cost_3, current_dataset_cost_3_opt),
        (current_dataset_4, current_dataset_4_opt,
         current_dataset_cost_4, current_dataset_cost_4_opt),
    ]

    for j in range(ITER):

        BUDGET = (j+1) * 10 #BUDGET + 2 ** len(hyperparameterspace.keys())

        print("\n################################################## Current Budget:",
              BUDGET, "##################################################")

        for adaptivity, (scores, scores_opt, costs, costs_opt) in zip(ADAPTIVITIES, per_adaptivity):
            ########################### SPARSE OPT ############################
            print("\nPerforming sparse search with adaptivity ", adaptivity)

            f = ExampleFunction()

            optimization = HPO.SparseGridSearchOptimization(
                dataset, f, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, degree=3,
                adaptivity=adaptivity, optimizer="gradient_descent")

            result = optimization.fit()

            print("Best score with Sparse Search:", result[0][1], "optimized:", result[0][3])

            scores.append(result[0][1])
            scores_opt.append(result[0][3])

            costs.append(result[1])
            # NOTE(review): the +1 presumably accounts for one extra
            # evaluation of the optimized point - confirm against HPO.
            costs_opt.append(result[1]+1)

            K.clear_session()

    # Publish this dataset's series in the notebook-wide accumulators.
    # The tuple layout mirrors per_adaptivity.
    global_stores = [
        (RESULT0, RESULT0_OPT, COST0, COST0_OPT),
        (RESULT1, RESULT1_OPT, COST1, COST1_OPT),
        (RESULT2, RESULT2_OPT, COST2, COST2_OPT),
        (RESULT3, RESULT3_OPT, COST3, COST3_OPT),
        (RESULT4, RESULT4_OPT, COST4, COST4_OPT),
    ]
    for stores, series in zip(global_stores, per_adaptivity):
        for store, values in zip(stores, series):
            store.append(values)

    print("###################### Current dataset", ids[i], "######################")

    # Reuses the dataset loaded at the top of this iteration instead of
    # constructing it a second time just to print the same statistics.
    print("Target average:", sum(
        dataset.get_Y()/len(dataset.get_Y())))
    print("Min target:", min(dataset.get_Y()),
          "Max target:", max(dataset.get_Y()))

    # Plot from this iteration's own lists rather than RESULT*[i] / COST*[i]:
    # the global lists keep growing when the cell is re-run without resetting
    # them, so index i could point at the data of an older run.
    colors = ['black', 'red', 'blue', 'purple', 'pink']
    plot_variants = [
        ("(not_optimized)",
         [(series[2], series[0]) for series in per_adaptivity]),
        ("(optimized)",
         [(series[3], series[1]) for series in per_adaptivity]),
    ]

    for suffix, curves in plot_variants:
        # One curve per adaptivity setting.
        for adaptivity, color, (curve_costs, curve_scores) in zip(ADAPTIVITIES, colors, curves):
            plt.plot(curve_costs, curve_scores, '.-', color=color,
                     label="Adapt.:" + str(adaptivity))

        plt.xlabel('Function evaluations')
        plt.ylabel('Result')

        plt.legend()
        # NOTE: the "Current_tests" directory has to exist already; savefig
        # is not expected to create it.
        plt.savefig("Current_tests/task_id"+str(ids[i])+suffix)
        plt.show()
