### Baseline experiment

Experiment comparing the four optimization algorithms (grid search, random search, Bayesian optimization and sparse grid search) before trying to improve the sparse grid search.

In [None]:
import HPO

import pysgpp

import matplotlib.pyplot as plt

import tensorflow as tf

import sklearn.metrics

from sklearn.model_selection import KFold

import numpy as np
import keras
from keras import backend as K

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor

from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2

from sklearn.preprocessing import OneHotEncoder, StandardScaler


# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Verbosity forwarded to the HPO optimizers; values > 0 also print the best
# hyperparameters found.
VERBOSE = 1
# Number of KFold splits used when scoring one hyperparameter setting.
# (alternative kept from the original: [(slice(None), slice(None))])
CV = 3
# Not referenced in the cells visible here.
TESTING = True

# Not filled in the cells visible here.
DATASETS = []

# ---------------------------------------------------------------------------
# Accumulators: one entry is appended per (dataset, budget) run.
# Each name is bound to its own, independent list.
# ---------------------------------------------------------------------------
# Best score reported by each optimizer.
GRID_RESULT, RANDOM_RESULT, BAYESIAN_RESULT = [], [], []
SPARSE_RESULT, SPARSE_RESULT_OPTIMIZED = [], []

# Cost associated with the corresponding result entry.
GRID_COST, RANDOM_COST, BAYESIAN_COST = [], [], []
SPARSE_COST, SPARSE_COST_OPTIMIZED = [], []

### Hyperparameter space definition

In [None]:
# Number of budget steps evaluated per dataset.
ITER = 2

# Search space: each entry is [kind, lower bound, upper bound]. The key names
# follow the nested scikit-learn parameter path of the wrapped Keras regressor.
hyperparameterspace = {
    'regressor__regressor__epochs': ["interval-int", 15, 20],
    'regressor__regressor__batch_size': ["interval-int", 1, 200],
    'regressor__regressor__model__optimizer__learning_rate': ["interval-log", 0.000001, 0.1]
}

# Same keys, but with the leading kind tag stripped so that only the values
# (here: the two interval bounds) remain.
hyperparameterspace_special = {
    name: spec[1:] for name, spec in hyperparameterspace.items()
}



### Model definition

In [None]:
def relu_advanced(x):
    """Apply the Keras backend ReLU to ``x`` using the backend's default arguments."""
    activated = K.relu(x)
    return activated

# Activation used by the hidden Dense layers built in create_model.
ACTIVATION_FUNCTION = relu_advanced

# Seeded normal initializer; the same instance is handed to create_model's
# hidden layers for both kernel and bias initialisation.
INITIALIZER = tf.keras.initializers.RandomNormal(seed=42, stddev=0.05)
# 
def create_model(learning_rate=0.0001, input_dim=10):
    """Build and compile the feed-forward regression network.

    The network has two hidden Dense layers with 30 units each (activation
    ``ACTIVATION_FUNCTION``, kernels and biases drawn from ``INITIALIZER``)
    followed by a single linear output unit.

    Args:
        learning_rate: Learning rate of the Adam optimizer.
        input_dim: Number of input features per sample.

    Returns:
        The model, compiled with Adam and the mean-absolute-error loss.
    """
    init_kwargs = dict(kernel_initializer=INITIALIZER,
                       bias_initializer=INITIALIZER)

    layers = [
        Dense(30, input_shape=(input_dim,),
              activation=ACTIVATION_FUNCTION, **init_kwargs),
        Dense(30, activation=ACTIVATION_FUNCTION, **init_kwargs),
        # Linear output for regression.
        Dense(1, activation=None),
    ]
    network = Sequential(layers)

    network.compile(
        loss='mean_absolute_error',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    )
    return network

### Optimization

In [None]:
# Task ids handed to HPO.Dataset(task_id=...).
ids = [233211]  # further candidates: 359952, 359931, 359949, 359938
# more candidates: 359940, 317614, 359934, 359946, 359932, 233214, 359943

valid_datasets = 0
for i, task_id in enumerate(ids):
    print("######################################################################################################################################################")
    print("Current Dataset:", (i+1), "of", len(ids), "with id:", task_id)

    dataset = HPO.Dataset(task_id=task_id)

    # Quick sanity statistics of the regression target.
    targets = dataset.get_Y()
    print("The average value for target is:", sum(targets/len(targets)))
    print("Min target:", min(targets),
          "Max target:", max(targets))

    ################## MODEL AND FUNCTION DEFINITION ####################

    def evaluate_model(loss, epochs, batch_size, model_learning_rate, neurons_per_layer, number_of_layers):
        """Return the mean K-fold validation error for one hyperparameter setting.

        For each of the ``CV`` folds the preprocessing is fitted on the
        training part only, a fresh network built by ``create_model`` is
        trained on standardised targets, and the mean absolute error on the
        held-out part is recorded.

        Args:
            loss: Currently unused; ``create_model`` compiles the network
                itself and takes no loss argument.
            epochs: Number of training epochs passed to ``fit``.
            batch_size: Mini-batch size passed to ``fit``.
            model_learning_rate: Learning rate forwarded to ``create_model``.
            neurons_per_layer: Currently unused; ``create_model`` takes no
                such argument.
            number_of_layers: Currently unused; ``create_model`` takes no
                such argument.

        Returns:
            The average of the per-fold mean absolute errors.
        """
        X = dataset.get_X()
        Y = dataset.get_Y()

        # Boolean column masks: categorical columns as reported by the
        # dataset, numeric columns are everything else.
        categorical_mask = dataset.get_categorical_indicator()
        numeric_mask = [not is_categorical for is_categorical in categorical_mask]

        numeric_transformer = Pipeline(
            steps=[("imputer", SimpleImputer(strategy="median")),
                   ("scaler", StandardScaler())]
        )

        categorical_transformer = Pipeline(
            steps=[
                ("encoder", OneHotEncoder(handle_unknown="infrequent_if_exist", sparse_output=False)),
            ]
        )

        preprocessor = ColumnTransformer(
            transformers=[
                ("num", numeric_transformer, numeric_mask),
                ("cat", categorical_transformer, categorical_mask),
            ]
        )

        fold_errors = []
        for train_index, test_index in KFold(n_splits=CV).split(X, Y):
            X_train, Y_train = X[train_index], Y[train_index]
            X_val, Y_val = X[test_index], Y[test_index]

            # Fit the preprocessing on the training fold only so that no
            # statistics of the validation fold leak into training.
            preprocessor.fit(X_train)
            X_train = preprocessor.transform(X_train)
            X_val = preprocessor.transform(X_val)

            # The width of the encoded feature matrix can differ between
            # folds, so the network input size is derived per fold.
            # ``model__learning_rate`` routes the sampled learning rate to
            # ``create_model``; without it every run would silently use the
            # default learning rate of ``create_model``.
            regressor = TransformedTargetRegressor(
                regressor=KerasRegressor(
                    model=create_model,
                    input_dim=X_train.shape[1],
                    model__learning_rate=model_learning_rate,
                    verbose=0,
                ),
                transformer=StandardScaler(),
            )

            regressor.fit(X_train, Y_train, epochs=epochs,
                          batch_size=batch_size)

            Y_predicted = regressor.predict(X_val)
            fold_errors.append(
                sklearn.metrics.mean_absolute_error(Y_val, Y_predicted))

            # Release the Keras graph/state of this fold before the next one.
            K.clear_session()

        return sum(fold_errors)/len(fold_errors)

    def blackboxfunction(params):
        """Score one hyperparameter vector via ``evaluate_model``.

        ``params`` is indexed positionally: ``params[0]`` is the number of
        epochs, ``params[1]`` the batch size (both truncated to ``int``) and
        ``params[2]`` the learning rate. Loss, neurons per layer and number
        of layers are fixed values here rather than searched.
        """
        epochs = int(params[0])
        batch_size = int(params[1])
        model_learning_rate = params[2]

        return evaluate_model(
            loss='mean_squared_error',
            epochs=epochs,
            batch_size=batch_size,
            model_learning_rate=model_learning_rate,
            neurons_per_layer=40,
            number_of_layers=1,
        )

    ##################### Function for sparse grid search #####################

    class ExampleFunction(pysgpp.ScalarFunction):
        """Objective for the sparse grid search.

        The function has one input dimension per entry of
        ``hyperparameterspace``; ``eval`` converts the incoming point back to
        hyperparameter values and scores them with ``evaluate_model``.
        """

        def __init__(self):
            super().__init__(len(hyperparameterspace))

        def eval(self, x):
            """Map point ``x`` to (epochs, batch size, learning rate) and evaluate it."""
            epoch_bounds = hyperparameterspace_special["regressor__regressor__epochs"]
            batch_bounds = hyperparameterspace_special["regressor__regressor__batch_size"]
            rate_bounds = hyperparameterspace_special[
                "regressor__regressor__model__optimizer__learning_rate"]

            # Integer-valued parameters are truncated after the conversion.
            epochs = int(HPO.from_standard(
                epoch_bounds[0], epoch_bounds[1], x[0]))
            batch_size = int(HPO.from_standard(
                batch_bounds[0], batch_bounds[1], x[1]))
            model_learning_rate = HPO.from_standard_log(
                rate_bounds[0], rate_bounds[1], x[2])

            # Loss, neurons per layer and number of layers are fixed values
            # here rather than searched.
            return evaluate_model(
                'mean_squared_error', epochs, batch_size,
                model_learning_rate, 40, 1)

    ##### For each dataset: run models with different budget #####

    for j in range(ITER):
        # Evaluation budget for this step: 3, 6, ... (grows with j).
        BUDGET = (j+1) * 3
        print("\n################################################## Current Budget:",
              BUDGET, "##################################################")

        # Grid search and random search are currently disabled; while they
        # are, GRID_RESULT/GRID_COST and RANDOM_RESULT/RANDOM_COST stay empty.

        ############################## GRID SEARCH #######################
        # print("Performing grid search")

        # optimization = HPO.GridSearchOptimization(
        #     dataset, create_model, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

        # result, cost = optimization.fit()

        # if VERBOSE > 0:
        #     print("Best params found by grid search:")
        #     print(result.best_params_)

        # GRID_RESULT.append(-result.best_score_)
        # GRID_COST.append(cost)

        # K.clear_session()

        # ########################### RANDOM SEARCH #######################
        # print("Performing random search")

        # optimization = HPO.RandomSearchOptimization(
        #     dataset, create_model, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

        # result, cost = optimization.fit()

        # if VERBOSE > 0:
        #     print("Best params found by random search:")
        #     print(result.best_params_)

        # RANDOM_RESULT.append(-result.best_score_)
        # RANDOM_COST.append(cost)

        # K.clear_session()

        ########################### BAYESIAN OPT #####################
        print("Performing bayesian optimization")

        optimization = HPO.BayesianOptimization(
            dataset, blackboxfunction, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE)

        # result[0] holds the evaluated parameter vectors, result[1] the
        # matching scores (same index).
        result = optimization.fit()
        scores = result[1]

        # Index of the highest score; on ties the last occurrence wins.
        # The maximum is computed once instead of once per element.
        # NOTE(review): blackboxfunction returns an error value, so taking
        # the maximum is only right if HPO.BayesianOptimization reports
        # negated scores - confirm against the HPO module.
        index_best = 0
        if len(scores) > 0:
            top_score = max(scores)
            for m, score in enumerate(scores):
                if score == top_score:
                    index_best = m

        best_score = scores[index_best]
        best_params = result[0][index_best]

        if VERBOSE > 0:
            print("With Hyperparameters: ")
            # A dedicated counter name is used so that the dataset loop
            # index ``i`` is not overwritten.
            for position, key in enumerate(hyperparameterspace):
                if hyperparameterspace[key][0] == "list":
                    # "list" entries: map the value onto an index into the
                    # list of choices.
                    index = int(
                        best_params[position]*(len(hyperparameterspace_special[key])-1))
                    print(key + ": " +
                          str(hyperparameterspace_special[key][index]))
                else:
                    print(key + ": " + str(best_params[position]))

        BAYESIAN_RESULT.append(best_score)
        BAYESIAN_COST.append(BUDGET)

        K.clear_session()

        ########################### SPARSE OPT ############################
        print("Performing sparse search")

        f = ExampleFunction()

        optimization = HPO.SparseGridSearchOptimization(
            dataset, f, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, degree=2, adaptivity=0.2, optimizer="rprop")

        result = optimization.fit()

        # NOTE(review): presumably result[0][1] is the best value on the grid
        # and result[0][3] the value after the local optimizer, with
        # result[1] the cost - confirm against the HPO module.
        SPARSE_RESULT.append(result[0][1])
        SPARSE_RESULT_OPTIMIZED.append(result[0][3])

        SPARSE_COST.append(result[1])
        SPARSE_COST_OPTIMIZED.append(result[1])

        K.clear_session()

        # Progress dump of everything collected so far.
        print(GRID_RESULT)
        print(RANDOM_RESULT)
        print(BAYESIAN_RESULT)
        print(SPARSE_RESULT)
        print(SPARSE_RESULT_OPTIMIZED)


In [None]:
print(GRID_RESULT)
print(RANDOM_RESULT)
print(BAYESIAN_RESULT)
print(SPARSE_RESULT)
print(SPARSE_RESULT_OPTIMIZED)

# (legend label, costs, results, marker, colour) for every optimizer.
# The lists hold ITER consecutive entries per dataset; an optimizer that was
# not run leaves its lists empty and is skipped instead of raising IndexError.
plot_series = [
    ("Grid search", GRID_COST, GRID_RESULT, '+', 'black'),
    ("Random search", RANDOM_COST, RANDOM_RESULT, 'x', 'red'),
    ("Bayesian Opt", BAYESIAN_COST, BAYESIAN_RESULT, '.', 'blue'),
    ("Sparse search", SPARSE_COST, SPARSE_RESULT, '+', 'purple'),
    ("Sparse search (opt)", SPARSE_COST_OPTIMIZED,
     SPARSE_RESULT_OPTIMIZED, 'x', 'pink'),
]

for i, task_id in enumerate(ids):
    print("Current dataset:", i, "with name id:", task_id)

    # Entries belonging to this dataset.
    first = i * ITER
    last = first + ITER

    plotted_any = False
    for label, costs, results, marker, color in plot_series:
        xs = costs[first:last]
        ys = results[first:last]
        points = min(len(xs), len(ys))
        if points == 0:
            continue
        # Marker-only format string: points are drawn without a connecting
        # line. The label is attached to the series itself so the legend
        # stays correct when some optimizers are missing.
        plt.plot(xs[:points], ys[:points], marker, color=color, label=label)
        plotted_any = True

    plt.xlabel("Cost")
    # The Bayesian and sparse results come from mean_absolute_error.
    plt.ylabel("Result (mean absolute error)")
    plt.yscale("log")
    if plotted_any:
        plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    plt.show()

