### Baseline experiment

Experiment to compare the 4 Optimization algorithms before trying to improve sparse search

In [1]:
import sys  
sys.path.insert(0, '../../')

import HPO
import pysgpp
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from numpy.random import seed
import random
import time
import os
import math
from matplotlib import cm
import matplotlib 

import operator

matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

random.seed(1)
seed(2)
tf.random.set_seed(3)

def reset_seeds():
    np.random.seed(1)
    random.seed(2)
    tf.random.set_seed(3)

VERBOSE = 0

SPARSE_RESULT = []
SPARSE_RESULT_OPTIMIZED = []

SPARSE_COST = []
SPARSE_COST_OPTIMIZED = []

2023-07-21 08:47:44.220948: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Model creation function

In [2]:
def create_model(learning_rate=0.0001, input_dim=10, number_layers=1, neurons_per_layer=20, dropout_prob=0.2):
    # create model
    model = Sequential()

    model.add(Dense(int(neurons_per_layer), input_shape=(input_dim,), activation='relu'))
    for _ in range(int(number_layers)):
        #model.add(Dropout(dropout_prob, seed=0))    
        model.add(Dense(int(neurons_per_layer), activation='relu'))
    model.add(Dense(1, activation=None))

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


### Hyperparameter space

In [3]:
hyperparameterspace = {
    'epochs': ["interval-int", 1, 30],
    'batch_size': ["interval-int", 100, 1000],
    'learning rate': ["interval-log", 1e-10, 1e-1],
    'number_layers': ["interval-int", 1, 10],
    'neurons_per_layer': ["interval-int", 1, 40],
    'dropout_prob': ["interval", 0, 0.999]
}

hyperparameterspace_special = {}
for key in hyperparameterspace.keys():
    liste = []
    for i in range(1, len(hyperparameterspace[key])):
        liste.append(hyperparameterspace[key][i])
    hyperparameterspace_special[key] = liste

### Optimization

In [12]:

################## MODEL AND FUNCTION DEFINITION ####################
CV = 2
dataset = HPO.Dataset(task_id=359938)

def evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, dropout_prob, deterministic=True):

    #return epochs + batch_size + learning_rate + number_of_layers + neurons_per_layer


    kfold = KFold(n_splits=CV)

    split = (kfold.split(dataset.get_X(), dataset.get_Y()))

    values = []

    numeric_features = [not x for x in dataset.get_categorical_indicator()]
    numeric_transformer = Pipeline(
        steps=[("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler())]
    )

    categorical_transformer = Pipeline(
        steps=[
            ("encoder", OneHotEncoder(
                handle_unknown="infrequent_if_exist", sparse_output=False)),
            # ("selector", SelectPercentile(chi2, percentile=50)),
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer,
                dataset.get_categorical_indicator()),
        ]
    )

    for i, (train_index, test_index) in enumerate(split):

        if deterministic:
            reset_seeds()

        X_train = dataset.get_X()[train_index]
        Y_train = dataset.get_Y()[train_index]

        X_val = dataset.get_X()[test_index]
        Y_val = dataset.get_Y()[test_index]

        preprocessor.fit(X_train, Y_train)

        X_train = preprocessor.transform(X_train)
        X_val = preprocessor.transform(X_val)

        regressor = KerasRegressor(model=create_model,
                                    learning_rate=learning_rate,
                                    input_dim=len(
                                        X_train[0]),
                                    number_layers=number_of_layers,
                                    neurons_per_layer=neurons_per_layer,
                                    dropout_prob=dropout_prob,
                                    verbose=0)

        regressor = TransformedTargetRegressor(regressor=regressor,
                                                transformer=StandardScaler())

        regressor.fit(X_train, Y_train, epochs=int(epochs),
                        batch_size=int(batch_size), shuffle=False)

        Y_predicted = regressor.predict(X_val)
        # error = sklearn.metrics.mean_absolute_error(Y_predicted, Y_val)
        error = sklearn.metrics.mean_absolute_percentage_error(
            Y_predicted, Y_val)
        values.append(error)

        del regressor
        K.clear_session()

    result = sum(values)/len(values)
    return result

def function(coordinates):
        
    return evaluate_model(epochs=coordinates[0], batch_size=coordinates[1], learning_rate=coordinates[2], number_of_layers=coordinates[3], neurons_per_layer=coordinates[4], dropout_prob=coordinates[5], deterministic=False)


N_ITERS = 10

ALT_1 = "{"
ALT_2 = "{"
ALT_3 = "{"

BUDGETS = [15, 20, 30, 40, 50, 60, 70, 80, 90, 100]

for budget in BUDGETS:

    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(dataset, function, 
                                                            hyperparameterspace, budget, 0, 0.9, 
                                                            init_points=15, 
                                                            alternative=0, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_1 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"


    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(dataset, function, 
                                                            hyperparameterspace, budget, 0, 0.8, 
                                                            init_points=15, 
                                                            alternative=1, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_2 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"


    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(dataset, function, 
                                                            hyperparameterspace, budget, 0, 0.9, 
                                                            init_points=15, 
                                                            alternative=2, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_3 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"



    ALT_1 += "}"
    ALT_2 += "}"
    ALT_3 += "}"

    print(ALT_1+"\n")
    print(ALT_2+"\n")
    print(ALT_3+"\n")


{(15,0.17085522338747977)}

{(15,0.2135143307968974)}

{(15,0.15346778202801942)}

{(15,0.17085522338747977)}(20,0.15570929311215878)}

{(15,0.2135143307968974)}(20,0.17106599342077972)}

{(15,0.15346778202801942)}(20,0.12807831205427647)}

{(15,0.17085522338747977)}(20,0.15570929311215878)}(30,0.12546524107456208)}

{(15,0.2135143307968974)}(20,0.17106599342077972)}(30,0.20546316485852004)}

{(15,0.15346778202801942)}(20,0.12807831205427647)}(30,0.13472946528345348)}

{(15,0.17085522338747977)}(20,0.15570929311215878)}(30,0.12546524107456208)}(40,0.09796145539730787)}

{(15,0.2135143307968974)}(20,0.17106599342077972)}(30,0.20546316485852004)}(40,0.11698700953274965)}

{(15,0.15346778202801942)}(20,0.12807831205427647)}(30,0.13472946528345348)}(40,0.12490067407488822)}

{(15,0.17085522338747977)}(20,0.15570929311215878)}(30,0.12546524107456208)}(40,0.09796145539730787)}(50,0.11236386708915233)}

{(15,0.2135143307968974)}(20,0.17106599342077972)}(30,0.20546316485852004)}(40,0.116987009

In [None]:
def blackboxfunction_grid(params):
    # index = int(params[0]*(len(hyperparameterspace_special["loss"])-1))
    # hyperparameterspace_special["loss"][index]

    epochs = int(params[0])

    batch_size = int(params[1])

    learning_rate = params[2]

    number_of_layers = int(params[3])

    neurons_per_layer = int(params[4])

    dropout_prob = params[5]

    return evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, dropout_prob)

def blackboxfunction_random(params):
    # index = int(params[0]*(len(hyperparameterspace_special["loss"])-1))
    # hyperparameterspace_special["loss"][index]

    epochs = int(params[0])

    batch_size = int(params[1])

    learning_rate = params[2]

    number_of_layers = int(params[3])

    neurons_per_layer = int(params[4])

    dropout_prob = params[5]

    return evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, dropout_prob, deterministic=False)

def blackboxfunction_bayesian(params):
    # index = int(params[0]*(len(hyperparameterspace_special["loss"])-1))
    # hyperparameterspace_special["loss"][index]

    epochs = int(params[0])

    batch_size = int(params[1])

    model_learning_rate = 10 ** (params[2])

    number_of_layers = int(params[3])

    neurons_per_layer = int(params[4])

    dropout_prob = params[5]

    return evaluate_model(epochs, batch_size, model_learning_rate, number_of_layers, neurons_per_layer, dropout_prob, deterministic=False)

##################### Function for sparse grid search #####################

class ExampleFunction(pysgpp.ScalarFunction):

    def __init__(self):
        super(ExampleFunction, self).__init__(
            len(hyperparameterspace.keys()))

    def eval(self, x):
        # index = int(x[0]*(len(hyperparameterspace_special["loss"])-1))
        # hyperparameterspace_special["loss"][index]

        epochs = int(HPO.from_standard(
            hyperparameterspace_special["epochs"][0], hyperparameterspace_special["epochs"][1], x[0]))

        batch_size = int(HPO.from_standard(
            hyperparameterspace_special["batch_size"][0], hyperparameterspace_special["batch_size"][1], x[1]))

        # HPO.from_standard_log(hyperparameterspace_special["learning_rate"][
        model_learning_rate = HPO.from_standard_log(
            hyperparameterspace_special["learning_rate"][0], hyperparameterspace_special["learning_rate"][1], x[2])

        number_of_layers = int(HPO.from_standard(
            hyperparameterspace_special["number_layers"][0], hyperparameterspace_special["number_layers"][1], x[3]))

        neurons_per_layer = int(HPO.from_standard(
            hyperparameterspace_special["neurons_per_layer"][0], hyperparameterspace_special["neurons_per_layer"][1], x[4]))
        
        dropout_prob = int(HPO.from_standard(
            hyperparameterspace_special["dropout_prob"][0], hyperparameterspace_special["dropout_prob"][1], x[5]))

        return evaluate_model(epochs, batch_size, model_learning_rate, number_of_layers, neurons_per_layer, dropout_prob)
    

BUDGET = 70

print("\nPerforming grid search")
optimization = HPO.GridSearchOptimization(
    dataset, blackboxfunction_grid, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

result, cost = optimization.fit()

index_best = 0
for m in range(len(result)):
    if result[m][1] < result[index_best][1]:
        index_best = m

best_score = result[index_best][1]
best_params = result[index_best][0]


print("Best score with Grid search:", best_score)

print(str(cost) + "," + str(best_score))

K.clear_session()

# ########################### RANDOM SEARCH #######################
print("\nPerforming random search")

optimization = HPO.RandomSearchOptimization(
    dataset, blackboxfunction_random, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

result, cost = optimization.fit()

index_best = 0
for m in range(len(result)):
    if result[m][1] < result[index_best][1]:
        index_best = m

best_score = result[index_best][1]
best_params = result[index_best][0]


print("Best score with Random search:", best_score)

print(str(cost) + "," + str(best_score))


K.clear_session()

########################### BAYESIAN OPT #####################
print("\nPerforming bayesian optimization")

optimization = HPO.BayesianOptimization(
    dataset, blackboxfunction_bayesian, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE)

result, cost = optimization.fit()

index_best = 0
for m in range(len(result)):
    if result[m][1] < result[index_best][1]:
        index_best = m

best_score = result[index_best][1]
best_params = result[index_best][0]


print("Best score with Bayesian Optimization:", best_score)


print(str(BUDGET) + "," + str(best_score))


K.clear_session()

########################### SPARSE OPT ############################

print("\nPerforming sparse search")

f = ExampleFunction()

optimization = HPO.SparseGridSearchOptimization(
    dataset, f, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, degree=2, adaptivity=0.85, optimizer="gradient_descent")

[fX0, fX1, fX2], cost = optimization.fit()

cost = cost + 2
bestFX = fX0 
if fX1 < bestFX:
    bestFX = fX1 
if fX2 < bestFX:
    bestFX = fX2

print("Sparse grid search")
print(str(cost) + "," + str(bestFX))
    
K.clear_session()