### Baseline experiment

Experiment to compare the 4 Optimization algorithms before trying to improve sparse search

In [5]:
import sys  
sys.path.insert(0, '../../')

import HPO
import pysgpp
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from numpy.random import seed
import random
import time
import os
import math
from matplotlib import cm
import matplotlib 

import operator

matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

random.seed(1)
seed(2)
tf.random.set_seed(3)

def reset_seeds():
    np.random.seed(1)
    random.seed(2)
    tf.random.set_seed(3)

VERBOSE = 0

SPARSE_RESULT = []
SPARSE_RESULT_OPTIMIZED = []

SPARSE_COST = []
SPARSE_COST_OPTIMIZED = []

### Model creation function

In [6]:
def create_model(learning_rate=0.0001, input_dim=10, number_layers=1, neurons_per_layer=20, dropout_prob=0.2):
    # create model
    model = Sequential()

    model.add(Dropout(dropout_prob, seed=0))
    model.add(Dense(neurons_per_layer, input_shape=(input_dim,), activation='relu'))
    for _ in range(number_layers):
        model.add(Dropout(dropout_prob, seed=0))    
        model.add(Dense(neurons_per_layer, activation='relu'))
    model.add(Dense(1, activation=None))

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


### Hyperparameter space

In [7]:
hyperparameterspace = {
    'epochs': ["interval-int", 1, 40],
    'batch_size': ["interval-int", 50, 1000],
    'learning rate': ["interval-log", 1e-10, 1e-1],
    'number_layers': ["interval-int", 1, 10],
    'neurons_per_layer': ["interval-int", 1, 40],
    'dropout_prob': ["interval", 0, 0.999]
}

hyperparameterspace_special = {}
for key in hyperparameterspace.keys():
    liste = []
    for i in range(1, len(hyperparameterspace[key])):
        liste.append(hyperparameterspace[key][i])
    hyperparameterspace_special[key] = liste

### Optimization

In [8]:

################## MODEL AND FUNCTION DEFINITION ####################
CV = 2
dataset = HPO.Dataset(task_id=359938)

def evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, dropout_prob, deterministic=True):

    return epochs + batch_size + learning_rate + number_of_layers + neurons_per_layer


    kfold = KFold(n_splits=CV)

    split = (kfold.split(dataset.get_X(), dataset.get_Y()))

    values = []

    numeric_features = [not x for x in dataset.get_categorical_indicator()]
    numeric_transformer = Pipeline(
        steps=[("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler())]
    )

    categorical_transformer = Pipeline(
        steps=[
            ("encoder", OneHotEncoder(
                handle_unknown="infrequent_if_exist", sparse_output=False)),
            # ("selector", SelectPercentile(chi2, percentile=50)),
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer,
                dataset.get_categorical_indicator()),
        ]
    )

    for i, (train_index, test_index) in enumerate(split):

        if deterministic:
            reset_seeds()

        X_train = dataset.get_X()[train_index]
        Y_train = dataset.get_Y()[train_index]

        X_val = dataset.get_X()[test_index]
        Y_val = dataset.get_Y()[test_index]

        preprocessor.fit(X_train, Y_train)

        X_train = preprocessor.transform(X_train)
        X_val = preprocessor.transform(X_val)

        regressor = KerasRegressor(model=create_model,
                                    learning_rate=learning_rate,
                                    input_dim=len(
                                        X_train[0]),
                                    number_layers=number_of_layers,
                                    neurons_per_layer=neurons_per_layer,
                                    dropout_prob=dropout_prob,
                                    verbose=0)

        regressor = TransformedTargetRegressor(regressor=regressor,
                                                transformer=StandardScaler())

        regressor.fit(X_train, Y_train, epochs=int(epochs),
                        batch_size=int(batch_size), shuffle=False)

        Y_predicted = regressor.predict(X_val)
        # error = sklearn.metrics.mean_absolute_error(Y_predicted, Y_val)
        error = sklearn.metrics.mean_absolute_percentage_error(
            Y_predicted, Y_val)
        values.append(error)

        del regressor
        K.clear_session()

    result = sum(values)/len(values)
    return result

def function(coordinates):
        
    return evaluate_model(epochs=coordinates[0], batch_size=coordinates[0], learning_rate=coordinates[2], number_of_layers=coordinates[3], neurons_per_layer=coordinates[4], dropout_prob=coordinates[5], deterministic=False)


N_ITERS = 11

ALT_1 = "{"
ALT_2 = "{"
ALT_3 = "{"

BUDGETS = [5, 10, 20, 30, 40, 50, 60]

for budget in BUDGETS:

    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                            hyperparameterspace, budget, 0, 0.9, 
                                                            init_points=15, 
                                                            alternative=0, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_1 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"


    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                            hyperparameterspace, budget, 0, 0.8, 
                                                            init_points=15, 
                                                            alternative=1, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_2 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"


    summe = 0
    for i in range(N_ITERS):

        optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                            hyperparameterspace, budget, 0, 0.9, 
                                                            init_points=15, 
                                                            alternative=2, ref_per_step=4)
        points = optimizer.fit()

        points.sort(key=operator.attrgetter('value'))
        summe += points[0].get_value()

    ALT_3 += "(" + str(budget) + "," + str(summe/N_ITERS) + ")"



    ALT_1 += "}"
    ALT_2 += "}"
    ALT_3 += "}"

    print(ALT_1+"\n")
    print(ALT_2+"\n")
    print(ALT_3+"\n")


{(5,39.911633472184285)(10,28.184326096028368)(20,20.822328724414646)(30,18.002789090940414)(40,18.000106608201577)(50,19.737573735551326)(60,23.09285117463779)}

{(5,44.4573499682562)(10,30.191633560819955)(20,11.927831753419248)(30,6.797546890999828)(40,4.854545454609092)(50,4.396643226971192)(60,4.009090909181819)}

{(5,38.36786030197544)(10,26.183473563067537)(20,16.128919602652935)(30,16.52013067842722)(40,10.818181821233052)(50,10.627417023079374)(60,8.009090909090908)}

