### Baseline experiment

Experiment to compare the 4 Optimization algorithms before trying to improve sparse search

In [1]:
import sys  
sys.path.insert(0, '../../')

import HPO
import pysgpp
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from numpy.random import seed
import random
import time
import os
import math
from matplotlib import cm
import matplotlib 

import operator

matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

random.seed(1)
seed(2)
tf.random.set_seed(3)

def reset_seeds():
    np.random.seed(1)
    random.seed(2)
    tf.random.set_seed(3)

VERBOSE = 0

SPARSE_RESULT = []
SPARSE_RESULT_OPTIMIZED = []

SPARSE_COST = []
SPARSE_COST_OPTIMIZED = []

2023-07-17 20:33:42.051835: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Model creation function

In [2]:
def create_model(learning_rate=0.0001, input_dim=10, number_layers=1, neurons_per_layer=20):
    # create model
    model = Sequential()

    model.add(Dense(neurons_per_layer, input_shape=(input_dim,), activation='relu'))
    for _ in range(number_layers):
        model.add(Dense(neurons_per_layer, activation='relu'))
    model.add(Dense(1, activation=None))

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


### Hyperparameter space

In [3]:
hyperparameterspace = {
    'epochs': ["interval-int", 1, 40],
    #'batch_size': ["interval-int", 1, 200],
    'learning rate': ["interval-log", 1e-10, 1e-1],
    #'number_layers': ["interval-int", 1, 20],
    #'neurons_per_layer': ["interval-int", 1, 50]
}

hyperparameterspace_special = {}
for key in hyperparameterspace.keys():
    liste = []
    for i in range(1, len(hyperparameterspace[key])):
        liste.append(hyperparameterspace[key][i])
    hyperparameterspace_special[key] = liste

### Optimization

In [4]:
def visualize(points, budget, alternative):

    points.sort(key=operator.attrgetter('value'))
    x_values = []
    y_values = []
    z_values = []

    print(len(points))

    for i in range(len(points)):    
        x_values.append(points[i].get_coordinates()[0])
        y_values.append(np.log10(points[i].get_coordinates()[1]))
        z_values.append(points[i].get_value())
        print(i)
        print(points[i].get_value())
        print(points[i].get_coordinates())


    fig = plt.figure()
    ax = plt.axes()
    surface = plt.scatter(x_values, y_values, c=z_values, cmap="plasma")
    plt.scatter(points[0].get_coordinates()[0], points[0].get_coordinates()[1], c="white", marker="x")

    plt.colorbar(surface)
    # plt.gca().set_aspect("equal")

    plt.xlabel("Epochs")
    plt.ylabel("Log of learning rate")

    plt.savefig("./Alternative_"+str(alternative)+"_budget_"+str(budget)+".pgf",bbox_inches='tight' )

    plt.show()

In [5]:

################## MODEL AND FUNCTION DEFINITION ####################
CV = 2
dataset = HPO.Dataset(task_id=233211)

def evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, deterministic=True):

    kfold = KFold(n_splits=CV)

    split = (kfold.split(dataset.get_X(), dataset.get_Y()))

    values = []

    numeric_features = [not x for x in dataset.get_categorical_indicator()]
    numeric_transformer = Pipeline(
        steps=[("imputer", SimpleImputer(strategy="median")),
                ("scaler", StandardScaler())]
    )

    categorical_transformer = Pipeline(
        steps=[
            ("encoder", OneHotEncoder(
                handle_unknown="infrequent_if_exist", sparse_output=False)),
            # ("selector", SelectPercentile(chi2, percentile=50)),
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_transformer, numeric_features),
            ("cat", categorical_transformer,
                dataset.get_categorical_indicator()),
        ]
    )

    for i, (train_index, test_index) in enumerate(split):

        if deterministic:
            reset_seeds()

        X_train = dataset.get_X()[train_index]
        Y_train = dataset.get_Y()[train_index]

        X_val = dataset.get_X()[test_index]
        Y_val = dataset.get_Y()[test_index]

        preprocessor.fit(X_train, Y_train)

        X_train = preprocessor.transform(X_train)
        X_val = preprocessor.transform(X_val)

        regressor = KerasRegressor(model=create_model,
                                    learning_rate=learning_rate,
                                    input_dim=len(
                                        X_train[0]),
                                    number_layers=number_of_layers,
                                    neurons_per_layer=neurons_per_layer,
                                    verbose=0)

        regressor = TransformedTargetRegressor(regressor=regressor,
                                                transformer=StandardScaler())

        regressor.fit(X_train, Y_train, epochs=int(epochs),
                        batch_size=int(batch_size), shuffle=False)

        Y_predicted = regressor.predict(X_val)
        # error = sklearn.metrics.mean_absolute_error(Y_predicted, Y_val)
        error = sklearn.metrics.mean_absolute_percentage_error(
            Y_predicted, Y_val)
        values.append(error)

        del regressor
        K.clear_session()

    result = sum(values)/len(values)
    return result

def function(coordinates):
        
    return evaluate_model(epochs=coordinates[0], batch_size=100, learning_rate=coordinates[1], number_of_layers=1, neurons_per_layer=30, deterministic=False)


BUDGETS = [10, 50, 100]

for budget in BUDGETS:

    optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                        hyperparameterspace, budget, 0, 0.75, 
                                                        init_points=10*len(hyperparameterspace.keys()), 
                                                        alternative=0, ref_per_step=4)
    points = optimizer.fit()

    visualize(points, budget, 0)

    optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                        hyperparameterspace, budget, 0, 0.35, 
                                                        init_points=8*len(hyperparameterspace.keys()), 
                                                        alternative=1, ref_per_step=4)
    points = optimizer.fit()

    visualize(points, budget, 1)

    optimizer = HPO.IterativeRandomOptimization(HPO.Dataset([],[]), function, 
                                                        hyperparameterspace, budget, 0, 0.6, 
                                                        init_points=7*len(hyperparameterspace.keys()), 
                                                        alternative=2, ref_per_step=4)
    points = optimizer.fit()

    visualize(points, budget, 2)


2023-07-17 20:33:45.904270: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


10
0
0.2705729529261589
[38, 0.00014035695885295977]
1
0.6579395234584808
[23, 0.033851311854663266]
2
0.6671659350395203
[40, 2.120717293813199e-08]
3
1.2357076704502106
[32, 2.5202607503124456e-08]
4
2.336891770362854
[39, 4.189894535337338e-06]
5
2.41215717792511
[12, 3.7448868234962615e-05]
6
18.29111820459366
[35, 2.4538050606644665e-09]
7
18.769420474767685
[16, 1.7113391527987206e-10]
8
21.600561320781708
[12, 6.071698793016847e-07]
9
39.67339900135994
[34, 2.8062834590978242e-06]


  plt.show()


10
0
0.5883013904094696
[5, 4.7115652930928336e-08]
1
0.6262516975402832
[16, 2.2265298401829723e-06]
2
0.7057435214519501
[27, 0.000808017074746705]
3
0.7087561190128326
[9, 9.28017439090659e-05]
4
0.7847044765949249
[32, 4.320592824376749e-09]
5
0.793942391872406
[9, 6.5416760912375625e-09]
6
1.027369737625122
[32, 7.130187465312951e-07]
7
2.5706967413425446
[40, 3.238991410570394e-09]
8
5.131153613328934
[13, 1.0817029284032699e-08]
9
62.03374630212784
[4, 5.209658948953548e-10]


  plt.show()


10
0
0.5973455086350441
[35, 0.0009750629732308114]
1
0.6392023712396622
[10, 2.888599864375346e-09]
2
0.6783811300992966
[13, 7.045530019272926e-07]
3
0.708414688706398
[33, 0.010131659260042482]
4
0.750618189573288
[7, 0.002124446104127426]
5
0.8561917394399643
[23, 0.00138911144646831]
6
1.9585015177726746
[38, 0.03834246612052938]
7
2.4895578622817993
[19, 1.981899440562325e-07]
8
26.85557046532631
[27, 3.852413432081724e-06]
9
32.54906910657883
[27, 6.881408357192254e-08]


  plt.show()


48
0
0.2537892907857895
[22, 0.00031718521722293554]
1
0.2671845927834511
[18, 0.0002977530014841256]
2
0.28901637345552444
[23, 0.00012248516592357886]
3
0.30785520374774933
[20, 0.000352559645017456]
4
0.32883454859256744
[19, 0.0001054923982258768]
5
0.3381599187850952
[19, 0.0003690869581881458]
6
0.3622681647539139
[5, 0.0029652453386255374]
7
0.3773841857910156
[20, 0.0006481509805465712]
8
0.3826977536082268
[34, 0.0007613953516009255]
9
0.4179232195019722
[38, 0.0009041423978163554]
10
0.4280003756284714
[23, 5.634302826299676e-05]
11
0.46929751336574554
[20, 0.00010126907923389917]
12
0.4714181646704674
[37, 0.0015019118756885474]
13
0.49429287016391754
[19, 4.75974809054879e-05]
14
0.5288431793451309
[10, 0.0027372348472198783]
15
0.5346284061670303
[19, 4.068030809722755e-05]
16
0.5777712240815163
[33, 0.001588486576095889]
17
0.5911276191473007
[37, 6.634948791892175e-07]
18
0.61004738509655
[38, 7.653144830229526e-07]
19
0.6285446584224701
[14, 1.4547302059254121e-05]
20
0

  plt.show()


48
0
0.31829535216093063
[25, 0.00010371544000365687]
1
0.4082110822200775
[38, 0.0004740185636278597]
2
0.5772362649440765
[20, 0.08469510239285678]
3
0.5787393599748611
[37, 4.068656443715955e-05]
4
0.6089644730091095
[23.75353343750099, 0.1]
5
0.62294802069664
[22.307352508164605, 0.1]
6
0.685304582118988
[27, 0.0033779937115554805]
7
0.7144554257392883
[40, 0.1]
8
0.7425756752490997
[21.70962964348676, 0.1]
9
0.749987006187439
[32.0796434178679, 0.1]
10
0.7650450170040131
[26.30427340504322, 0.1]
11
0.7743737101554871
[35, 1.8274537724110304e-07]
12
0.8048461973667145
[35.43074071296736, 0.1]
13
0.8126443028450012
[8, 0.026020855754356198]
14
0.814026802778244
[27.466672255221273, 1e-10]
15
0.9667912125587463
[36.361686834499174, 1e-10]
16
1.029121607542038
[22, 2.711350843945726e-08]
17
1.0457419157028198
[26.738770223842007, 1e-10]
18
1.0820908099412918
[24.519811787816035, 0.1]
19
1.0905222594738007
[21.584171913720823, 1e-10]
20
1.0948192477226257
[21.603989626718302, 1e-10]
21

  plt.show()


50
0
0.4416295289993286
[10, 0.009276263351687419]
1
0.4463846981525421
[12, 0.00017675548878891354]
2
0.4601663202047348
[4, 0.1]
3
0.47233037650585175
[4, 0.1]
4
0.5789429545402527
[38, 6.715699544227452e-06]
5
0.6304098218679428
[15, 0.1]
6
0.6317339092493057
[6, 0.1]
7
0.6381525099277496
[16, 0.1]
8
0.6475306004285812
[29, 0.1]
9
0.6721540689468384
[30, 0.1]
10
0.7148140966892242
[8, 9.144624717350326e-09]
11
0.7493140697479248
[13, 0.1]
12
0.764081746339798
[1, 0.0031543683834028844]
13
0.7694657444953918
[29, 1.063253332645656e-07]
14
0.7827570736408234
[8, 0.1]
15
0.8034963011741638
[15, 0.1]
16
0.8890878558158875
[30, 3.8152037764632864e-09]
17
0.9059871733188629
[7, 0.1]
18
0.9108931422233582
[9, 0.1]
19
0.9215880036354065
[39, 4.218465211704643e-07]
20
0.989027589559555
[35, 6.167253874137562e-09]
21
1.1057381629943848
[14, 0.1]
22
1.146844506263733
[19, 0.1]
23
1.1951628029346466
[7, 0.1]
24
1.2140287458896637
[6, 0.1]
25
1.2690786123275757
[7, 0.1]
26
1.269967645406723
[3, 

  plt.show()


100
0
0.2588002383708954
[12, 0.00022329814753365283]
1
0.28860660642385483
[12, 0.00023769410357577664]
2
0.29651669412851334
[13, 0.000232819293608788]
3
0.3103220760822296
[12, 0.00023954209332858624]
4
0.3183888867497444
[12, 0.00024307512887341117]
5
0.3222987875342369
[12, 0.00022108910327285045]
6
0.33062469959259033
[11, 0.000289059943750422]
7
0.3404953181743622
[34, 0.047786093305713456]
8
0.35791702568531036
[12, 0.00022480692108555988]
9
0.40404482930898666
[13, 0.001179417654326569]
10
0.4403342381119728
[36, 0.02150436111545466]
11
0.462142214179039
[11, 0.00022128687340256212]
12
0.4729507640004158
[13, 0.0009533579111692955]
13
0.4910805970430374
[29, 0.0013919912656609208]
14
0.49382057785987854
[10, 0.00681812625563511]
15
0.49464334547519684
[30, 0.029051493821071337]
16
0.503143772482872
[32, 2.6935214655922645e-06]
17
0.5256104841828346
[11, 0.00028531033563525894]
18
0.5524344444274902
[14, 0.00010706219035756147]
19
0.5852614641189575
[2, 0.0005126831976203136]
2

  plt.show()


100
0
0.3594621270895004
[18.874732797958792, 0.1]
1
0.3707423284649849
[20, 0.0009036648427015049]
2
0.39673009514808655
[25, 0.000603227961284483]
3
0.5115613490343094
[22.517257365266335, 0.0016705912549397318]
4
0.5255513787269592
[18.04713255067396, 0.1]
5
0.5755441039800644
[26.062397930150826, 0.1]
6
0.6059257835149765
[21.771829819177583, 0.1]
7
0.6260865777730942
[18.06414479817559, 0.1]
8
0.6294072717428207
[19.357743836209856, 0.1]
9
0.651616096496582
[22.067958421771728, 0.1]
10
0.6613273471593857
[22.168166589081235, 0.1]
11
0.6731633543968201
[19.796015531010234, 0.1]
12
0.7047006189823151
[20, 0.007564706610899174]
13
0.7137998044490814
[18.586640512693624, 0.1]
14
0.7146336734294891
[27.234031963169635, 0.1]
15
0.7149372398853302
[19.819051157870145, 0.1]
16
0.7265589833259583
[19.49386213448351, 0.1]
17
0.7375815361738205
[20.038260488233213, 0.1]
18
0.755180299282074
[18.547969078882645, 1e-10]
19
0.7558021247386932
[21.076854023935063, 1e-10]
20
0.7657688558101654
[1

  plt.show()


98
0
0.24357856065034866
[38, 0.0001735140992781449]
1
0.2611400783061981
[24, 0.00020787203406410807]
2
0.2823559492826462
[18, 0.00025348790572188025]
3
0.2983620837330818
[40, 7.143169741252496e-05]
4
0.33242112398147583
[26, 0.00022454536386077898]
5
0.3576144129037857
[10, 0.1]
6
0.5077108591794968
[16, 0.1]
7
0.5383814871311188
[40, 0.1]
8
0.6176203787326813
[19, 0.1]
9
0.6313491612672806
[10, 0.1]
10
0.6420164257287979
[13, 0.1]
11
0.6480769217014313
[17, 0.1]
12
0.668381541967392
[13, 0.1]
13
0.6814159750938416
[40, 0.1]
14
0.6907521486282349
[17, 0.1]
15
0.7099916338920593
[25, 0.1]
16
0.723438560962677
[18, 0.1]
17
0.7627235352993011
[30, 0.1]
18
0.7661861777305603
[40, 0.1]
19
0.7703199684619904
[29, 0.1]
20
0.7718284428119659
[23, 0.1]
21
0.7851706445217133
[16, 0.1]
22
0.8158988058567047
[20, 9.158769947291064e-07]
23
0.8276566565036774
[33, 0.1]
24
0.8282021582126617
[23, 0.1]
25
1.0421359539031982
[35, 0.1]
26
1.062530279159546
[11, 0.1]
27
1.1229112148284912
[3, 0.1]
28

  plt.show()
