### Baseline experiment

Experiment to compare the four optimization algorithms before trying to improve the sparse grid search.

In [1]:
import sys  
sys.path.insert(0, '../')

import HPO
import pysgpp
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from numpy.random import seed
import random
import time
import os
import math
from matplotlib import cm
import matplotlib 

# Render figures through the PGF backend, using pdflatex and serif fonts.
matplotlib.use("pgf")
matplotlib.rcParams["pgf.texsystem"] = "pdflatex"
matplotlib.rcParams["font.family"] = "serif"
matplotlib.rcParams["text.usetex"] = True
matplotlib.rcParams["pgf.rcfonts"] = False

# Seed the three random number generators used in this notebook:
# TensorFlow, NumPy (global legacy generator) and Python's `random` module.
tf.random.set_seed(3)
np.random.seed(2)
random.seed(1)

def reset_seeds():
    """Re-seed the Python, NumPy and TensorFlow random number generators.

    The seed values match the ones applied once when this cell is first
    executed (``random`` -> 1, NumPy -> 2, TensorFlow -> 3), so calling this
    function puts all three generators back into their initial state.
    Previously the ``random`` and NumPy seeds were swapped here, which meant a
    "reset" produced a different random stream than the initial run.
    """
    random.seed(1)
    np.random.seed(2)
    tf.random.set_seed(3)

# Verbosity setting; not referenced in the visible cells.
VERBOSE = 0

# Module-level accumulators, each starting as its own empty list.
# NOTE(review): presumably filled with sparse-search results and costs by
# later cells -- nothing in the visible cells appends to them.
SPARSE_RESULT, SPARSE_RESULT_OPTIMIZED = [], []
SPARSE_COST, SPARSE_COST_OPTIMIZED = [], []

2023-06-01 11:59:41.320200: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Model creation function

In [2]:
def create_model(learning_rate=0.0001, input_dim=10, number_layers=1, neurons_per_layer=20):
    """Build and compile a fully connected regression network.

    The network is a ``Sequential`` stack of:

    * one ReLU ``Dense`` layer that declares the input shape,
    * ``number_layers`` further ReLU ``Dense`` layers of the same width
      (so there are ``number_layers + 1`` hidden layers in total),
    * a single-unit ``Dense`` output layer without activation.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        input_dim: Number of input features.
        number_layers: Number of hidden layers added after the first one.
        neurons_per_layer: Width of every hidden layer.

    Returns:
        The model, compiled with Adam and mean squared error loss.
    """
    network = Sequential()

    # Input-facing hidden layer; it is the only one that specifies input_shape.
    network.add(Dense(neurons_per_layer, input_shape=(input_dim,), activation='relu'))
    for _ in range(number_layers):
        network.add(Dense(neurons_per_layer, activation='relu'))
    # Linear single-unit output.
    network.add(Dense(1, activation=None))

    network.compile(
        loss='mean_squared_error',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    )
    return network


### Hyperparameter space

In [3]:
# Search space: each entry maps a parameter name to a list whose first element
# is a type tag (here "interval") followed by the two interval endpoints.
hyperparameterspace = {
    'x0': ["interval", -5, 10],
    'x1': ["interval", -5, 10],
    # Dimensions that are currently switched off:
    #'learning_rate': ["interval-log", 0.000000001, 0.1],
    #'number_layers': ["interval-int", 1, 20],
    #'neurons_per_layer': ["interval-int", 1, 50]
}

# Same mapping with the leading type tag stripped, i.e. name -> remaining values.
hyperparameterspace_special = {
    name: list(spec[1:]) for name, spec in hyperparameterspace.items()
}

### Optimization

In [4]:

################## MODEL AND FUNCTION DEFINITION ####################
# CV is not referenced in the visible cells; presumably the number of
# cross-validation folds -- TODO confirm.
CV = 2
# Dataset handle that is passed to the optimizer further down in this cell.
# NOTE(review): task_id looks like an OpenML task id -- confirm against HPO.Dataset.
dataset = HPO.Dataset(task_id=233211)

def evaluate_model(epochs, batch_size, learning_rate, number_of_layers, neurons_per_layer, deterministic=True):
    """Evaluate the Rastrigin benchmark function at ``(epochs, batch_size)``.

    Despite the parameter names, no model is trained: the first two arguments
    are used as the coordinates of the Rastrigin function

        f(x) = 10 * n + sum_i (x_i ** 2 - 10 * cos(2 * pi * x_i))

    with ``n`` equal to the number of coordinates that are actually summed.

    Args:
        epochs: First coordinate.
        batch_size: Second coordinate.
        learning_rate: Ignored (dimension currently disabled).
        number_of_layers: Ignored (dimension currently disabled).
        neurons_per_layer: Ignored (dimension currently disabled).
        deterministic: Ignored; kept for interface compatibility.

    Returns:
        The function value; it is 0 at the origin.
    """
    ################# RASTRIGIN #################
    # Coordinates taking part in the sum. To enable another dimension, add the
    # corresponding argument here; the constant term below follows automatically
    # instead of depending on the size of a global dictionary.
    active_coordinates = (epochs, batch_size)

    # Explicit accumulation (rather than the builtin sum) keeps the floating
    # point result identical to a plain left-to-right addition.
    total = 0
    for coordinate in active_coordinates:
        total += coordinate ** 2 - 10 * math.cos(2 * math.pi * coordinate)
    return len(active_coordinates) * 10 + total


class ExampleFunction(pysgpp.ScalarFunction):
    """Objective function handed to the sparse grid optimizer.

    Wraps ``evaluate_model`` as a ``pysgpp.ScalarFunction`` whose dimension is
    the number of entries in ``hyperparameterspace``.
    """

    def __init__(self):
        dimension = len(hyperparameterspace)
        super().__init__(dimension)

    def eval(self, x):
        """Map the point ``x`` to the search space and evaluate the objective.

        ``x[0]`` and ``x[1]`` are converted with ``HPO.from_standard`` using
        the stored values of ``"x0"`` and ``"x1"``.
        NOTE(review): presumably ``x`` lies in the unit hypercube and
        ``from_standard`` rescales it to the configured interval -- confirm
        against the HPO module.
        """
        x0_bounds = hyperparameterspace_special["x0"]
        epochs = HPO.from_standard(x0_bounds[0], x0_bounds[1], x[0])

        x1_bounds = hyperparameterspace_special["x1"]
        batch_size = HPO.from_standard(x1_bounds[0], x1_bounds[1], x[1])

        # learning_rate, number_layers and neurons_per_layer are commented out
        # in the search space, so fixed placeholder values are forwarded.
        model_learning_rate = 0
        number_of_layers = 0
        neurons_per_layer = 0

        return evaluate_model(epochs, batch_size, model_learning_rate, number_of_layers, neurons_per_layer)

# Budgets for which the sparse grid search is run, one run per value.
BUDGETS = [5, 80, 1000]

for budget in BUDGETS:
    # A fresh objective and optimizer are created for every budget.
    optimizer = HPO.SparseGridSearchOptimization(
        dataset,
        ExampleFunction(),
        hyperparameterspace,
        budget,
        verbosity=0,
        degree=2,
        adaptivity=0.85,
    )
    optimizer.fit()
    


Adaptive grid generation (Ritter-Novak)...
Done in 0ms.
Solving linear system (automatic method)...
Done in 0ms.
Optimizing (gradient descent)...
####  Local optimal point:
Done in 0ms.
-5.0
2.5
Optimal point evaluated: 51.25
Optimal point interpolated: 23.125
Optimizing (multi-start)...
####  Global optimal point:
-4.964750159389658
-4.981267481185872
Done in 0ms.
Optimal point evaluated: 49.77522505957953
Optimal point interpolated: -6.038569092254161


  plt.show()


Adaptive grid generation (Ritter-Novak)...
Done in 1ms.
Solving linear system (automatic method)...
Done in 0ms.
Optimizing (gradient descent)...
####  Local optimal point:
-1.25
-1.015625
Optimal point evaluated: 12.642146873903027
Optimal point interpolated: 12.642146873903027
Done in 0ms.
Optimizing (multi-start)...
####  Global optimal point:
0.15058107350878203
-1.0157695336706754
Done in 1ms.
Optimal point evaluated: 5.255233103512133
Optimal point interpolated: 5.667823716192226
Adaptive grid generation (Ritter-Novak)...
Done in 39ms.
Solving linear system (automatic method)...
Done in 1328ms.
Optimizing (gradient descent)...
####  Local optimal point:
-1.98974609375
-0.99365234375
Done in 1ms.
Optimal point evaluated: 4.975134049663653
Optimal point interpolated: 4.975134049661977
Optimizing (multi-start)...
####  Global optimal point:
0.031188672952392693
0.01510501316284607
Optimal point evaluated: 0.23760054273059694
Optimal point interpolated: 0.3333606498673794
Done in 13m