## Example for predicting the pima indians diabetes dataset


## Imports and dataset

In [4]:
import HPO

import pandas as pd

import pysgpp

import sys

import math
import matplotlib.pyplot as plt
import torch

from sklearn.model_selection import cross_val_score

import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.optimizers as opt
from scikeras.wrappers import KerasRegressor, KerasClassifier

import sklearn.metrics

def to_standard(lower, upper, value):
    return (value-lower)/(upper-lower)


def from_standard(lower, upper, value):
    return value*(upper-lower)+lower


dataset = np.loadtxt("pima-indians-diabetes.csv", delimiter=",")
print(dataset.shape)
# separate the data from the target attributes
X = dataset[:,0:7]
Y = dataset[:,8]

dataset = HPO.Dataset(X, Y)


(768, 9)


## Hyperparameterspace & Model & Function

In [5]:
BUDGET = 400
VERBOSE = 2
CV = 2
SCORING = 'accuracy'

hyperparameterspace = {
    'model__loss': ["list", 'binary_crossentropy', 'categorical_crossentropy', 'binary_crossentropy', 'mean_squared_error', 'mean_absolute_error'],
    'model__optimizer': ["list", 'sgd', 'rmsprop', 'adam', 'adadelta', 'adagrad', 'adamax', 'nadam', 'ftrl'],
    'epochs': ["interval-int", 1, 400],
    'optimizer__learning_rate': ["interval", 0.000001, 0.01]
}

##################### Model for grid and random search #####################

# Function to create model, required for KerasClassifier
def create_model(optimizer, loss):
    # create model
    model = Sequential()
    model.add(Dense(10, input_shape=(7,), activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    if optimizer == 'sgd':
        optimizer = opt.SGD()
    elif optimizer == 'rmsprop':
        optimizer = opt.RMSprop()
    elif optimizer == 'adam':
        optimizer = opt.Adam()
    elif optimizer == 'adadelta':
        optimizer = opt.Adadelta()
    elif optimizer == 'adagrad':
        optimizer = opt.Adagrad()
    elif optimizer == 'adamax':
        optimizer = opt.Adamax()
    elif optimizer == 'nadam':
        optimizer = opt.Nadam()
    elif optimizer == 'ftrl':
        optimizer = opt.Ftrl()

    # Compile model
    model.compile(loss=loss,
                  optimizer=optimizer)
    return model

model = KerasClassifier(model=create_model, verbose=0)

##################### Blackbox function for bayesian optimization #####################

hyperparameterspace_special = {}
for key in hyperparameterspace.keys():
    liste = []
    for i in range(1, len(hyperparameterspace[key])):
        liste.append(hyperparameterspace[key][i])
    hyperparameterspace_special[key] = liste


def blackboxfunction(params):  # Maximizing 
    index = int(params[0]*(len(hyperparameterspace_special["model__loss"])-1))
    loss = hyperparameterspace_special["model__loss"][index]
    
    index = int(params[1]*(len(hyperparameterspace_special["model__optimizer"])-1))
    model_optimizer = hyperparameterspace_special["model__optimizer"][index]

    epochs = int(params[2])

    learning_rate = params[3]

    # Function to create model, required for KerasClassifier
    def create_model():
        # create model
        model = Sequential()
        model.add(Dense(10, input_shape=(7,), activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(20, activation='relu'))
        model.add(Dense(10, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        if model_optimizer == 'sgd':
            optimizer = opt.SGD(learning_rate=learning_rate)
        elif model_optimizer == 'rmsprop':
            optimizer = opt.RMSprop(learning_rate=learning_rate)
        elif model_optimizer == 'adam':
            optimizer = opt.Adam(learning_rate=learning_rate)
        elif model_optimizer == 'adadelta':
            optimizer = opt.Adadelta(learning_rate=learning_rate)
        elif model_optimizer == 'adagrad':
            optimizer = opt.Adagrad(learning_rate=learning_rate)
        elif model_optimizer == 'adamax':
            optimizer = opt.Adamax(learning_rate=learning_rate)
        elif model_optimizer == 'nadam':
            optimizer = opt.Nadam(learning_rate=learning_rate)
        elif model_optimizer == 'ftrl':
            optimizer = opt.Ftrl(learning_rate=learning_rate)

        # Compile model
        model.compile(loss=loss,
                    optimizer=optimizer)
        return model

    model = KerasClassifier(model=create_model, verbose=0)


    model.fit(dataset.get_X_train(), dataset.get_Y_train(), epochs=epochs)

    Y_predicted = model.predict(dataset.get_X_test())

    return sklearn.metrics.accuracy_score(dataset.get_Y_test(), Y_predicted)
    

##################### Function for sparse grid search #####################
class ExampleFunction(pysgpp.ScalarFunction): # Minimizing

    def __init__(self):
        super(ExampleFunction, self).__init__(len(hyperparameterspace))


    def eval(self, x):
        index = int(x[0]*(len(hyperparameterspace_special["model__loss"])-1))
        loss = hyperparameterspace_special["model__loss"][index]
        
        index = int(x[1]*(len(hyperparameterspace_special["model__optimizer"])-1))
        model_optimizer = hyperparameterspace_special["model__optimizer"][index]
        
        
        epochs = int(from_standard(1, 400, x[2]))

        learning_rate = from_standard(0.000001, 0.001, x[3])
        
        # Function to create model, required for KerasClassifier
        def create_model():
            # create model
            model = Sequential()
            model.add(Dense(10, input_shape=(7,), activation='relu'))
            model.add(Dense(20, activation='relu'))
            model.add(Dense(10, activation='relu'))
            model.add(Dense(20, activation='relu'))
            model.add(Dense(10, activation='relu'))
            model.add(Dense(1, activation='sigmoid'))

            if model_optimizer == 'sgd':
                optimizer = opt.SGD(learning_rate=learning_rate)
            elif model_optimizer == 'rmsprop':
                optimizer = opt.RMSprop(learning_rate=learning_rate)
            elif model_optimizer == 'adam':
                optimizer = opt.Adam(learning_rate=learning_rate)
            elif model_optimizer == 'adadelta':
                optimizer = opt.Adadelta(learning_rate=learning_rate)
            elif model_optimizer == 'adagrad':
                optimizer = opt.Adagrad(learning_rate=learning_rate)
            elif model_optimizer == 'adamax':
                optimizer = opt.Adamax(learning_rate=learning_rate)
            elif model_optimizer == 'nadam':
                optimizer = opt.Nadam(learning_rate=learning_rate)
            elif model_optimizer == 'ftrl':
                optimizer = opt.Ftrl(learning_rate=learning_rate)

            # Compile model
            model.compile(loss=loss,
                        optimizer=optimizer)
            return model

        model = KerasClassifier(model=create_model, verbose=0)


        model.fit(dataset.get_X_train(), dataset.get_Y_train(), epochs=epochs)

        Y_predicted = model.predict(dataset.get_X_test())

        return -sklearn.metrics.accuracy_score(dataset.get_Y_test().tolist(), Y_predicted)

## Grid search

In [6]:
optimization = HPO.Optimization(dataset, model, hyperparameterspace, type="grid_search", cv=CV, scoring=SCORING, budget=BUDGET, verbosity=VERBOSE)
result = optimization.fit()

print("Best result:")
print(result.best_score_)
print("Corresponding parameters:")
print(result.best_params_)

Fitting 2 folds for each of 360 candidates, totalling 720 fits
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.0016675000000000001; total time=   2.1s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.0016675000000000001; total time=   1.9s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.005000500000000001; total time=   1.9s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.005000500000000001; total time=   1.9s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.0083335; total time=   1.9s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.0083335; total time=   1.8s
[CV] END epochs=67, model__loss=binary_crossentropy, model__optimizer=rmsprop, optimizer__learning_rate=0.00166

## Random search

In [None]:
optimization = HPO.Optimization(dataset, model, hyperparameterspace, type="random_search", cv=CV, scoring=SCORING, budget=BUDGET, verbosity=VERBOSE)
result = optimization.fit()
print("Best result:")
print(result.best_score_)
print("Corresponding parameters:")
print(result.best_params_)

Fitting 2 folds for each of 400 candidates, totalling 800 fits
[CV] END epochs=150, model__loss=binary_crossentropy, model__optimizer=nadam, optimizer__learning_rate=0.008750125000000001; total time=   5.7s
[CV] END epochs=150, model__loss=binary_crossentropy, model__optimizer=nadam, optimizer__learning_rate=0.008750125000000001; total time=   5.6s
[CV] END epochs=50, model__loss=categorical_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.008750125000000001; total time=   1.5s
[CV] END epochs=50, model__loss=categorical_crossentropy, model__optimizer=sgd, optimizer__learning_rate=0.008750125000000001; total time=   1.7s
[CV] END epochs=150, model__loss=binary_crossentropy, model__optimizer=ftrl, optimizer__learning_rate=0.001250875; total time=   5.9s
[CV] END epochs=150, model__loss=binary_crossentropy, model__optimizer=ftrl, optimizer__learning_rate=0.001250875; total time=   4.4s
[CV] END epochs=150, model__loss=binary_crossentropy, model__optimizer=adam, optimizer__l

## Bayesian Optimization 

https://thuijskens.github.io/2016/12/29/bayesian-optimisation/ 

In [None]:
optimization = HPO.Optimization(dataset, blackboxfunction, hyperparameterspace, type="bayesian", budget=BUDGET, verbosity=VERBOSE)
result = optimization.fit()

# fig = plt.figure()
# ax = plt.axes(projection='3d')

# x = [result[0][i][0] for i in range(len(result[0]))]
# y = [result[0][i][1] for i in range(len(result[0]))]

# ax.scatter3D(x, y, result[1], c=result[1], cmap='Greens')

index_best = 0
for i in range(len(result[1])):
    if result[1][i] == max(result[1]):
        index_best = i

best_score = result[1][index_best]
best_params = result[0][index_best]


print("Best MSE:")
print(best_score)
print("With Hyperparameters: ")
i = 0
for key in hyperparameterspace.keys():
    if hyperparameterspace[key][0] == "list":
        index = int(best_params[i]*(len(hyperparameterspace_special[key])-1))
        print(key + ": " + str(hyperparameterspace_special[key][index]))
    else:
        print(key + ": " + str(best_params[i]))
    i += 1


print([result[0][i] for i in range(len(result[0])) if result[1][i] == max(result[1])])
#print(result)

Progress: [#############-------] 65.5%
Current time per iteration: 641.6500079880061
Approx. time remaining: 3621.258511889393


: 

: 

## Sparse grid search

In [None]:
f = ExampleFunction()

optimization = HPO.Optimization(dataset, f, hyperparameterspace, type="sparse", budget=BUDGET, verbosity=VERBOSE)
result = optimization.fit()


Adaptive grid generation (Ritter-Novak)...


2023-03-01 08:28:55.697722: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:267] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-03-01 08:28:55.697845: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (maxi-XPS-15-9560): /proc/driver/nvidia/version does not exist
2023-03-01 08:28:55.701397: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Done in 360918ms.
Solving linear system (automatic method)...
Done in 0ms.

Optimal hyperparameters so far:
model__loss: binary_crossentropy
model__optimizer: adam
epochs: 200.5
optimizer__learning_rate: 0.0025007500000000004
Resulting loss:
-0.7272727272727271
Optimizing (gradient descent)...
Done in 0ms.

Optimal hyperparameters after optimization:
model__loss: categorical_crossentropy
model__optimizer: adadelta
epochs: 152.68017616441352
optimizer__learning_rate: 0.0020361618892724143
Resulting loss (Optimal value from optimization):
-0.9116671830637957
Resulting loss (Optimal point evaluated):
-0.7142857142857143
