### MNIST dataset classification


In [1]:
import sys  
sys.path.insert(0, '../')


import HPO
import pysgpp
import matplotlib.pyplot as plt
import tensorflow as tf
import sklearn.metrics
from sklearn.model_selection import KFold
import numpy as np
import keras
from keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, MaxPooling2D, Conv2D
from scikeras.wrappers import KerasRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from numpy.random import seed
import random
import time

from tensorflow.keras import layers


random.seed(1)
seed(2)
tf.random.set_seed(3)

def reset_seeds():
    np.random.seed(1)
    random.seed(2)
    tf.random.set_seed(3)

VERBOSE = 1
CV = 2 #[(slice(None), slice(None))]

2023-06-26 22:13:51.067504: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Hyperparameter space definition

In [2]:
hyperparameterspace = {
    'epochs': ["interval-int", 1, 10],
    'batch_size': ["interval-int", 200, 2050],
    'learning_rate': ["interval-log", 0.000000001, 0.1],
    'number_conv_layers': ["interval-int", 1, 4],
    'number_fc_layers': ["interval-int", 1, 4],
    'kernel_size': ["interval-int", 3, 5],
    'pool_size': ["interval-int", 1, 3],
    'neurons_per_fc_layer': ["interval-int", 1, 10],
    'dropout_prob': ["interval", 0, 1]
}

hyperparameterspace_special = {}
for key in hyperparameterspace.keys():
    liste = []
    for i in range(1, len(hyperparameterspace[key])):
        liste.append(hyperparameterspace[key][i])
    hyperparameterspace_special[key] = liste



### Model definition

In [3]:

num_classes = 10
input_shape = (28, 28, 1)


def create_model(learning_rate=1e-4, number_conv_layers=2, number_fc_layers=0, kernel_size=3, pool_size=2, neurons_per_fc_layer=4, dropout_prob=0.5):
    # create model
    model = Sequential()

    model.add(keras.Input(shape=input_shape))

    for _ in range(number_conv_layers):
        model.add(layers.Conv2D(32, kernel_size=(kernel_size, kernel_size), activation="relu"))
        model.add(layers.MaxPooling2D(pool_size=(pool_size, pool_size), padding='same'))
    model.add(layers.Flatten())
    model.add(layers.Dropout(dropout_prob))

    for _ in range(number_fc_layers):
        model.add(layers.Dense(neurons_per_fc_layer, activation="relu"))

    model.add(layers.Dense(num_classes, activation="softmax"))

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    return model


### Experiment parameters

In [4]:
DIRECTORY = "Current_tests/"+time.strftime("%H_%M_%S", time.localtime())

SPARSE_PARAMS = [2, 0.85, "gradient_descent"]

BUDGETS = [3, 5, 10, 30, 50, 70, 90, 110]

### Optimization

In [5]:
################## MODEL AND FUNCTION DEFINITION ####################

def evaluate_model(epochs, batch_size, learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob, deterministic=True):

    # if too many layers and resulting image has no values left, return accuracy 0

    current_size = 28
    for _ in range(number_conv_layers + 1):
        current_size = (current_size - kernel_size) + 1
        current_size = (current_size - pool_size) + 1

    if current_size <= 1:
        return 0


    if deterministic:
        reset_seeds()

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Scale images to the [0, 1] range
    x_train = x_train.astype("float32") / 255
    x_test = x_test.astype("float32") / 255

    # Make sure images have shape (28, 28, 1)
    x_train = np.expand_dims(x_train, -1)
    x_test = np.expand_dims(x_test, -1)

    # convert class vectors to binary class matrices
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    model = create_model(learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob)

    model.fit(x_train, y_train, verbose=0, batch_size=batch_size, epochs=epochs, validation_split=0.1, shuffle=False)

    score = model.evaluate(x_test, y_test, verbose=0)

    K.clear_session()
    del model 
    K.clear_session()

    return -score[1]

    

def blackboxfunction_grid(params):

    epochs = int(params[0])

    batch_size = int(params[1])

    learning_rate = params[2]

    number_conv_layers = int(params[3])

    number_fc_layers = int(params[4])

    kernel_size = int(params[5])

    pool_size = int(params[6])

    neurons_per_fc_layer = int(params[7])

    dropout_prob = params[8]

    return evaluate_model(epochs, batch_size, learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob)

def blackboxfunction_random(params):
    
    epochs = int(params[0])

    batch_size = int(params[1])

    learning_rate = params[2]

    number_conv_layers = int(params[3])

    number_fc_layers = int(params[4])

    kernel_size = int(params[5])

    pool_size = int(params[6])

    neurons_per_fc_layer = int(params[7])

    dropout_prob = params[8]

    return evaluate_model(epochs, batch_size, learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob, deterministic=False)

def blackboxfunction_bayesian(params):
    
    epochs = int(params[0])

    batch_size = int(params[1])

    learning_rate = 10 ** params[2]

    number_conv_layers = int(params[3])

    number_fc_layers = int(params[4])

    kernel_size = int(params[5])

    pool_size = int(params[6])

    neurons_per_fc_layer = int(params[7])

    dropout_prob = params[8]

    return evaluate_model(epochs, batch_size, learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob, deterministic=False)

##################### Function for sparse grid search #####################

class ExampleFunction(pysgpp.ScalarFunction):

    def __init__(self):
        super(ExampleFunction, self).__init__(
            len(hyperparameterspace.keys()))

    def eval(self, x):

        epochs = int(HPO.from_standard(
            hyperparameterspace_special["epochs"][0], hyperparameterspace_special["epochs"][1], x[0]))

        batch_size = int(HPO.from_standard(
            hyperparameterspace_special["batch_size"][0], hyperparameterspace_special["batch_size"][1], x[1]))

        learning_rate = HPO.from_standard_log(
            hyperparameterspace_special["learning_rate"][0], hyperparameterspace_special["learning_rate"][1], x[2])

        number_conv_layers = int(HPO.from_standard(
            hyperparameterspace_special["number_conv_layers"][0], hyperparameterspace_special["number_conv_layers"][1], x[3]))

        number_fc_layers = int(HPO.from_standard(
            hyperparameterspace_special["number_fc_layers"][0], hyperparameterspace_special["number_fc_layers"][1], x[4]))

        kernel_size = int(HPO.from_standard(
            hyperparameterspace_special["kernel_size"][0], hyperparameterspace_special["kernel_size"][1], x[5]))

        pool_size = int(HPO.from_standard(
            hyperparameterspace_special["pool_size"][0], hyperparameterspace_special["pool_size"][1], x[6]))

        neurons_per_fc_layer = int(HPO.from_standard(
            hyperparameterspace_special["neurons_per_fc_layer"][0], hyperparameterspace_special["neurons_per_fc_layer"][1], x[7]))

        dropout_prob = HPO.from_standard(
            hyperparameterspace_special["dropout_prob"][0], hyperparameterspace_special["dropout_prob"][1], x[8])

        return evaluate_model(epochs, batch_size, learning_rate, number_conv_layers, number_fc_layers, kernel_size, pool_size, neurons_per_fc_layer, dropout_prob)



RESULTS_GRID = "{"
RESULTS_RANDOM = "{"
RESULTS_BAYESIAN = "{"
RESULTS_SPARSE = "{"

dataset = HPO.Dataset([], [])

##### For each dataset: run models with different budget #####
for BUDGET in BUDGETS:

    print("\n################################################## Current Budget:",
            BUDGET, "##################################################")

    ############################## GRID SEARCH #######################
    if BUDGET == 3:

        print("\nPerforming grid search")
        optimization = HPO.GridSearchOptimization(
            dataset, blackboxfunction_grid, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

        result, cost = optimization.fit()

        index_best = 0
        for m in range(len(result)):
            if result[m][1] < result[index_best][1]:
                index_best = m

        best_score = result[index_best][1]
        best_params = result[index_best][0]
        

        print("Best score with Grid search:", best_score)

        if VERBOSE > 0:
            print("With Hyperparameters: ")
            m = 0
            for key in hyperparameterspace.keys():
                if hyperparameterspace[key][0] == "list":
                    index = int(
                        best_params[m]*(len(hyperparameterspace_special[key])-1))
                    print(key + ": " +
                        str(hyperparameterspace_special[key][index]))
                else:
                    print(key + ": " + str(best_params[m]))
                m += 1

        print("Best score with Grid search:", best_score)

        RESULTS_GRID += "(" + str(cost) + "," + str(-best_score) + ")"

        K.clear_session()

    # ########################### RANDOM SEARCH #######################
    print("\nPerforming random search")

    optimization = HPO.RandomSearchOptimization(
        dataset, blackboxfunction_random, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, cv=CV)

    result, cost = optimization.fit()

    index_best = 0
    for m in range(len(result)):
        if result[m][1] < result[index_best][1]:
            index_best = m

    best_score = result[index_best][1]
    best_params = result[index_best][0]
    
    if VERBOSE > 0:
        print("With Hyperparameters: ")
        m = 0
        for key in hyperparameterspace.keys():
            if hyperparameterspace[key][0] == "list":
                index = int(
                    best_params[m]*(len(hyperparameterspace_special[key])-1))
                print(key + ": " +
                      str(hyperparameterspace_special[key][index]))
            else:
                print(key + ": " + str(best_params[m]))
            m += 1

    print("Best score with Random search:", best_score)

    RESULTS_RANDOM += "(" + str(cost) + "," + str(-best_score) + ")"
    

    K.clear_session()

    ########################### BAYESIAN OPT #####################
    print("\nPerforming bayesian optimization")

    optimization = HPO.BayesianOptimization(
        dataset, blackboxfunction_bayesian, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE)

    result, cost = optimization.fit()

    index_best = 0
    for m in range(len(result)):
        if result[m][1] < result[index_best][1]:
            index_best = m

    best_score = result[index_best][1]
    best_params = result[index_best][0]

    
    
    if VERBOSE > 0:
        print("With Hyperparameters: ")
        m = 0
        for key in hyperparameterspace.keys():
            if hyperparameterspace[key][0] == "list":
                index = int(
                    best_params[m]*(len(hyperparameterspace_special[key])-1))
                print(key + ": " +
                      str(hyperparameterspace_special[key][index]))
            else:
                print(key + ": " + str(best_params[m]))
            m += 1
    

    print("Best score with Bayesian Optimization:", best_score)


    RESULTS_BAYESIAN += "(" + str(BUDGET) + "," + str(-best_score) + ")"
    

    K.clear_session()

    ########################### SPARSE OPT ############################

    print("\nPerforming sparse search")

    f = ExampleFunction()

    optimization = HPO.SparseGridSearchOptimization(
        dataset, f, hyperparameterspace, budget=BUDGET, verbosity=VERBOSE, degree=SPARSE_PARAMS[0], adaptivity=SPARSE_PARAMS[1], optimizer=SPARSE_PARAMS[2])

    [X0, fX0, X1, fX1, X2, fX2], cost = optimization.fit()

    cost = cost + 2
    bestFX = fX0 
    bestX = X0
    if fX1 < bestFX:
        bestFX = fX1 
        bestX = X1 
    if fX2 < bestFX:
        bestFX = fX2
        bestX = X2

    RESULTS_SPARSE += "(" + str(cost) + "," + str(-bestFX) + ")"

    if VERBOSE > 0:
        print("With Hyperparameters: ")
        m = 0
        for key in hyperparameterspace.keys():
            if hyperparameterspace[key][0] == "list":
                index = int(
                    X0[m]*(len(hyperparameterspace_special[key])-1))
                print(key + ": " +
                      str(hyperparameterspace_special[key][index]))
            else:
                print(key + ": " + str(X0[m]))
            m += 1

    K.clear_session()

    print("GRID SEARCH")
    print(RESULTS_GRID+"}")

    print("RANDOM SEARCH")
    print(RESULTS_RANDOM+"}")

    print("BAYESIAN SEARCH")
    print(RESULTS_BAYESIAN+"}")

    print("SPARSE SEARCH")
    print(RESULTS_SPARSE+"}")


print("GRID SEARCH")
print(RESULTS_GRID+"}")

print("RANDOM SEARCH")
print(RESULTS_RANDOM+"}")

print("BAYESIAN SEARCH")
print(RESULTS_BAYESIAN+"}")

print("SPARSE SEARCH")
print(RESULTS_SPARSE+"}")




################################################## Current Budget: 3 ##################################################

Performing grid search


2023-06-26 22:13:55.202583: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Best score with Grid search: -0.12549999356269836
With Hyperparameters: 
epochs: 5
batch_size: 1125
learning_rate: 9.999999999999997e-06
number_conv_layers: 2
number_fc_layers: 2
kernel_size: 4
pool_size: 2
neurons_per_fc_layer: 5
dropout_prob: 0.5
Best score with Grid search: -0.12549999356269836

Performing random search
With Hyperparameters: 
epochs: 8
batch_size: 949
learning_rate: 0.00022801756022107158
number_conv_layers: 2
number_fc_layers: 3
kernel_size: 3
pool_size: 1
neurons_per_fc_layer: 6
dropout_prob: 0.6852195003967595
Best score with Random search: -0.6018000245094299

Performing bayesian optimization
Iterations took 1380.5690248129977 seconds
With Hyperparameters: 
epochs: 7.0
batch_size: 689.0
learning_rate: -4.7939778468317735
number_conv_layers: 3.0
number_fc_layers: 3.0
kernel_size: 3.0
pool_size: 1.0
neurons_per_fc_layer: 8.0
dropout_prob: 0.7715949980443677
Best score with Bayesian Optimization: -0.18870000541210175

Performing sparse search
Adaptive grid generati



Iterations took 1426.718432362999 seconds
With Hyperparameters: 
epochs: 7.0
batch_size: 867.0
learning_rate: -4.296442305644123
number_conv_layers: 3.0
number_fc_layers: 1.0
kernel_size: 4.0
pool_size: 1.0
neurons_per_fc_layer: 3.0
dropout_prob: 0.7903735226179732
Best score with Bayesian Optimization: -0.5080999732017517

Performing sparse search
Adaptive grid generation (Ritter-Novak)...
Done in 78805ms.
Solving linear system (automatic method)...
Done in 0ms.
Optimizing (gradient descent)...
Done in 0ms.
Optimizing (multi-start)...
Done in 0ms.
With Hyperparameters: 
epochs: 0.5
batch_size: 0.5
learning_rate: 0.5
number_conv_layers: 0.5
number_fc_layers: 0.5
kernel_size: 0.5
pool_size: 0.5
neurons_per_fc_layer: 0.5
dropout_prob: 0.5
GRID SEARCH
{(1,0.12549999356269836)}
RANDOM SEARCH
{(3,0.6018000245094299)(5,0.6134999990463257)}
BAYESIAN SEARCH
{(3,0.18870000541210175)(5,0.5080999732017517)}
SPARSE SEARCH
{(3,0.12549999356269836)(3,0.3776000142097473)}

###########################

ValueError: Exception encountered when calling layer "max_pooling2d_2" (type MaxPooling2D).

Negative dimension size caused by subtracting 2 from 1 for '{{node max_pooling2d_2/MaxPool}} = MaxPool[T=DT_FLOAT, data_format="NHWC", explicit_paddings=[], ksize=[1, 2, 2, 1], padding="VALID", strides=[1, 2, 2, 1]](Placeholder)' with input shapes: [?,1,1,32].

Call arguments received by layer "max_pooling2d_2" (type MaxPooling2D):
  • inputs=tf.Tensor(shape=(None, 1, 1, 32), dtype=float32)