# Hyperparameteroptimization

https://towardsdatascience.com/bayesian-optimization-with-python-85c66df711ec 

Steps:
   1. Definition of the data
   2. Definition of the hyperparameter space
   3. Loop over all different combinations of the hyperparamter space
       1. Define the model with the hyperparameters
       2. Optimize model (learning phase)
       3. Evaluate model and store metric with the parameters
 

## 0. Imports & utils

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
from sklearn import svm, metrics
from sklearn.utils import shuffle
from sklearn.model_selection import cross_val_score


import timeit

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import itertools

from mpl_toolkits import mplot3d


from scipy.io import arff
import pandas as pd

%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import time, sys
from IPython.display import clear_output

import pysgpp

from bayes_opt import BayesianOptimization, UtilityFunction


def update_progress(progress, time, remaining_time):
    bar_length = 20
    if isinstance(progress, int):
        progress = float(progress)
    if not isinstance(progress, float):
        progress = 0
    if progress < 0:
        progress = 0
    if progress >= 1:
        progress = 1

    block = int(round(bar_length * progress))
    clear_output(wait = True)
    text = "Progress: [{0}] {1:.1f}%".format( "#" * block + "-" * (bar_length - block), progress * 100)
    text += "\nCurrent time per iteration: " + str(time)
    text += "\nApprox. time remaining: " + str(remaining_time)
    print(text)

    
def to_standard(lower, upper, value):
    return (value-lower)/(upper-lower)


def from_standard(lower, upper, value):
    return value*(upper-lower)+lower

## 1. Definition of Data

In [3]:
SPLIT_RATIO = 0.8

data = arff.loadarff('php0iVrYT.arff')
df = pd.DataFrame(data[0])

df['Class'].replace([b'1', b'2'], [1, 2], inplace=True)

X = []
Y = []

for i in range(len(df)):
    data = []
    data.append(df['V1'][i])
    data.append(df['V2'][i])
    data.append(df['V3'][i])
    data.append(df['V4'][i])
    
    X.append(data)
    Y.append(df['Class'][i])

print("Number of samples: ", len(X))

X, Y = shuffle(X, Y)
X = np.array(X[:740])
Y = np.array(Y[:740])

# 10 fold validation:
size_chunks = int(len(X)/10)
X_folds = [X[x:x+size_chunks] for x in range(0, len(X)-size_chunks, size_chunks)]
Y_folds = [Y[x:x+size_chunks] for x in range(0, len(Y)-size_chunks, size_chunks)]

#X_train = torch.Tensor(X[:int(len(X) * SPLIT_RATIO)])
#X_test = torch.Tensor(X[int(len(X) * SPLIT_RATIO):])
#Y_train = torch.Tensor(Y[:int(len(Y) * SPLIT_RATIO)])
#Y_test = torch.Tensor(Y[int(len(Y) * SPLIT_RATIO):])

#print("Number of training samples: ", len(X_train))
#print("Number of testing samples: ", len(X_test))

Number of samples:  748


## 2. Definition of Hyperparameter space

In [3]:
hyperparameters = {
    "C" : [0.1, 10],
    "kernel" : [1, 5], #["set", "linear", "poly", "rbf", "sigmoid", "precomputed"],
    "degree" : [1, 5],
    "gamma" : [1, 3], #["set", "scale", "auto"],
    "tol" : [0.000001, 0.1]
}

print("Number of hyperparameters: ", len(hyperparameters))

Number of hyperparameters:  5


## 3. Loop over combinations

In [4]:
def black_box_function(C, kernel, degree, gamma, tol):
    
    if int(kernel) == 1:
        kernel = "linear"
    elif int(kernel) == 2:
        kernel = "poly"
    elif int(kernel) == 3:
        kernel = "rbf"
    elif int(kernel) == 4:
        kernel = "sigmoid"
    elif int(kernel) == 5:
        kernel = "precomputed"
        
    if int(gamma) == 1:
        gamma = "scale"
    elif int(gamma) == 2:
        gamma = "auto"
    
    # 10 fold cross validation
    
    classifier = svm.SVC(C=C, kernel=kernel, degree=int(degree), gamma=gamma, tol=tol)
    #classifier.fit(X_train, Y_train)
    scores = cross_val_score(classifier, X, Y, cv=10)

    # Y_predicted = classifier.predict(X_test)

    #accuracy = metrics.accuracy_score(Y_test, Y_predicted)
    #accuracy_list.append(accuracy)
    accuracy = scores.mean()
            
    return accuracy

    
optimizer = BayesianOptimization(f = black_box_function,
                                 pbounds = hyperparameters, verbose = 2,
                                 random_state = 4)

# optimizer.maximize(init_points = 5, n_iter = 200)

utility = UtilityFunction(kind = "ucb", kappa = 1.96, xi = 0.01)

time = 0
number_iterations = 10
for i in range(number_iterations):
    
    starttime = timeit.default_timer()
    
    # Get optimizer to suggest new parameter values to try using the
    # specified acquisition function.
    next_point = optimizer.suggest(utility)    # Force degree from float to int.
    
    # the new parameter values.
    target = black_box_function(**next_point)    
    try:
        # Update the optimizer with the evaluation results. 
        # This should be in try-except to catch any errors!
        optimizer.register(params = next_point, target = target)
    except:
        pass
    
    percentage = i/number_iterations

    endtime = timeit.default_timer()
    time += (endtime-starttime)
    
    remaining_time_prediction = (time/(i+1))*number_iterations - time
    
    update_progress(percentage, (endtime-starttime), remaining_time_prediction)
    

print("Iterations took ", time, " seconds.")    

    

Progress: [--------------------] 0.0%
Current time per iteration: 0.17038570900012928
Approx. time remaining: 1.5334713810011635


KeyboardInterrupt: 

## 4. Plotting and Result

In [None]:
print("Best result: {}; Accuracy: {:.3f}.".format(optimizer.max["params"], optimizer.max["target"]))

plt.figure(figsize = (15, 5))
plt.plot(range(1, 1 + len(optimizer.space.target)), optimizer.space.target, "-o")
plt.grid(True)
plt.xlabel("Iteration", fontsize = 14)
plt.ylabel("Black box function f(x)", fontsize = 14)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 14)
plt.show()