In [3]:
Link = 'https://github.com/natsunoyuki/blog_posts/blob/main/data_science/Bayesian%20Optimization%20of%20Model%20Hyperparameters.ipynb'

In [4]:
SEED = 1412
Test_Ratio = 0.2

In [5]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X,y = make_classification(n_samples=10000,n_features=5,n_informative=2,n_classes=2,n_clusters_per_class=1,flip_y=0.2,shuffle=False,random_state=SEED)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = Test_Ratio, random_state = SEED)



In [9]:
from sklearn.ensemble import RandomForestClassifier

def mdl_para_lst_maker():
    lst = []
    for n_estimator in range(50,501,50):
        for criterion in ['gini', 'entropy', 'log_loss']:
            for max_feature in ['sqrt', 'log2', None]:
                MyDict = {'n_estimators':[n_estimator], 'criterion':[criterion], 'max_features':[max_feature], 'random_state':[SEED]}
                lst.append(MyDict)
    return lst 
    
mdl_para_lst = mdl_para_lst_maker()

def get_mdl_para(x):
    x = int(x)
    try:
        return mdl_para_lst[x]
    except:
        x = x%len(mdl_para_lst)
        print(f'Yo! X ({x}) for mdl is out of bounds!')
        return mdl_para_lst[x]

In [10]:
from niapy.algorithms.basic import FireflyAlgorithm
import numpy as np 

def NIA_FireFly():
    lst = []
    for population_size in range(10,101,10):#10
        for alpha in np.arange(0.10,0.21,0.01):#10
            for beta0 in range(10,101,10):#10
                for gamma in np.arange(0.05,1.01,0.05):#19
                    for theta in np.arange(0.05,1.01,0.05):#19
                        Algo = FireflyAlgorithm()
                        Algo.set_parameters(population_size=population_size, 
                        alpha=alpha, 
                        beta0=beta0, 
                        gamma=gamma, 
                        theta=theta,
                        seed=SEED
                        )
                        lst.append(Algo)
    return lst 

FA_lst = NIA_FireFly()

def get_algo(x):
    x = int(x)
    try:
        return FA_lst[x]
    except:
        print(f'Yo! X ({x}) for algo is out of bounds!')
        x = x%(len(FA_lst))
        return FA_lst[x]

In [20]:
from sklearn_nature_inspired_algorithms.model_selection import NatureInspiredSearchCV

Dict_combo = dict()

def Get_Score(mdl_val,algo_val):
    mdl_val = int(mdl_val)
    algo_val = int(algo_val)
    
    combo_str = f'{mdl_val}_{algo_val}'
    if combo_str in Dict_combo:
        return Dict_combo[combo_str]

    param_grid = get_mdl_para(mdl_val)

    algorithm = get_algo(algo_val)

    nia_search_mdl = NatureInspiredSearchCV(
        estimator=RandomForestClassifier(),
        param_grid=param_grid,
        algorithm=algorithm,
        runs=1
    )
    nia_search_mdl.fit(X_train, y_train)
    val = nia_search_mdl.score(X_test,y_test)
    Dict_combo[combo_str] = val 
    return val 


In [21]:
19*19*1000 * 100*3*3

324900000

In [22]:
324900000*6 ,32490000, (32490000/60), (32490000/60)/24, ((32490000/60)/24)/365 

(1949400000, 32490000, 541500.0, 22562.5, 61.81506849315068)

In [23]:
from bayes_opt import BayesianOptimization, UtilityFunction

# Define the metric to optimize over.
'''
def black_box_function(algo,mdl):
    return Get_Score(mdl,algo)
''' 

# Set range of C to optimize over. bayes_opt requires this to be a dictionary.
pbounds = {"algo_val": (0, len(FA_lst)-1), "mdl_val" : (0,len(mdl_para_lst)-1)}
#ptypes = {"algo_val": int, "mdl_val": int}

# Create BayesianOptimization object, and optimize (maximize) black_box_function.
optimizer = BayesianOptimization(f = Get_Score, pbounds = pbounds, verbose = 2, random_state = SEED)

optimizer.maximize(init_points = 20, n_iter = 5)

print("Best result: {}; f(x) = {}.".format(optimizer.max["params"], optimizer.max["target"]))

|   iter    |  target   | algo_val  |  mdl_val  |
-------------------------------------------------
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m1        [0m | [0m0.8635   [0m | [0m3.871e+05[0m | [0m66.94    [0m |
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m2        [0m | [0m0.8625   [0m | [0m2.807e+04[0m | [0m39.09    [0m |
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m3        [0m | [0m0.8635   [0m | [0m1.408e+05[0m | [0m67.72    [0m |
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m4        [0m | [0m0.863    [0m | [0m4.054e+05[0m | [0m78.24    [0m |
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m5        [0m | [0m0.863    [0m | [0m1.719e+05[0m | [0m49.47    [0m |
Fitting at most 1 candidates
Optimization finished, 1 candidates were fitted
| [0m6        [0m | [0m0.

UNTESTED AFTER THIS

In [None]:
# Create optimizer and utility function objects.
optimizer = BayesianOptimization(f = None, pbounds = {"algo_val": (0, len(FA_lst)-1), "mdl_val" : (0,len(mdl_lst)-1)}, verbose = 2, random_state = SEED)
utility = UtilityFunction(kind = "ucb", kappa = 1.96, xi = 0.01)

# Optimization for loop.
for i in range(15):
    # Get optimizer to suggest a new parameter value to try.
    next_point = optimizer.suggest(utility)
    # Evaluate the output of the black_box_function using the new parameter value.
    target = Get_Score(**next_point)
    try:
        # Update the optimizer with the evaluation results. This needs to be in try-except
        # to prevent repeat errors from occuring.
        optimizer.register(params = next_point, target = target)
    except:
        pass
    
print("Best result: {}; f(x) = {:.3f}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize = (15, 5))
plt.plot(range(1, 1+len(optimizer.space.target)), optimizer.space.target, "-o")
plt.grid(True)
plt.xlabel("Iteration")
plt.ylabel("Black box function")
plt.show()

In [None]:
break


NEED TO BE CHECKED BEFORE RUNNING

In [None]:
import skopt

# Unlike bayes_opt, skopt requires that the search boundary is given as an array of tuples.
p_bounds = [(0.1, 10.0)]

opt = skopt.Optimizer(dimensions = p_bounds,
                base_estimator = "GP", # Gaussian Process regressor.
                n_initial_points = 5, 
                initial_point_generator = 'random', 
                acq_func = "LCB", # Lower Confidence Bounds.
                random_state = 0)

for i in range(15):
    next_point_to_try = opt.ask() # next_point is returned as a list.
    # Unlike bayes_opt, skopt performs minimization. Therefore we use -black_box_function(*next_point).
    loss_to_minimize = -Get_Score(*next_point_to_try)
    result = opt.tell(next_point_to_try, loss_to_minimize)
    
# Again, don't forget to use the negative of result as we are performing minimization here.
print("Best result: {}, f(x) = {:.3f}.".format(result["x"], -result["fun"]))

TypeError: Get_Score() missing 1 required positional argument: 'algo_val'

In [None]:
plt.figure(figsize = (15, 5))
plt.plot(range(1, len(result["func_vals"])+1), result["func_vals"], "-o")
plt.grid(True)
plt.xlabel("iteration")
plt.ylabel("loss_function")
plt.show()