In [1]:
import numpy as np

import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score
from sklearn.datasets import make_classification

from bayes_opt import BayesianOptimization, UtilityFunction

import warnings
warnings.filterwarnings("ignore")


In [2]:
# URL of an external notebook about Bayesian optimisation of model hyperparameters.
# NOTE(review): presumably the reference this notebook was adapted from; the
# name is not read by any other visible cell.
Link = 'https://github.com/natsunoyuki/blog_posts/blob/main/data_science/Bayesian%20Optimization%20of%20Model%20Hyperparameters.ipynb'

In [3]:
# Seed handed to every `random_state` / `seed` argument in this notebook.
SEED = 1412
# Value passed as `test_size` to train_test_split.
Test_Ratio = 0.2

In [4]:
# Synthetic two-class data set; all generator settings are spelled out so the
# data can be regenerated exactly from SEED.
X, y = make_classification(
    n_samples=10000,
    n_features=5,
    n_informative=2,
    n_classes=2,
    n_clusters_per_class=1,
    flip_y=0.2,
    shuffle=False,
    random_state=SEED,
)

In [5]:

# Hold out Test_Ratio of the samples for evaluation; seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=Test_Ratio,
    random_state=SEED,
)



In [6]:
from sklearn.ensemble import RandomForestClassifier

from niapy.problems import Problem
from niapy.task import OptimizationType, Task
from niapy.runner import Runner


In [7]:
def mdl_lst_maker():
    """Build one unfitted RandomForestClassifier per hyperparameter combination.

    The grid is ``n_estimators`` (50 to 500 in steps of 50) x ``criterion`` x
    ``max_features``; ``n_estimators`` varies slowest and ``max_features``
    fastest. Every classifier is created with ``random_state=SEED``.

    Returns:
        list: the classifiers in grid order.
    """
    tree_counts = range(50, 501, 50)
    split_criteria = ('gini', 'entropy', 'log_loss')
    feature_rules = ('sqrt', 'log2', None)
    return [
        RandomForestClassifier(
            n_estimators=n_trees,
            criterion=split_criterion,
            max_features=feature_rule,
            random_state=SEED,
        )
        for n_trees in tree_counts
        for split_criterion in split_criteria
        for feature_rule in feature_rules
    ]

def mdl_para_lst_maker():
    """Build one single-point parameter grid per hyperparameter combination.

    Walks the same grid, in the same order, as ``mdl_lst_maker``. Each entry is
    a dict whose values are one-element lists (the ``param_grid`` shape used by
    the search cell below).

    Returns:
        list[dict]: the parameter grids in grid order.
    """
    tree_counts = range(50, 501, 50)
    split_criteria = ('gini', 'entropy', 'log_loss')
    feature_rules = ('sqrt', 'log2', None)
    return [
        {
            'n_estimators': [n_trees],
            'criterion': [split_criterion],
            'max_features': [feature_rule],
            'random_state': [SEED],
        }
        for n_trees in tree_counts
        for split_criterion in split_criteria
        for feature_rule in feature_rules
    ]

In [8]:
mdl_lst = mdl_lst_maker()

def get_mdl(x):
    """Return the classifier stored at position ``x`` of ``mdl_lst``.

    Args:
        x: index into ``mdl_lst``; truncated with ``int`` so float values are
            accepted.

    Returns:
        The classifier at that index. An index past either end of the list is
        reported on stdout and wrapped with modulo instead of raising.

    Raises:
        ZeroDivisionError: if ``mdl_lst`` is empty (nothing to wrap onto).
    """
    idx = int(x)
    try:
        return mdl_lst[idx]
    except IndexError:
        # Only an out-of-range index is recoverable; anything else propagates.
        # Report the offending index, not the wrapped one.
        print(f'Yo! X ({idx}) for mdl is out of bounds!')
        return mdl_lst[idx % len(mdl_lst)]

mdl_para_lst = mdl_para_lst_maker()

def get_mdl_para(x):
    """Return the parameter grid stored at position ``x`` of ``mdl_para_lst``.

    Args:
        x: index into ``mdl_para_lst``; truncated with ``int`` so float values
            are accepted.

    Returns:
        dict: the parameter grid at that index. An index past either end of the
        list is reported on stdout and wrapped with modulo instead of raising.

    Raises:
        ZeroDivisionError: if ``mdl_para_lst`` is empty (nothing to wrap onto).
    """
    idx = int(x)
    try:
        return mdl_para_lst[idx]
    except IndexError:
        # Only an out-of-range index is recoverable; anything else propagates.
        # Report the offending index, not the wrapped one.
        print(f'Yo! X ({idx}) for mdl para is out of bounds!')
        return mdl_para_lst[idx % len(mdl_para_lst)]

In [9]:
from niapy.algorithms.basic import FireflyAlgorithm, BatAlgorithm

def NIA_FireFly():
    """Build one FireflyAlgorithm per combination of its hyperparameters.

    The grid is enumerated with ``population_size`` varying slowest and
    ``theta`` fastest, so list positions are stable between calls.

    The seed is given to the constructor: ``set_parameters`` does not take a
    ``seed`` argument in niapy 2.x, so passing it there left every algorithm
    unseeded.

    NOTE(review): this materialises every combination (10 x ~11 x 10 x 20 x 20,
    i.e. several hundred thousand objects). Consider building algorithms lazily
    from an index if memory or start-up time becomes a problem.

    Returns:
        list: the configured FireflyAlgorithm instances in grid order.
    """
    from itertools import product

    population_sizes = range(10, 101, 10)   # 10 values: 10, 20, ..., 100
    # Float-step arange: the stop values 0.21 / 1.01 are there so that 0.20 /
    # 1.00 are included. Float steps can make the length off by one.
    alphas = np.arange(0.10, 0.21, 0.01)    # nominally 11 values: 0.10 .. 0.20
    beta0s = range(10, 101, 10)             # 10 values: 10, 20, ..., 100
    gammas = np.arange(0.05, 1.01, 0.05)    # 20 values: 0.05 .. 1.00
    thetas = np.arange(0.05, 1.01, 0.05)    # 20 values: 0.05 .. 1.00

    lst = []
    for population_size, alpha, beta0, gamma, theta in product(
        population_sizes, alphas, beta0s, gammas, thetas
    ):
        lst.append(
            FireflyAlgorithm(
                population_size=population_size,
                alpha=alpha,
                beta0=beta0,
                gamma=gamma,
                theta=theta,
                seed=SEED,
            )
        )
    return lst


In [10]:
FA_lst = NIA_FireFly()
def get_algo(x):
    """Return the algorithm stored at position ``x`` of ``FA_lst``.

    Args:
        x: index into ``FA_lst``; truncated with ``int`` so float values are
            accepted.

    Returns:
        The algorithm at that index. An index past either end of the list is
        reported on stdout and wrapped with modulo instead of raising.

    Raises:
        ZeroDivisionError: if ``FA_lst`` is empty (nothing to wrap onto).
    """
    idx = int(x)
    try:
        return FA_lst[idx]
    except IndexError:
        # Only an out-of-range index is recoverable; anything else propagates.
        print(f'Yo! X ({idx}) for algo is out of bounds!')
        return FA_lst[idx % len(FA_lst)]
    

In [14]:
import sys, os

# While printing is blocked: (stream that was active before, devnull handle).
# None while printing is not blocked.
_blocked_stdout_state = None

# Disable
def blockPrint():
    """Send everything written to ``sys.stdout`` to the null device.

    The stream that was active is remembered so ``enablePrint`` can put back
    exactly that stream (inside Jupyter it is not ``sys.__stdout__``). Calling
    this while already blocked does nothing, so no extra file handle is opened
    and the remembered stream is not lost.
    """
    global _blocked_stdout_state
    if _blocked_stdout_state is not None:
        return
    devnull = open(os.devnull, 'w')
    _blocked_stdout_state = (sys.stdout, devnull)
    sys.stdout = devnull

# Restore
def enablePrint():
    """Undo ``blockPrint``: restore the remembered stream and close devnull.

    Does nothing when printing is not currently blocked.
    """
    global _blocked_stdout_state
    if _blocked_stdout_state is None:
        return
    previous_stdout, devnull = _blocked_stdout_state
    _blocked_stdout_state = None
    sys.stdout = previous_stdout
    # Close our own handle explicitly; sys.stdout may have been reassigned by
    # other code in the meantime, so never close whatever sys.stdout is now.
    devnull.close()


# Demonstration of blockPrint / enablePrint: the first and last messages are
# written while stdout is active, the middle one while it is redirected.
print("This will print")

blockPrint()
print("This won't")

enablePrint()
print("This will too")

In [12]:
from sklearn_nature_inspired_algorithms.model_selection import NatureInspiredSearchCV

# get_mdl_para returns a grid whose values are one-element lists, so this
# search explores a single hyperparameter combination.
param_grid = get_mdl_para(1)
clf = RandomForestClassifier()

algorithm = get_algo(0) # when custom algorithm is provided random_state is ignored

nia_search_mdl = NatureInspiredSearchCV(
    estimator=clf,
    param_grid=param_grid,
    algorithm=algorithm,
    runs=1
)

# Silence the output produced during the search, and always give stdout back,
# even when fit/score raises, so later cells are not left muted.
blockPrint()
try:
    nia_search_mdl.fit(X_train, y_train)
    val = nia_search_mdl.score(X_test, y_test)
finally:
    enablePrint()

In [15]:
# Safety net in case an earlier cell stopped while stdout was still blocked.
# Goes through the shared helper instead of assigning sys.stdout directly.
enablePrint()
print(val)

In [None]:
# Scratch arithmetic; evaluates to 324,900,000.
# NOTE(review): presumably the number of (Firefly config, model config) pairs,
# but the factors do not match the grids built above: gamma and theta each
# have 20 values (not 19) and mdl_lst_maker yields 10*3*3 models (not 100*3*3).
19*19*1000 * 100*3*3

In [None]:
# Scratch arithmetic shown as a tuple.
# NOTE(review): looks like a run-time estimate (successive /60, /24, /365
# conversions) - confirm the units. 32490000 is one tenth of the 324900000
# used in the first element, which may be a dropped zero.
324900000*6 ,32490000, (32490000/60), (32490000/60)/24, ((32490000/60)/24)/365 

In [None]:
class MyProblem(Problem):
    """niapy problem whose fitness is the test-set score of one model of ``mdl_lst``.

    NOTE(review): ``_evaluate`` never reads the candidate solution ``x``; the
    fitness depends only on ``mdl_no``, so the objective is constant over the
    4-dimensional [0, 1] search space. Confirm whether ``x`` was meant to be
    decoded into hyperparameters.

    Args:
        X_train, X_test, y_train, y_test: data used to fit and score the model.
        mdl_no: index of the model, resolved through ``get_mdl``.
    """

    def __init__(self, X_train, X_test, y_train, y_test, mdl_no):
        super().__init__(dimension=4, lower=0, upper=1)
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.mdl_no = mdl_no
        # Score per model index. The models are seeded, so refitting one on the
        # same data gives the same score; fitting once per index avoids
        # retraining a forest on every single evaluation.
        self._score_cache = {}

    def _evaluate(self,x):
        """Return the test-set score of the selected model (independent of ``x``)."""
        print(f'this is x -> {x}')
        cache_key = int(self.mdl_no)
        if cache_key not in self._score_cache:
            mdl = get_mdl(self.mdl_no)
            mdl.fit(self.X_train, self.y_train)
            self._score_cache[cache_key] = mdl.score(self.X_test, self.y_test)
        return self._score_cache[cache_key]

In [None]:
# First algorithm of the Firefly grid and a problem bound to model index 0.
Curr_Algo = get_algo(0)
problem = MyProblem(X_train, X_test, y_train, y_test, 0)


In [None]:
# For reference, a Runner signature noted by the author:
# dimension=10, max_evals=1000000, runs=1, algorithms='ArtificialBeeColonyAlgorithm', problems='Ackley'
# Here the runner is given the single algorithm and problem created above,
# with max_evals=10000.
My_Runner = Runner(max_evals=10000, algorithms=[Curr_Algo], problems=[problem])
My_Runner.run()

In [None]:
# Run the first Firefly configuration on the model-0 problem, maximising the
# fitness for at most 100 iterations.
Curr_Algo = get_algo(0)
problem = MyProblem(X_train, X_test, y_train, y_test, 0)
task = Task(problem, max_iters=100, optimization_type=OptimizationType.MAXIMIZATION)
print(task)
best_params, best_accuracy = Curr_Algo.run(task)

# NOTE(review): `algorithm` is not assigned in this cell. In the visible
# notebook it comes from the NatureInspiredSearchCV cell, where it is also
# get_algo(0), i.e. the same object as Curr_Algo. This second run reuses the
# `task` already consumed above and overwrites `best_accuracy`; confirm that
# is intended.
best_params_HBA, best_accuracy = algorithm.run(task)


# Same split call, with the same arguments, as the earlier train/test split cell.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = Test_Ratio, random_state = SEED)

# The scaler is fitted on the training split only and then applied to the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:

# Define the metric to optimize over.
def black_box_function(algo, mdl):
    """Objective handed to the outer optimiser (higher is better).

    Runs the nature-inspired algorithm selected by ``algo`` on the problem
    built for model index ``mdl``, turns the best solution found into a
    classifier and returns that classifier's score on the scaled test split.

    NOTE(review): ``get_classifier`` is not defined in the visible notebook.

    Args:
        algo: index resolved through ``get_algo``.
        mdl: model index forwarded to ``MyProblem``.

    Returns:
        The ``score`` of the fitted classifier on ``X_test_scaled`` / ``y_test``.
    """
    search_algorithm = get_algo(algo)
    search_task = Task(
        MyProblem(X_train, X_test, y_train, y_test, mdl),
        max_iters=100,
        optimization_type=OptimizationType.MAXIMIZATION,
    )
    best_params, _ = search_algorithm.run(search_task)

    tuned_model = get_classifier(best_params)
    tuned_model.fit(X_train_scaled, y_train)
    return tuned_model.score(X_test_scaled, y_test)

# Index ranges to optimise over: which Firefly configuration and which model.
# bayes_opt requires the bounds to be a dictionary keyed by the argument names
# of black_box_function.
pbounds = {"algo": (0, len(FA_lst)-1), "mdl" : (0,len(mdl_lst)-1)}
# Kept for reference only. `ptypes` is not an argument of the official
# BayesianOptimization constructor, so it is no longer passed. The suggested
# values arrive as floats and are truncated with int() by get_algo / get_mdl.
ptypes = {'algo': int, 'mdl': int}

# Create BayesianOptimization object, and optimize (maximize) black_box_function.
optimizer = BayesianOptimization(f = black_box_function, pbounds = pbounds, verbose = 2, random_state = SEED)

optimizer.maximize(init_points = 10, n_iter = 10)

print("Best result: {}; f(x) = {}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
# Create optimizer and utility function objects.
# The bounds must use the argument names of black_box_function (algo, mdl);
# any other key makes black_box_function(**next_point) fail with a TypeError.
pbounds = {"algo": (0, len(FA_lst)-1), "mdl": (0, len(mdl_lst)-1)}
optimizer = BayesianOptimization(f = None, pbounds = pbounds, verbose = 2, random_state = 0)
utility = UtilityFunction(kind = "ucb", kappa = 1.96, xi = 0.01)

# Optimization for loop.
for i in range(15):
    # Get optimizer to suggest a new parameter value to try.
    next_point = optimizer.suggest(utility)
    # Evaluate the output of the black_box_function using the new parameter value.
    target = black_box_function(**next_point)
    try:
        # Update the optimizer with the evaluation results. Registering a
        # point that was already seen raises, so that case is skipped.
        optimizer.register(params = next_point, target = target)
    except Exception as err:
        # Best effort: keep the loop going, but say which point was dropped and why.
        print("Skipped point {}: {!r}".format(next_point, err))

print("Best result: {}; f(x) = {:.3f}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
# Objective value of every registered point, in evaluation order.
targets = optimizer.space.target
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(range(1, 1 + len(targets)), targets, "-o")
ax.grid(True)
ax.set_xlabel("Iteration")
ax.set_ylabel("Black box function")
plt.show()

In [None]:
import skopt

# Unlike bayes_opt, skopt requires that the search boundary is given as an array of tuples.
# One tuple per argument of black_box_function, in order (algo, mdl); integer
# bounds make skopt treat both as integer dimensions.
p_bounds = [(0, len(FA_lst) - 1), (0, len(mdl_lst) - 1)]

opt = skopt.Optimizer(dimensions = p_bounds,
                base_estimator = "GP", # Gaussian Process regressor.
                n_initial_points = 5, 
                initial_point_generator = 'random', 
                acq_func = "LCB", # Lower Confidence Bounds.
                random_state = 0)

for i in range(15):
    next_point_to_try = opt.ask() # next_point_to_try is returned as a list: [algo, mdl].
    # Unlike bayes_opt, skopt performs minimization. Therefore we use -black_box_function(*next_point_to_try).
    loss_to_minimize = -black_box_function(*next_point_to_try)
    result = opt.tell(next_point_to_try, loss_to_minimize)
    
# Again, don't forget to use the negative of result as we are performing minimization here.
print("Best result: {}, f(x) = {:.3f}.".format(result["x"], -result["fun"]))

In [None]:
# Loss recorded by skopt after each tell(), in evaluation order.
losses = result["func_vals"]
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(range(1, len(losses) + 1), losses, "-o")
ax.grid(True)
ax.set_xlabel("iteration")
ax.set_ylabel("loss_function")
plt.show()