# 1 - Install

In [1]:
!pip install BCN --upgrade --no-cache-dir



In [2]:
!pip install GPopt



In [3]:
import BCN as bcn # takes a long time to run, ONLY the first time it's run
import GPopt as gp
import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

# 2 - Cross-validation and hyperparameter tuning

In [4]:
from sklearn.model_selection import cross_val_score

In [5]:
def bcn_cv(X_train, y_train,
           B=10, nu=0.335855,
           lam=10**0.7837525,
           r=1 - 10**(-5.470031),
           tol=10**-7,
           col_sample=1,
           n_clusters=3):
  """Return the negated mean 5-fold cross-validation accuracy of a BCN classifier.

  A `bcn.BCNClassifier` is built from the given hyperparameters (with
  `activation="tanh"`, `type_optim="nlminb"` and progress display turned
  off) and scored with `cross_val_score` using `scoring='accuracy'` and
  `cv=5`.

  Parameters
  ----------
  X_train, y_train :
      Training features and labels, handed to `cross_val_score` as-is.
  B :
      Truncated to `int` before being given to the classifier.
  nu, lam, r, tol, col_sample, n_clusters :
      Forwarded unchanged to `bcn.BCNClassifier`.

  Returns
  -------
  The mean of the per-fold accuracies multiplied by -1, so that a lower
  value means a better model (presumably because the value is used as an
  objective to be minimized -- confirm against the optimizer in use).
  """
  model_kwargs = dict(B=int(B),
                      nu=nu,
                      lam=lam,
                      r=r,
                      tol=tol,
                      col_sample=col_sample,
                      n_clusters=n_clusters,
                      activation="tanh",
                      type_optim="nlminb",
                      show_progress=False)
  classifier = bcn.BCNClassifier(**model_kwargs)

  fold_scores = cross_val_score(classifier, X_train, y_train,
                                scoring='accuracy',
                                cv=5, n_jobs=None,
                                verbose=0)
  # sign flipped: higher accuracy -> lower returned value
  return -fold_scores.mean()

def optimize_bcn(X_train, y_train):
  """Tune BCN hyperparameters on the training data with `gp.GPOpt`.

  The search runs over a 7-dimensional box. Each point `x` is decoded
  into `bcn_cv` arguments as follows:

    x[0] -> B          = int(x[0])
    x[1] -> nu         = 10**x[1]
    x[2] -> lam        = 10**x[2]
    x[3] -> r          = 1 - 10**x[3]
    x[4] -> tol        = 10**x[4]
    x[5] -> col_sample = np.ceil(x[5])
    x[6] -> n_clusters = np.ceil(x[6])

  NOTE(review): the bounds for x[5] are [0.8, 1], so `np.ceil` always
  yields 1.0 for `col_sample` -- confirm this is intended.

  Parameters
  ----------
  X_train, y_train :
      Training features and labels, passed through to `bcn_cv`.

  Returns
  -------
  dict with two entries:
    'parameters' : whatever `gp_opt.optimize(verbose=2, abs_tol=1e-3)` returns
    'opt_object' : the `gp.GPOpt` instance that ran the search
  """
  # objective function for hyperparams tuning
  def crossval_objective(x):
    decoded = {
      'B': int(x[0]),
      'nu': 10**x[1],
      'lam': 10**x[2],
      'r': 1 - 10**x[3],
      'tol': 10**x[4],
      'col_sample': np.ceil(x[5]),
      'n_clusters': np.ceil(x[6]),
    }
    return bcn_cv(X_train=X_train, y_train=y_train, **decoded)

  # search box, one entry per dimension of x (same order as the table above)
  search_lower = np.array([   3,    -6, -10, -10,   -6, 0.8, 1])
  search_upper = np.array([ 100,  -0.1,  10,  -1, -0.1,   1, 4])

  # this is where the Gaussian Process can be chosen
  surrogate = GaussianProcessRegressor(
    kernel=Matern(nu=1.5),
    alpha=1e-6,
    normalize_y=True,
    n_restarts_optimizer=25,
    random_state=42,
  )

  gp_opt = gp.GPOpt(objective_func=crossval_objective,
                    lower_bound=search_lower,
                    upper_bound=search_upper,
                    gp_obj=surrogate,
                    n_init=10, n_iter=190, seed=3137)

  outcome = gp_opt.optimize(verbose=2, abs_tol=1e-3)
  return {'parameters': outcome, 'opt_object': gp_opt}

In [6]:
# Dataset loader callables from sklearn.datasets (not the loaded data itself);
# each one is called later to fetch its dataset.
datasets = [load_wine, load_iris, load_breast_cancer]

In [None]:
# For each dataset: split, tune BCN hyperparameters on the training part,
# refit with the tuned values, then report fit time, test accuracy and
# scoring time.
for elt in datasets:

  # elt is a loader callable; calling it returns the dataset object
  dataset = elt()
  X = dataset.data
  y = dataset.target

  # split data into training set and test set (20% held out, fixed seed)
  X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                      test_size=0.2, random_state=3137)

  # hyperparams tuning
  res1 = optimize_bcn(X_train, y_train)
  print(res1)
  parameters = res1["parameters"]

  # parameters[0] is presumably the best point found by the optimizer, in
  # search-space coordinates -- confirm against GPopt's return value.
  best_point = parameters[0]

  # Refit on the full training set. The decoding below (int / 10** / 1 - 10** /
  # np.ceil) mirrors the one applied in optimize_bcn's crossval_objective, so
  # the final model uses the same hyperparameters that were cross-validated.
  start = time()
  estimator = bcn.BCNClassifier(B=int(best_point[0]),
                                nu=10**best_point[1],
                                lam=10**best_point[2],
                                r=1-10**best_point[3],
                                tol=10**best_point[4],
                                col_sample=np.ceil(best_point[5]),
                                n_clusters=np.ceil(best_point[6]),
                                activation="tanh",
                                type_optim="nlminb").fit(X_train, y_train)
  print(f"\n Elapsed: {time() - start}")  # time spent fitting

  start = time()
  print(f"\n\n Test set accuracy: {estimator.score(X_test, y_test)}")
  print(f"\n Elapsed: {time() - start}")  # time spent scoring on the test set


 Creating initial design... 

point: [51.5  -3.05  0.   -5.5  -3.05  0.9   2.5 ]; score: -0.4083743842364532
point: [75.75  -4.525  5.    -7.75  -1.575  0.85   3.25 ]; score: -0.4083743842364532
point: [27.25  -1.575 -5.    -3.25  -4.525  0.95   1.75 ]; score: -0.888177339901478
point: [39.375  -3.7875  2.5    -8.875  -0.8375  0.975   1.375 ]; score: -0.4083743842364532
point: [87.875  -0.8375 -7.5    -4.375  -3.7875  0.875   2.875 ]; score: -0.9438423645320198
point: [63.625  -5.2625 -2.5    -6.625  -5.2625  0.925   3.625 ]; score: -0.4083743842364532
point: [15.125  -2.3125  7.5    -2.125  -2.3125  0.825   2.125 ]; score: -0.4083743842364532
point: [21.1875  -4.15625 -3.75    -3.8125  -2.68125  0.8375   1.1875 ]; score: -0.4083743842364532
point: [69.6875  -1.20625  6.25    -8.3125  -5.63125  0.9375   2.6875 ]; score: -0.9438423645320198
point: [93.9375  -5.63125  1.25    -1.5625  -4.15625  0.8875   3.4375 ]; score: -0.4083743842364532

 ...Done. 


 Optimization loop... 

iteration