# 1 - Install

In [28]:
!pip uninstall -y GPopt

Found existing installation: GPopt 0.5.0
Uninstalling GPopt-0.5.0:
  Successfully uninstalled GPopt-0.5.0


In [29]:
!pip uninstall -y BCN

Found existing installation: BCN 0.7.1
Uninstalling BCN-0.7.1:
  Successfully uninstalled BCN-0.7.1


In [30]:
!pip install BCN nnetsauce --upgrade --no-cache-dir

Collecting BCN
  Downloading BCN-0.7.1-py2.py3-none-any.whl.metadata (873 bytes)
Downloading BCN-0.7.1-py2.py3-none-any.whl (7.6 kB)
Installing collected packages: BCN
Successfully installed BCN-0.7.1


In [31]:
!pip install ../.. --upgrade --no-cache-dir

Processing /Users/t/Documents/Python_Packages/GPopt
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: GPopt
  Building wheel for GPopt (setup.py) ... [?25ldone
[?25h  Created wheel for GPopt: filename=GPopt-0.5.0-py2.py3-none-any.whl size=69337 sha256=ca5576bb1d575beea898f75a8d88c3cabd165bb619cc7cb07cbe44db2487b4ac
  Stored in directory: /private/var/folders/cp/q8d6040n3m38d22z3hkk1zc40000gn/T/pip-ephem-wheel-cache-g_ne7w4o/wheels/18/c5/f2/2bcb5749155d04d8e285ee88d9c1f7d49467719147ee803dc9
Successfully built GPopt
Installing collected packages: GPopt
Successfully installed GPopt-0.5.0


In [32]:
import BCN as bcn # takes a long time to run, ONLY the first time it's run
import nnetsauce as ns 
import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.gaussian_process.kernels import Matern
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

In [33]:
import GPopt as gp

# 2 - Cross-validation and hyperparameter tuning

In [34]:
from sklearn.model_selection import cross_val_score

In [35]:
# Candidate surrogate models, each wrapped in nnetsauce's CustomRegressor.
# Index 0: BayesianRidge, 1: ARDRegression, 2: Gaussian Process (Matern, nu=1.5).
surrogates = [
    ns.CustomRegressor(base_model)
    for base_model in (
        BayesianRidge(),
        ARDRegression(),
        # this is where the Gaussian Process can be chosen
        GaussianProcessRegressor(
            kernel=Matern(nu=1.5),
            alpha=1e-6,
            normalize_y=True,
            n_restarts_optimizer=25,
            random_state=42,
        ),
    )
]

In [36]:
def bcn_cv(X_train, y_train,
           B = 10, nu = 0.335855,
           lam = 10**0.7837525,
           r = 1 - 10**(-5.470031),
           tol = 10**-7,
           col_sample=1,
           n_clusters = 3):
  """Cross-validate one BCNClassifier configuration.

  Builds a `bcn.BCNClassifier` from the given hyperparameters (with
  `activation="tanh"`, `type_optim="nlminb"` and progress display off) and
  evaluates it with `cross_val_score` using `cv=5` and accuracy scoring.

  Parameters
  ----------
  X_train, y_train : training features and labels handed to `cross_val_score`.
  B : forwarded to BCNClassifier after truncation with `int()`.
  nu, lam, r, tol, col_sample, n_clusters : forwarded to BCNClassifier unchanged.

  Returns
  -------
  The NEGATED mean of the per-fold accuracies (a quantity to minimize).
  """
  hyperparams = dict(
      B=int(B),
      nu=nu,
      lam=lam,
      r=r,
      tol=tol,
      col_sample=col_sample,
      n_clusters=n_clusters,
      activation="tanh",
      type_optim="nlminb",
      show_progress=False,
  )
  model = bcn.BCNClassifier(**hyperparams)

  fold_accuracies = cross_val_score(
      model, X_train, y_train,
      scoring='accuracy',
      cv=5,
      n_jobs=None,
      verbose=0,
  )
  # sign flipped: higher accuracy -> lower returned value
  return -fold_accuracies.mean()

def optimize_bcn(X_train, y_train, surr_idx,
                 n_init=10, n_iter=190, seed=3137,
                 abs_tol=1e-3, verbose=2):
  """Tune BCNClassifier hyperparameters with `gp.GPOpt`.

  The optimizer searches a 7-dimensional box; each point `x` is decoded into
  BCN hyperparameters before being scored by `bcn_cv`:

    x[0] -> B          = int(x[0])
    x[1] -> nu         = 10**x[1]
    x[2] -> lam        = 10**x[2]
    x[3] -> r          = 1 - 10**x[3]
    x[4] -> tol        = 10**x[4]
    x[5] -> col_sample = ceil(x[5])
    x[6] -> n_clusters = ceil(x[6])

  Parameters
  ----------
  X_train, y_train : training data passed through to `bcn_cv`.
  surr_idx : index into the module-level `surrogates` list selecting the
    surrogate model handed to `gp.GPOpt`.
  n_init, n_iter, seed : forwarded to `gp.GPOpt` (presumably the size of the
    initial design, the number of optimization iterations and the random
    seed -- confirm against the GPopt documentation).
  abs_tol, verbose : forwarded to `GPOpt.optimize`.

  Returns
  -------
  Whatever `GPOpt.optimize` returns; later cells read `.best_score` and
  `.best_params` from it.
  """
  # objective function for hyperparams tuning
  def crossval_objective(x):
    # NOTE(review): the search bounds for x[5] are [0.8, 1], so np.ceil
    # maps every in-bounds value to 1.0 and col_sample is effectively
    # constant -- confirm this is intended.
    return bcn_cv(X_train=X_train,
                  y_train=y_train,
                  B = int(x[0]),
                  nu = 10**x[1],
                  lam = 10**x[2],
                  r = 1 - 10**x[3],
                  tol = 10**x[4],
                  col_sample = np.ceil(x[5]),
                  n_clusters = np.ceil(x[6]))

  # bounds are listed in the same order as params_names
  gp_opt = gp.GPOpt(objective_func=crossval_objective,
                    lower_bound = np.array([   3,    -6, -10, -10,   -6, 0.8, 1]),
                    upper_bound = np.array([ 100,  -0.1,  10,  -1, -0.1,   1, 4]),
                    params_names=["B", "nu", "lam", "r", "tol", "col_sample", "n_clusters"],
                    surrogate_obj = surrogates[surr_idx],
                    n_init=n_init, n_iter=n_iter, seed=seed)
  return gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)

In [37]:
# Wine data: feature matrix and class labels
dataset = load_wine()
X, y = dataset.data, dataset.target

# hold out 20% of the samples as a test set (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=3137,
)

# tune the hyperparameters on the training split, using surrogates[0]
res_opt = optimize_bcn(X_train, y_train, 0)
print(res_opt)


 Creating initial design... 

point: [51.5  -3.05  0.   -5.5  -3.05  0.9   2.5 ]; score: -0.4083743842364532
point: [75.75  -4.525  5.    -7.75  -1.575  0.85   3.25 ]; score: -0.4083743842364532
point: [27.25  -1.575 -5.    -3.25  -4.525  0.95   1.75 ]; score: -0.8953201970443351
point: [39.375  -3.7875  2.5    -8.875  -0.8375  0.975   1.375 ]; score: -0.4083743842364532
point: [87.875  -0.8375 -7.5    -4.375  -3.7875  0.875   2.875 ]; score: -0.9438423645320198
point: [63.625  -5.2625 -2.5    -6.625  -5.2625  0.925   3.625 ]; score: -0.4083743842364532
point: [15.125  -2.3125  7.5    -2.125  -2.3125  0.825   2.125 ]; score: -0.4083743842364532
point: [21.1875  -4.15625 -3.75    -3.8125  -2.68125  0.8375   1.1875 ]; score: -0.4083743842364532
point: [69.6875  -1.20625  6.25    -8.3125  -5.63125  0.9375   2.6875 ]; score: -0.9438423645320198
point: [93.9375  -5.63125  1.25    -1.5625  -4.15625  0.8875   3.4375 ]; score: -0.4083743842364532

 ...Done. 


 Optimization loop... 

iteratio

ValueError: too many values to unpack (expected 2)

In [None]:
# Best objective value recorded on the optimization result.
# NOTE(review): the objective is the negated CV accuracy, so this is
# presumably -(best mean accuracy) -- confirm against GPopt's result object.
print(res_opt.best_score)

-0.9857142857142858


In [None]:
# Convert the best point from search-space coordinates to actual BCN
# hyperparameters, using the same decoding as crossval_objective.
# NOTE: this rewrites res_opt.best_params in place, so running the cell a
# second time would apply the transforms twice.
tuned = res_opt.best_params
tuned["B"] = int(tuned["B"])
tuned["nu"] = 10 ** tuned["nu"]
tuned["lam"] = 10 ** tuned["lam"]
tuned["r"] = 1 - 10 ** tuned["r"]
tuned["tol"] = 10 ** tuned["tol"]
tuned["col_sample"] = np.ceil(tuned["col_sample"])
tuned["n_clusters"] = np.ceil(tuned["n_clusters"])

In [None]:
# Refit on the full training split with the tuned hyperparameters and time the fit
start = time()
estimator = bcn.BCNClassifier(
    **res_opt.best_params,
    activation="tanh",
    type_optim="nlminb",
).fit(X_train, y_train)
fit_seconds = time() - start
print(f"\n Elapsed: {fit_seconds}")

# Score on the held-out test split; the second timer spans scoring and the accuracy print
start = time()
test_accuracy = estimator.score(X_test, y_test)
print(f"\n\n Test set accuracy: {test_accuracy}")
print(f"\n Elapsed: {time() - start}")


 Elapsed: 0.3253192901611328


 Test set accuracy: 1.0

 Elapsed: 0.0092620849609375
