# 1 - Install

<a target="_blank" href="https://colab.research.google.com/github/Techtonique/GPopt/blob/ucb/GPopt/demo/thierrymoudiki_20240206_tuning_BCN_classifier_lazy.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
!pip uninstall -y BCN GPopt

[0m

In [2]:
!pip install BCN --upgrade --no-cache-dir

Collecting BCN
  Downloading BCN-0.7.1-py2.py3-none-any.whl.metadata (873 bytes)
Collecting joblib (from BCN)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting numpy (from BCN)
  Downloading numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hCollecting rpy2 (from BCN)
  Downloading rpy2-3.5.16-cp311-cp311-macosx_10_9_universal2.whl.metadata (4.5 kB)
Collecting scikit-learn (from BCN)
  Downloading scikit_learn-1.4.2-cp311-cp311-macosx_10_9_x86_64.whl.metadata (11 kB)
Collecting cffi>=1.15.1 (from rpy2->BCN)
  Downloading cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (1.5 kB)
Collecting jinja2 (from rpy2->BCN)
  Downloading Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)
Collecting tzlocal (from rpy2->BCN)
  Downloading tzlocal-5.2-py3-none-any.whl.metadata (7.8 kB)
Collecting scipy>=1.6.0 (from sc

In [3]:
!pip install ../.. --upgrade --no-cache-dir

Processing /Users/t/Documents/Python_Packages/GPopt
  Preparing metadata (setup.py) ... [?25ldone
Collecting matplotlib (from GPopt==0.6.0)
  Downloading matplotlib-3.8.4-cp311-cp311-macosx_10_12_x86_64.whl.metadata (5.8 kB)
Collecting nnetsauce (from GPopt==0.6.0)
  Downloading nnetsauce-0.20.1-py2.py3-none-any.whl.metadata (822 bytes)
Collecting pandas (from GPopt==0.6.0)
  Downloading pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl.metadata (19 kB)
Collecting contourpy>=1.0.1 (from matplotlib->GPopt==0.6.0)
  Downloading contourpy-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib->GPopt==0.6.0)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib->GPopt==0.6.0)
  Downloading fonttools-4.51.0-cp311-cp311-macosx_10_9_x86_64.whl.metadata (159 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.5/159.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[

In [4]:
import BCN as bcn # takes a long time to run, ONLY the first time it's run
import GPopt as gp
import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.datasets import load_iris, load_wine, load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time

# 2 - cv

In [5]:
from sklearn.model_selection import cross_val_score

In [6]:
def bcn_cv(X_train, y_train,
               B = 10, nu = 0.335855,
               lam = 10**0.7837525,
               r = 1 - 10**(-5.470031),
               tol = 10**-7,
               col_sample=1,
               n_clusters = 3):

  estimator  = bcn.BCNClassifier(B = int(B),
                                 nu = nu,
                                 lam = lam,
                                 r = r,
                                 tol = tol,
                                 col_sample = col_sample,
                                 n_clusters = n_clusters,
                                 activation="tanh",
                                 type_optim="nlminb",
                                 show_progress = False)

  return -cross_val_score(estimator, X_train, y_train,
                          scoring='accuracy',
                          cv=5, n_jobs=None,
                          verbose=0).mean()

def optimize_bcn(X_train, y_train, method="queue"):
  # objective function for hyperparams tuning
  def crossval_objective(x):
    return bcn_cv(X_train=X_train,
                  y_train=y_train,
                  B = int(x[0]),
                  nu = 10**x[1],
                  lam = 10**x[2],
                  r = 1 - 10**x[3],
                  tol = 10**x[4],
                  col_sample = np.ceil(x[5]),
                  n_clusters = np.ceil(x[6]))
  gp_opt = gp.GPOpt(objective_func=crossval_objective,
                    lower_bound = np.array([   3,    -6, -10, -10,   -6, 0.8, 1]),
                    upper_bound = np.array([ 100,  -0.1,  10,  -1, -0.1,   1, 4]),
                    params_names=["B", "nu", "lam", "r", "tol", "col_sample", "n_clusters"],                    
                    n_init=10, n_iter=90, seed=3137)
  return gp_opt.lazyoptimize(method = method, verbose=2, abs_tol=1e-3, 
                     type_exec = method,
                     estimators = ["LinearRegression",
                                    "RidgeCV",
                                    "LassoCV",
                                    "ElasticNetCV", 
                                    "KNeighborsRegressor",
                                    "BaggingRegressor",
                                    "ExtraTreesRegressor", 
                                    "RandomForestRegressor", 
                                    ]
                                    )

In [7]:
dataset = load_wine()
X = dataset.data
y = dataset.target

In [8]:
# hyperparams tuning
res_opt = optimize_bcn(X, y, method="queue")
print(res_opt)
print(res_opt.best_score)


 adjusting surrogate model # 1 (CustomRegressor(BaggingRegressor))... 



AssertionError: method must be in ('bayesian', 'mc')

In [None]:
# hyperparams tuning
res_opt = optimize_bcn(X, y, method="independent")
print(res_opt)
print(res_opt.best_score)

-0.9857142857142858
