In [None]:
#default_exp tab_ae

In [None]:
This notebook defines a decorator that takes care of running the bayes-opt package for an estimator/model, logging every optimization step, and reloading the logged progress of earlier runs. 
It is adapted from https://gist.github.com/VincentGatien/882b3bbd81ff98b426ac418c45cfc1bd  Thanks to the original author! 
I made only minimal changes to tailor it to my use cases. 

In [None]:
# export
from sklearn.model_selection import cross_val_score
from bayes_opt import BayesianOptimization
from bayes_opt import JSONLogger
from bayes_opt.event import Events
from bayes_opt.util import load_logs
from pathlib import Path
from functools import wraps
from typing import Dict


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.datasets import make_classification

In [None]:

def get_data():
    """Generate the synthetic binary classification dataset used in the demo below.

    :return: tuple ``(data, targets)`` exactly as produced by
        ``make_classification`` with a fixed ``random_state``.
    """
    # Keep the generator settings together so they are easy to tweak.
    settings = dict(
        n_samples=1000,
        n_features=45,
        n_informative=12,
        n_redundant=7,
        random_state=134985745,
    )
    features, labels = make_classification(**settings)
    return features, labels


x, y = get_data()


In [None]:
# export
def optimize_bayes_param(X, y, objective_fn=cross_val_score, *args_objective, **kwargs_objective):
    """
    Build a decorator that turns a model factory into a Bayesian hyperparameter search.

    This outer closure captures the data and the objective function, which
    defaults to the cross_val_score function from sklearn. The decorator it
    returns replaces the decorated factory with a ``run_trials`` function.

    :param X: np.array
        Matrix of features
    :param y: np.array
        Vectors of labels
    :param objective_fn:
        objective function to optimize. It is called as
        ``objective_fn(estimator, *args_objective, X=X, y=y, **kwargs_objective)``
        and its result must expose ``.mean()``.
    :param args_objective:
        *args passed to objective_fn. Because X and y are passed by keyword,
        these land directly after the estimator in objective_fn's parameters.
    :param kwargs_objective:
        **kwargs passed to objective_fn
    :return: a decorator for a function that maps hyperparameter values to an
        unfitted estimator/model.
    """

    def optimize_bayes_wo_param(parse_model_params):
        """Wrap the model factory ``parse_model_params`` into ``run_trials``."""

        def _opt_engine(*args_model, **kwargs_model):
            """
            Objective handed to the optimizer: build the estimator for one
            candidate set of hyperparameters and return the mean of
            objective_fn's result for it.
            """

            estimator = parse_model_params(*args_model, **kwargs_model)
            return objective_fn(estimator, X=X, y=y, *args_objective, **kwargs_objective).mean()

        # NOTE: wraps() copies the factory's __name__/__doc__ onto run_trials, so at
        # runtime the docstring below is replaced by the factory's own docstring.
        @wraps(parse_model_params)
        def run_trials(pbounds: Dict,
                       init_points: int = 10,
                       n_iter: int = 10,
                       log_dir: Path = Path("./bayes_opt_logs"),
                       acq: str = 'ucb',
                       kappa: float = 2.576,
                       fit: bool = True):
            """
            :param pbounds: dict
                Dictionary with parameters names as keys and a tuple with minimum
                and maximum values.
            :param init_points : int
                Number of iterations before the explorations starts the exploration
                for the maximum.
            :param n_iter: int
                Number of iterations where the method attempts to find the maximum
                value.
            :param log_dir: Path
                Directory to log json results. Existing ``*.json`` logs in it are
                reloaded into the optimizer before the new run starts.
            :param acq: str
                The acquisition method used.
            :param kappa: float
                Parameter to indicate how closed are the next parameters sampled.
                Higher value = favors spaces that are least explored.
                Lower value = favors spaces where the regression function is the
                highest.
            :param fit: bool
                if True the best model is fitted on the data
                if False the best model is returned unfitted
            :return: The model built by the decorated factory from the best
             hyperparameters found by Bayesian Optimisation
            """
            optimizer = BayesianOptimization(_opt_engine, pbounds=pbounds)
            log_dir = Path(log_dir)
            log_dir.mkdir(parents=True, exist_ok=True)

            # Only reload real JSON log files: the directory may also hold
            # sub-directories (e.g. logs of other models) that cannot be opened as logs.
            previous_logs = sorted(str(path) for path in log_dir.glob('*.json') if path.is_file())
            if previous_logs:
                load_logs(optimizer, logs=previous_logs)

            # Start from the historical numbering scheme but never reuse the name
            # of an existing file, so earlier logs are not overwritten when the
            # numbering has gaps.
            log_index = len(previous_logs)
            while (log_dir / 'log_{}.json'.format(log_index)).exists():
                log_index += 1
            filename = 'log_{}.json'.format(log_index)

            logger = JSONLogger(path=str(log_dir / filename))
            optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

            # NOTE(review): passing acq/kappa to maximize() follows the bayes_opt API
            # this notebook was written against; confirm against the installed version.
            optimizer.maximize(init_points, n_iter, kappa=kappa, acq=acq)
            print(f"The best combination of hyperparameters are { optimizer.max['params'] }")
            best_model = parse_model_params(**optimizer.max['params'])
            if fit:
                best_model.fit(X=X, y=y)
            return best_model

        return run_trials

    return optimize_bayes_wo_param

In [None]:
# Search ranges as (min, max) per hyperparameter.
# Notice that the keys in pbounds must match the parameter names of the decorated function.
pbounds_forest = dict(
    n_estimators=(10, 1000),
    min_samples_split=(2, 50),
)


@optimize_bayes_param(X=x, y=y)
def optimize_forest(n_estimators: float, min_samples_split: float) -> RandomForestClassifier:
    """Build a random forest from the (float) values proposed by the optimizer."""
    # Candidate values arrive as floats, so truncate them to the integers the forest expects.
    n_trees = int(n_estimators)
    split_threshold = int(min_samples_split)
    return RandomForestClassifier(
        n_estimators=n_trees,
        min_samples_split=split_threshold,
        n_jobs=-1,
    )

Notice that as long as the default argument `fit=True` is left unchanged, the returned object is the best model already fitted on the data; if `fit=False`, the best model is still returned, but unfitted. In both cases the logger records the objective value obtained for each set of hyperparameters tried.

In [None]:
# Run the search for the forest and keep its logs in a model-specific directory.
best_rf = optimize_forest(
    pbounds=pbounds_forest,
    init_points=5,
    n_iter=10,
    log_dir=Path("./bayes_opt_logs/forest"),
)


The best combination of hyperparameters are {'min_samples_split': 9.286943598933544, 'n_estimators': 338.51384056305545}


Now we can see that the returned model indeed carries the best hyperparameters found (truncated to integers by `optimize_forest`). 

In [None]:
# Display the hyperparameters carried by the returned estimator.
best_rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 9,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 338,
 'n_jobs': -1,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}