# Bayesian optimization using Skopt API (GridSearchCV replacement)

https://721-54340642-gh.circle-artifacts.com/0/tmp/circle-artifacts.kdXLWjd/api/bayessearchcv.html

## Before you continue, install skopt using the following command

pip install scikit-optimize


In [None]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV

In [None]:
# Generic preparation step: only the training set is used here (the test set is not touched).

# Feature matrix: every column of the training frame except the label column
X = sub_train.drop(columns='TARGET')

# Label vector the classifier is trained against
y = sub_train['TARGET']

In [None]:
# Parameters for the Bayesian optimization run
# NOTE(review): skopt documents BayesSearchCV's n_iter default as 50, not ~250 — confirm.
n_iterations = 15    # number of iterations = number of different models to try (raise it for a more thorough search)

In [None]:
# Bayesian hyper-parameter search over a LightGBM binary classifier, scored by
# cross-validated ROC AUC on stratified folds.
bayes_opt = BayesSearchCV(
    estimator = lgb.LGBMClassifier(
        objective='binary',
        metric='auc',
        n_estimators=100,  # hardcoded to save cpu time and we know that it gives good results (try with 100 first then 10000)
        n_jobs=-1,           # specify number of cores to use for parallelization
        verbose=0,
    ),

    # NOTE(review): both the search and the estimator request all cores
    # (n_jobs=-1); this may oversubscribe the CPU — confirm it is intended.
    n_jobs = -1,
    n_iter = n_iterations,
    verbose = 0,
    refit = True,      # Refit the best estimator with the entire dataset
    random_state = 27,

    # Tuples are (low, high[, prior]); integer bounds give integer dimensions.
    search_spaces = {
        'learning_rate': (0.01, 0.1, 'log-uniform'),
        # LightGBM requires num_leaves > 1, so the range must start at 2;
        # a sampled value of 1 makes the fit fail and can abort the search.
        'num_leaves': (2, 50),
        'max_depth': (0, 10),       # LightGBM documents max_depth <= 0 as "no limit"
        'min_child_samples': (0, 50),
        'max_bin': (100, 1000),
        'subsample': (0.01, 1.0, 'uniform'),
        'subsample_freq': (0, 10),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'min_child_weight': (30, 50),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'scale_pos_weight': (1e-6, 100, 'log-uniform'),
    },
    scoring = 'roc_auc',
    # Fixed seed so every candidate is evaluated on the same shuffled folds
    cv = StratifiedKFold(
        n_splits=5,
        shuffle=True,
        random_state=27
    ),
)

def status(optim_result):
    """Report search progress and checkpoint all results so far.

    Intended to be passed as the ``callback`` of ``bayes_opt.fit``. Prints the
    number of models evaluated so far together with the best AUC and the best
    parameters, then writes the full ``cv_results_`` table to
    ``<EstimatorClassName>_opt_results.csv`` in the current working directory.

    Args:
        optim_result: Value handed to the callback by the optimizer. It is not
            used here; the state is read from the module-level ``bayes_opt``.
    """
    # Every model evaluated so far, one row per parameter setting.
    # The original read an undefined ``bayes_cv_tuner`` here (NameError on the
    # first callback); the search object in this file is ``bayes_opt``.
    all_models = pd.DataFrame(bayes_opt.cv_results_)

    # Show the best score and parameters found up to this point
    print('Model #{}\nBest AUC: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_opt.best_score_, 4),
        bayes_opt.best_params_,
    ))

    # Save all model results to narrow the prior distribution
    clf_name = bayes_opt.estimator.__class__.__name__
    all_models.to_csv(clf_name + "_opt_results.csv")

# Run the search on the underlying array values of X and y;
# `status` is called back to print progress and save intermediate results.
result = bayes_opt.fit(
    X=X.values,
    y=y.values,
    callback=status,
)