In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, plot_confusion_matrix

from skopt.space import Integer, Real, Categorical
from skopt import BayesSearchCV
from scipy.stats import uniform, loguniform

from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

from sklearn.preprocessing import StandardScaler
import pickle

In [2]:
df = pd.read_csv('data/explored_cmv&unpop_data')

In [3]:
df.drop(columns = ['Unnamed: 0'], inplace = True)

***Model creation and optimization***

In [4]:
x_list = df.select_dtypes(exclude = np.dtype('O')).columns.tolist()
x_list.remove('post_subreddit')
X = df[x_list]
y = df['post_subreddit']

X = X.astype(np.dtype('float64'))

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, stratify = y)

- Dummy

In [5]:
df['post_subreddit'].value_counts(normalize = True)

0    0.563316
1    0.436684
Name: post_subreddit, dtype: float64

- XGBoost

In [6]:
xgb_pipe = Pipeline([
    ('xgb', XGBClassifier(base_score = .563316, random_state = 42, booster = 'gbtree', use_label_encoder = False, verbosity = 0))
])

xgb_params = {
    'xgb__n_estimators': Integer(1, 100),
    'xgb__max_depth': Integer(1, 100),
    'xgb__max_delta_step': Real(0, .5),
    'xgb__min_child_weight': Real(0, 2, prior='uniform'),
    'xgb__learning_rate': Real(0, .2, prior='uniform')
}

xgb_bs = BayesSearchCV(estimator = xgb_pipe,
                     search_spaces = xgb_params,
                     scoring = 'f1',
                     n_iter = 50,
                     n_jobs = 8,
                     cv = 5,
                     verbose = 1,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

xgb_bs_rocauc = BayesSearchCV(estimator = xgb_pipe,
                     search_spaces = xgb_params,
                     scoring = 'roc_auc',
                     n_iter = 50,
                     n_jobs = 8,
                     cv = 5,
                     verbose = 1,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

In [7]:
xgb_bs.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    7.7s remaining:   11.6s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    7.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  2.1min remaining:  3.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  2.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    6.2s remaining:    9.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    6.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   24.7s remaining:   37.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   25.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   26.5s remaining:   39.9s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   26.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   11.0s remaining:   16.6s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   11.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   51.1s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   51.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   17.1s remaining:   25.7s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   17.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    4.4s remaining:    6.6s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    4.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   39.0s remaining:   58.5s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   39.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   10.2s remaining:   15.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   10.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   10.1s remaining:   15.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   10.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   32.3s remaining:   48.5s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   32.6s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   59.8s remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   59.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   27.2s remaining:   40.9s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   27.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   56.5s remaining:  1.4min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   57.3s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    6.4s remaining:    9.7s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    6.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   50.1s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   51.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   55.7s remaining:  1.4min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   58.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  1.9min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  2.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   54.5s remaining:  1.4min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   55.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   51.7s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   52.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   52.2s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   52.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   47.6s remaining:  1.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   48.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   45.6s remaining:  1.1min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   45.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   36.5s remaining:   54.8s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   37.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   10.9s remaining:   16.4s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   11.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   49.1s remaining:  1.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   51.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.0min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   57.9s remaining:  1.4min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   58.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   28.0s remaining:   42.0s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   28.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.8min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  1.9min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.4min remaining:  2.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.4min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.5min remaining:  2.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.6min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  2.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  3.4min remaining:  5.1min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  3.4min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.5min remaining:  2.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.5min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   47.2s remaining:  1.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   47.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.6min remaining:  2.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.6min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.7min remaining:  2.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.7min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.0min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   41.1s remaining:  1.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   41.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('xgb',
                                         XGBClassifier(base_score=0.563316,
                                                       booster='gbtree',
                                                       colsample_bylevel=None,
                                                       colsample_bynode=None,
                                                       colsample_bytree=None,
                                                       gamma=None, gpu_id=None,
                                                       importance_type='gain',
                                                       interaction_constraints=None,
                                                       learning_rate=None,
                                                       max_delta_step=None,
                                                       max_depth=None,
                                                       min_child_weight=None,
    

In [8]:
xgb = xgb_bs.best_estimator_['xgb']

In [9]:
pickle.dump(xgb, open('models/XGBoost.sav', 'wb'))

In [10]:
xgb_bs_rocauc.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    2.7s remaining:    4.0s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    2.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   49.9s remaining:  1.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   50.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  2.1min remaining:  3.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  2.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    6.2s remaining:    9.4s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    6.3s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   24.8s remaining:   37.2s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   25.3s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   26.3s remaining:   39.4s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   26.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   10.8s remaining:   16.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   11.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   50.9s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   51.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   16.8s remaining:   25.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   17.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   26.9s remaining:   40.4s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   27.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   44.5s remaining:  1.1min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   45.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   15.2s remaining:   22.9s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   15.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    2.2s remaining:    3.4s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    2.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.6min remaining:  2.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.6min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   17.5s remaining:   26.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   18.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   20.8s remaining:   31.3s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   21.6s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    9.4s remaining:   14.2s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    9.6s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   22.3s remaining:   33.5s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   22.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    9.4s remaining:   14.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    9.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   21.7s remaining:   32.6s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   22.0s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   14.5s remaining:   21.8s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   14.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   52.8s remaining:  1.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   53.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   41.4s remaining:  1.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   42.1s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   13.3s remaining:   20.0s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   13.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   28.7s remaining:   43.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   29.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   31.4s remaining:   47.2s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   31.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.0min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   32.1s remaining:   48.2s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   32.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   55.1s remaining:  1.4min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   55.4s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.0min remaining:  1.5min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.0min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    3.3s remaining:    5.0s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    3.5s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    0.7s remaining:    1.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    0.7s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:    6.7s remaining:   10.0s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:    6.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  1.9min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.8min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.6min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.1min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.1min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   10.7s remaining:   16.1s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   10.8s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   47.0s remaining:  1.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   47.9s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   38.5s remaining:   57.7s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   39.2s finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.5min remaining:  2.2min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.5min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.3min remaining:  2.0min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.3min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.6min remaining:  2.3min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.6min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.8min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:  1.2min remaining:  1.7min
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:  1.2min finished


Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed:   29.6s remaining:   44.5s
[Parallel(n_jobs=8)]: Done   5 out of   5 | elapsed:   30.4s finished


BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('xgb',
                                         XGBClassifier(base_score=0.563316,
                                                       booster='gbtree',
                                                       colsample_bylevel=None,
                                                       colsample_bynode=None,
                                                       colsample_bytree=None,
                                                       gamma=None, gpu_id=None,
                                                       importance_type='gain',
                                                       interaction_constraints=None,
                                                       learning_rate=None,
                                                       max_delta_step=None,
                                                       max_depth=None,
                                                       min_child_weight=None,
    

In [11]:
xgb_rocauc = xgb_bs_rocauc.best_estimator_['xgb']

In [12]:
pickle.dump(xgb_rocauc, open('models/XGBoost_rocauc.sav', 'wb'))

- Logistic Regression

In [13]:
logreg_pipe = Pipeline([
    ('logreg', LogisticRegression(random_state = 42, solver = 'saga'))
])

logreg_params = {
    'logreg__penalty': Categorical(['l1', 'l2', 'elasticnet']),
    'logreg__tol': Real(0, .1, prior='uniform'),
    'logreg__C': Real(0.0001, 100, prior='log-uniform'),
    'logreg__class_weight': Categorical(['balanced', None]),
    'logreg__max_iter': Integer(500, 1000),
    'logreg__l1_ratio': Real(0, 1, prior='uniform')
}

logreg_bs = BayesSearchCV(estimator = logreg_pipe,
                     search_spaces = logreg_params,
                     scoring = 'f1',
                     n_iter = 50,
                     n_jobs = 8,
                     cv = 5,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

logreg_bs_rocauc = BayesSearchCV(estimator = logreg_pipe,
                     search_spaces = logreg_params,
                     scoring = 'roc_auc',
                     n_iter = 50,
                     n_jobs = 8,
                     cv = 5,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

In [14]:
logreg_bs.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('logreg',
                                         LogisticRegression(random_state=42,
                                                            solver='saga'))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='f1',
              search_spaces={'logreg__C': Real(low=0.0001, high=100, prior='log-uniform', transform='identity'),
                             'logreg__class_weight': Categorical(categories=('balanced', None), prior=None),
                             'logreg__l1_ratio': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'logreg__max_iter': Integer(low=500, high=1000, prior='uniform', transform='identity'),
                             'logreg__penalty': Categorical(categories=('l1', 'l2', 'elasticnet'), prior=None),
                             'logreg__tol': Real(low=0, high=0.1, prior='uniform', transform='identit

In [15]:
logreg = logreg_bs.best_estimator_['logreg']

In [16]:
pickle.dump(logreg, open('models/LogisticRegression.sav', 'wb'))

In [17]:
logreg_bs_rocauc.fit(X_train, y_train)

  "(penalty={})".format(self.penalty))


BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('logreg',
                                         LogisticRegression(random_state=42,
                                                            solver='saga'))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='roc_auc',
              search_spaces={'logreg__C': Real(low=0.0001, high=100, prior='log-uniform', transform='identity'),
                             'logreg__class_weight': Categorical(categories=('balanced', None), prior=None),
                             'logreg__l1_ratio': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'logreg__max_iter': Integer(low=500, high=1000, prior='uniform', transform='identity'),
                             'logreg__penalty': Categorical(categories=('l1', 'l2', 'elasticnet'), prior=None),
                             'logreg__tol': Real(low=0, high=0.1, prior='uniform', transform='id

In [18]:
logreg_rocauc = logreg_bs_rocauc.best_estimator_['logreg']

In [19]:
pickle.dump(logreg_rocauc, open('models/LogisticRegression_rocauc.sav', 'wb'))

- Decision Tree

In [20]:
dtree_pipe = Pipeline([
    ('dtree', DecisionTreeClassifier(random_state = 42))
])

dtree_params = {
    'dtree__criterion': Categorical(['gini', 'entropy']),
    'dtree__splitter': Categorical(['best', 'random']),
    'dtree__max_depth': Integer(1, 100),
    'dtree__min_samples_split': Real(0, .5),
    'dtree__min_samples_leaf': Real(0, .5),
    'dtree__max_features': Categorical(['auto', 'sqrt', 'log2']),
    'dtree__min_impurity_decrease': Real(0, .2, prior='uniform'),
    'dtree__ccp_alpha': Real(0, .2, prior='uniform')
}

dtree_bs = BayesSearchCV(estimator = dtree_pipe,
                     search_spaces = dtree_params,
                     scoring = 'f1',
                     n_iter = 50,
                     cv = 5,
                     n_jobs = 8,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

dtree_bs_rocauc = BayesSearchCV(estimator = dtree_pipe,
                     search_spaces = dtree_params,
                     scoring = 'roc_auc',
                     n_iter = 50,
                     cv = 5,
                     n_jobs = 8,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

In [21]:
dtree_bs.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('dtree',
                                         DecisionTreeClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='f1',
              search_spaces={'dtree__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'dtree__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'dtree__m...
                             'dtree__max_features': Categorical(categories=('auto', 'sqrt', 'log2'), prior=None),
                             'dtree__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'dtree__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'dtree__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),


In [22]:
dtree = dtree_bs.best_estimator_['dtree']

In [23]:
pickle.dump(dtree, open('models/DecisionTree.sav', 'wb'))

In [24]:
dtree_bs_rocauc.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('dtree',
                                         DecisionTreeClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='roc_auc',
              search_spaces={'dtree__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'dtree__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'dtr...
                             'dtree__max_features': Categorical(categories=('auto', 'sqrt', 'log2'), prior=None),
                             'dtree__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'dtree__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'dtree__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),


In [25]:
dtree_rocauc = dtree_bs_rocauc.best_estimator_['dtree']

In [26]:
pickle.dump(dtree_rocauc, open('models/DecisionTree_rocauc.sav', 'wb'))

- Random Forest

In [27]:
rf_pipe = Pipeline([
    ('rf', RandomForestClassifier(random_state = 42))
])

rf_params = {
    'rf__criterion': Categorical(['gini', 'entropy']),
    'rf__n_estimators': Integer(5, 200),
    'rf__min_samples_split': Real(0, .5),
    'rf__min_samples_leaf': Real(0, .5),
    'rf__max_depth': Integer(1, 100),
    'rf__max_features': Categorical(['auto', 'sqrt', 'log2']),
    'rf__min_impurity_decrease': Real(0, .2, prior='uniform'),
    'rf__ccp_alpha': Real(0, .2, prior='uniform'),
    'rf__max_samples': Real(0, 1)
}

rf_bs = BayesSearchCV(estimator = rf_pipe,
                     search_spaces = rf_params,
                     scoring = 'f1',
                     n_iter = 50,
                     cv = 5,
                     n_jobs = 8,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

rf_bs_rocauc = BayesSearchCV(estimator = rf_pipe,
                     search_spaces = rf_params,
                     scoring = 'roc_auc',
                     n_iter = 50,
                     cv = 5,
                     n_jobs = 8,
                     refit = True,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

In [28]:
rf_bs.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('rf',
                                         RandomForestClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='f1',
              search_spaces={'rf__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'rf__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'rf__max_depth': In...
                             'rf__max_samples': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'rf__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'rf__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'rf__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),
              

In [29]:
rf = rf_bs.best_estimator_['rf']

In [30]:
pickle.dump(rf, open('models/RandomForest.sav', 'wb'))

In [31]:
rf_bs_rocauc.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('rf',
                                         RandomForestClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='roc_auc',
              search_spaces={'rf__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'rf__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'rf__max_dept...
                             'rf__max_samples': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'rf__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'rf__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'rf__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),
               

In [32]:
rf_rocauc = rf_bs_rocauc.best_estimator_['rf']

In [33]:
pickle.dump(rf_rocauc, open('models/RandomForest_rocauc.sav', 'wb'))

- Extra Trees

In [34]:
et_pipe = Pipeline([
    ('et', ExtraTreesClassifier(random_state = 42))
])

et_params = {
    'et__criterion': Categorical(['gini', 'entropy']),
    'et__n_estimators': Integer(5, 200),
    'et__min_samples_split': Real(0, .5),
    'et__min_samples_leaf': Real(0, .5),
    'et__max_depth': Integer(1, 100),
    'et__max_features': Categorical(['auto', 'sqrt', 'log2']),
    'et__min_impurity_decrease': Real(0, .2, prior='uniform'),
    'et__ccp_alpha': Real(0, .2, prior='uniform'),
    'et__max_samples': Real(0, 1)
}

et_bs = BayesSearchCV(estimator = et_pipe,
                     search_spaces = et_params,
                     scoring = 'f1',
                     n_iter = 50,
                     cv = 5,
                     refit = True,
                     n_jobs = 8,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

et_bs_rocauc = BayesSearchCV(estimator = et_pipe,
                     search_spaces = et_params,
                     scoring = 'roc_auc',
                     n_iter = 50,
                     cv = 5,
                     refit = True,
                     n_jobs = 8,
                     optimizer_kwargs = {'base_estimator': 'RF'},
                     random_state=42)

In [35]:
et_bs.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('et',
                                         ExtraTreesClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='f1',
              search_spaces={'et__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'et__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'et__max_depth': Inte...
                             'et__max_samples': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'et__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'et__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'et__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),
              

In [36]:
et = et_bs.best_estimator_['et']

In [37]:
pickle.dump(et, open('models/ExtraTrees.sav', 'wb'))

In [38]:
et_bs_rocauc.fit(X_train, y_train)

BayesSearchCV(cv=5,
              estimator=Pipeline(steps=[('et',
                                         ExtraTreesClassifier(random_state=42))]),
              n_jobs=8, optimizer_kwargs={'base_estimator': 'RF'},
              random_state=42, scoring='roc_auc',
              search_spaces={'et__ccp_alpha': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'et__criterion': Categorical(categories=('gini', 'entropy'), prior=None),
                             'et__max_depth'...
                             'et__max_samples': Real(low=0, high=1, prior='uniform', transform='identity'),
                             'et__min_impurity_decrease': Real(low=0, high=0.2, prior='uniform', transform='identity'),
                             'et__min_samples_leaf': Real(low=0, high=0.5, prior='uniform', transform='identity'),
                             'et__min_samples_split': Real(low=0, high=0.5, prior='uniform', transform='identity'),
               

In [39]:
et_rocauc = et_bs_rocauc.best_estimator_['et']

In [40]:
pickle.dump(et_rocauc, open('models/ExtraTrees_rocauc.sav', 'wb'))

- To step 6 ->