In [1]:
import os
import time

import catboost as cb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from bayes_opt import BayesianOptimization
from sklearn.cluster import KMeans
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split, KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

In [2]:
from skopt import BayesSearchCV

In [3]:
# List the files available in the data/ directory. os.listdir() returns
# entries in arbitrary order, so sort them to keep the displayed output stable
# across machines and re-runs.
sorted(os.listdir("data"))

['sample_submission.csv',
 'test.csv',
 'test_new.csv',
 'test_new_1.csv',
 'train.csv',
 'train_new.csv',
 'train_new_1.csv']

In [4]:
# Load the training and test tables from the data/ directory.
train_csv, test_csv = "data/train_new.csv", "data/test_new.csv"
data = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)


In [5]:
# Separate the "smoking" target column from the remaining feature columns.
y_train = data["smoking"]
x_train = data.drop(columns="smoking")

In [6]:


# Bayesian hyper-parameter search for CatBoost, scored by ROC AUC over 5 folds.
# The base estimator is created with silent=True.
catboost_estimator = cb.CatBoostClassifier(silent=True)

# Each tuple gives the (low, high) bounds of one tuned hyper-parameter.
# "leaf_estimation_iterations" (5, 150) was tried earlier and is currently
# excluded from the search.
catboost_search_spaces = dict(
    max_depth=(3, 15),
    subsample=(0.3, 0.7),
    max_bin=(5, 40),
    colsample_bylevel=(0.3, 0.9),
    l2_leaf_reg=(2, 30),
    learning_rate=(0.0001, 0.01),
    n_estimators=(100, 10000),
)

# n_iter is not passed, so BayesSearchCV's default iteration budget applies.
bayes_cv_tuner_cb = BayesSearchCV(
    estimator=catboost_estimator,
    search_spaces=catboost_search_spaces,
    scoring="roc_auc",
    cv=5,
    refit=True,
    n_jobs=-1,
    verbose=1,
    random_state=72,
)

In [7]:
# Run the CatBoost search on the full training set.
resultCAT = bayes_cv_tuner_cb.fit(X=x_train, y=y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

KeyboardInterrupt: 

15

In [None]:
# Display the CatBoost hyper-parameter combination selected by the search.
bayes_cv_tuner_cb.best_params_

In [None]:

# Bayesian hyper-parameter search for XGBoost, scored by ROC AUC over 5 folds.
# n_iter is not passed, so BayesSearchCV's default iteration budget applies.
bayes_cv_tuner_xgb = BayesSearchCV(
    estimator = xgb.XGBClassifier(
    # `silent` is no longer an XGBoost parameter; `verbosity=0` is the
    # supported way to mute training logs.
    verbosity=0
    ),
    # Each tuple gives the (low, high) bounds of one tuned hyper-parameter.
    # NOTE(review): "gamma" and "alpha" use integer bounds, so skopt will
    # presumably sample whole numbers only; use float bounds (e.g. 0.0, 7.0)
    # if fractional values should be explored.
    search_spaces = {
    "n_estimators" : (1000, 10000),
    "learning_rate": (0.0001, 0.01),
    "gamma" : (0, 7),
    "max_depth": (2, 20),
    "subsample": (0.3, 0.9),
    "min_child_weight": (1, 20),
#     "colsample_bytree": (0.1, 0.7),
#     "colsample_bylevel": (0.1, 0.7),
#     "colsample_bynode": (0.1, 0.7),
    "lambda": (1, 5),
    "alpha" : (0, 20)
    },
    cv=5,
    scoring = 'roc_auc',
    n_jobs = -1,
#     n_iter = 100,
    verbose = 1,
    refit = True,
    random_state = 72
    )

In [None]:
# Run the XGBoost search on the full training set.
resultXGB = bayes_cv_tuner_xgb.fit(X=x_train, y=y_train)

In [None]:
# Display the XGBoost hyper-parameter combination selected by the search.
bayes_cv_tuner_xgb.best_params_

In [None]:

# Bayesian hyper-parameter search for a random forest, scored by ROC AUC.
# cv and n_iter are left unset here (the CatBoost/XGBoost tuners pass cv=5),
# so BayesSearchCV's defaults apply.
bayes_cv_tuner_rf = BayesSearchCV(
    estimator = RandomForestClassifier(),
    # Each tuple gives the (low, high) bounds of one tuned hyper-parameter.
    # Keys must match RandomForestClassifier parameter names exactly: the
    # previous "min_sample_split " (missing "s", trailing space) was rejected
    # as an invalid parameter when the search tried to set it.
    search_spaces = {
    "n_estimators": (1000, 10000),
    "max_depth" : (2, 20),
#     "max_leaf_nodes": (2, 100),
#     "max_samples": (0.3, 0.9),
    "min_samples_split": (2, 50),
    "min_samples_leaf": (5, 50)
#     "max_features": ("sqrt", "log2"),
    },
#     cv=5,
    scoring = 'roc_auc',
    n_jobs = -1,
#     n_iter = 100,
    verbose = 1,
    refit = True,
    random_state = 72
    )

In [None]:
# Run the random-forest search on the full training set.
resulrf = bayes_cv_tuner_rf.fit(X=x_train, y=y_train)

In [None]:
# Display the random-forest hyper-parameter combination selected by the search.
bayes_cv_tuner_rf.best_params_

In [None]:
# Bayesian hyper-parameter search for a histogram gradient-boosting classifier,
# scored by ROC AUC. cv and n_iter are left unset, so BayesSearchCV's defaults
# apply.
bayes_cv_tuner_hist = BayesSearchCV(
    estimator = HistGradientBoostingClassifier(),
    # Each tuple gives the (low, high) bounds of one tuned hyper-parameter.
    # Keys must match the estimator's parameter names exactly (the previous
    # "min_samples_leaf " / "max_leaf_nodes " keys had trailing spaces and were
    # rejected as invalid parameters).
    search_spaces = {
    "max_iter": (1000, 10000),
    "l2_regularization" : (0.0001, 0.01),
    "max_depth": (2, 20),
    # scikit-learn requires max_bins <= 255, so the upper bound is capped there.
    "max_bins": (50, 255),
    "min_samples_leaf": (5, 50),
    "max_leaf_nodes": (5, 50),
    },
#     cv=5,
    scoring = 'roc_auc',
    n_jobs = -1,
#     n_iter = 100,
    verbose = 1,
    refit = True,
    random_state = 72
    )

In [None]:
# Run the histogram gradient-boosting search on the full training set.
# Stored under its own name (matching resultCAT / resultXGB) instead of
# overwriting `resulrf`, which holds the random-forest search result.
resultHIST = bayes_cv_tuner_hist.fit(x_train, y_train)

In [None]:
# Display the histogram gradient-boosting hyper-parameter combination selected by the search.
bayes_cv_tuner_hist.best_params_

In [None]:
# NOTE(review): this cell re-creates `bayes_cv_tuner_hist`, repeating the
# definition from an earlier cell and replacing any already-fitted tuner with
# a fresh, unfitted one. Consider removing one of the two cells.
#
# Bayesian hyper-parameter search for a histogram gradient-boosting classifier,
# scored by ROC AUC. cv and n_iter are left unset, so BayesSearchCV's defaults
# apply.
bayes_cv_tuner_hist = BayesSearchCV(
    estimator = HistGradientBoostingClassifier(),
    # Each tuple gives the (low, high) bounds of one tuned hyper-parameter.
    # Keys must match the estimator's parameter names exactly (the previous
    # "min_samples_leaf " / "max_leaf_nodes " keys had trailing spaces and were
    # rejected as invalid parameters).
    search_spaces = {
    "max_iter": (1000, 10000),
    "l2_regularization" : (0.0001, 0.01),
    "max_depth": (2, 20),
    # scikit-learn requires max_bins <= 255, so the upper bound is capped there.
    "max_bins": (50, 255),
    "min_samples_leaf": (5, 50),
    "max_leaf_nodes": (5, 50),
    },
#     cv=5,
    scoring = 'roc_auc',
    n_jobs = -1,
#     n_iter = 100,
    verbose = 1,
    refit = True,
    random_state = 72
    )