In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import roc_auc_score
# from sklearn.model_selection import StratifiedKFold
# from sklearn.model_selection import RandomizedSearchCV
from skopt import BayesSearchCV
from sklearn.model_selection import train_test_split

In [2]:
train_df = pd.read_csv('../input/train.csv')
test_df = pd.read_csv('../input/test.csv')
features = [c for c in train_df.columns if c not in ['ID_code', 'target']] # basic features
target = train_df['target']

In [27]:
X_train, X_test, y_train, y_test = train_test_split(train_df, target, train_size=0.80, random_state=42)

predictions = np.zeros(len(test_df))
trn_data = lgb.Dataset(train_df[features], label=target)



In [28]:
param = {
    'bagging_freq': 5,          
    'bagging_fraction': 0.38,   'boost_from_average':'false',   
    'boost': 'gbdt',             'feature_fraction': 0.04,     'learning_rate': 0.0085,
    'max_depth': 5,             'metric':'auc',                'min_data_in_leaf': 80,     'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 13,            'num_threads': 8,              'tree_learner': 'serial',   'objective': 'binary',
    'reg_alpha': 0.1302650970728192, 'reg_lambda': 0.3603427518866501,'verbosity': 1
}

In [38]:
opt = BayesSearchCV(
    lgb.LGBMClassifier(objective='binary', metric='auc'),
    {
        'learning_rate':[0.009, 0.01, 0.02], 
        'num_leaves' :[20,30],
        'bagging_freq':[5],
        'boost_from_average': ['false'],
        'feature_fraction':[0.04], 
        'max_depth':[5],
        'metric':['auc'],
        'min_sum_hessian_in_leaf':[10.0],
        'num_threads':[8],
        'tree_learner':['serial'], 
        'objective':['binary'],
        'reg_alpha':[0.1302650970728192], 
        'reg_lambda': [0.3603427518866501], 
        'verbosity':[1],
        'min_data_in_leaf':[70,90,100], 
        'bagging_fraction':[0.2,0.5,0.6],
    },
     fit_params={
             'eval_set': (X_test[features], y_test),
             'eval_metric': 'auc', 
             'early_stopping_rounds': 1000,
             },
    n_jobs=-1, cv=5,
)

In [39]:
opt.fit(X_train[features], y_train)
print("val. score: %s" % opt.best_score_)
print("Best parameters: ", opt.best_params_)



val. score: 0.89999375
Best parameters:  {'bagging_fraction': 0.6, 'bagging_freq': 5, 'boost_from_average': 'false', 'feature_fraction': 0.04, 'learning_rate': 0.01, 'max_depth': 5, 'metric': 'auc', 'min_data_in_leaf': 100, 'min_sum_hessian_in_leaf': 10.0, 'num_leaves': 26, 'num_threads': 8, 'objective': 'binary', 'reg_alpha': 0.1302650970728192, 'reg_lambda': 0.3603427518866501, 'tree_learner': 'serial', 'verbosity': 1}


In [None]:
# {'bagging_fraction': 0.6, 'bagging_freq': 5, 'boost_from_average': 'false', 'feature_fraction': 0.04, 
#  'learning_rate': 0.01, 'max_depth': 5, 'metric': 'auc', 'min_data_in_leaf': 100, 'min_sum_hessian_in_leaf': 10.0, 
#  'num_leaves': 26, 'num_threads': 8, 'objective': 'binary', 'reg_alpha': 0.1302650970728192, 
#  'reg_lambda': 0.3603427518866501, 'tree_learner': 'serial', 'verbosity': 1}