In [1]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import pandas as pd
import skopt
import sys
import os


In [2]:
SEARCH_PARAMS = {'learning_rate': 0.4,
                'max_depth': 15,
                'num_leaves': 32,
                'feature_fraction': 0.8,
                'subsample': 0.2}

FIXED_PARAMS={'objective': 'multiclass',
             'num_class':3,
             'metric': 'multi_logloss',
             'is_unbalance':True,
             'bagging_freq':5,
             'boosting':'dart',
             'num_boost_round':300,
             'early_stopping_rounds':30}

def train_evaluate(search_params):
   # you can download the dataset from this link(https://www.kaggle.com/c/santander-customer-transaction-prediction/data)
   # import Dataset to play with it
   data= pd.read_csv("../../../data/feature/cba_train.csv")
   X = data.iloc[:, :-1]
   y = data.iloc[:, -1]-1
   X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=1234)
   train_data = lgb.Dataset(X_train, label=y_train)
   valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

   params = {'metric':FIXED_PARAMS['metric'],
             'objective':FIXED_PARAMS['objective'],
             'num_class':FIXED_PARAMS['num_class'],
             **search_params}

   model = lgb.train(params, train_data,                     
                     valid_sets=[valid_data],
                     num_boost_round=FIXED_PARAMS['num_boost_round'],
                     early_stopping_rounds=FIXED_PARAMS['early_stopping_rounds'],
                     valid_names=['valid'])
   score = model.best_score['valid']['multi_logloss']
   return score

In [3]:
SPACE = [
   skopt.space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
   skopt.space.Integer(1, 30, name='max_depth'),
   skopt.space.Integer(10, 200, name='num_leaves'),
   skopt.space.Real(0.1, 1.0, name='feature_fraction', prior='uniform'),
   skopt.space.Real(0.1, 1.0, name='subsample', prior='uniform')
]
@skopt.utils.use_named_args(SPACE)
def objective(**params):
   return -1.0 * train_evaluate(params)

results = skopt.forest_minimize(objective, SPACE, 
                                n_calls=100, n_random_starts=10)
sk_utils.log_results(results)



her splits with positive gain, best gain: -inf
[31]	valid's multi_logloss: 0.681053
[32]	valid's multi_logloss: 0.681266
[33]	valid's multi_logloss: 0.681591
[34]	valid's multi_logloss: 0.682332
[35]	valid's multi_logloss: 0.683562
[36]	valid's multi_logloss: 0.683777
[37]	valid's multi_logloss: 0.685522
[38]	valid's multi_logloss: 0.685067
[39]	valid's multi_logloss: 0.685971
Early stopping, best iteration is:
[9]	valid's multi_logloss: 0.670955
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 534
[LightGBM] [Info] Number of data points in the train set: 21999, number of used features: 17
[LightGBM] [Info] Start training from score -0.256960
[LightGBM] [Info] Start training from score -1.902544
[LightGBM] [Info] Start training from score -2.558606
[1]	valid's multi_logloss: 0.68668
Training until validation scores don't improve for 30 rounds
[2]	valid's multi_logloss: 0.682039
[3]	val

NameError: name 'sk_utils' is not defined

In [7]:
results

          fun: -0.6786117657008143
    func_vals: array([-0.6530641 , -0.65685317, -0.65635307, -0.65316435, -0.65692355,
       -0.65196275, -0.65161128, -0.65804859, -0.66451461, -0.66855338,
       -0.66382629, -0.66847703, -0.6568113 , -0.65840178, -0.66998854,
       -0.66089595, -0.65475082, -0.66331464, -0.6565183 , -0.65244418,
       -0.66288576, -0.66657493, -0.65762524, -0.66951846, -0.67460179,
       -0.67230352, -0.66128891, -0.6720062 , -0.66030552, -0.65774842,
       -0.66860009, -0.65075282, -0.67349987, -0.66997205, -0.65902416,
       -0.66758999, -0.66893929, -0.67428442, -0.65710855, -0.66651376,
       -0.66681126, -0.67078525, -0.66707354, -0.67186868, -0.66803957,
       -0.66726807, -0.66658572, -0.67028817, -0.66763558, -0.66782713,
       -0.65018218, -0.6525538 , -0.66867195, -0.67480484, -0.67861177,
       -0.67616935, -0.66650762, -0.6755459 , -0.65707761, -0.67440496,
       -0.67411545, -0.67185391, -0.66702949, -0.67241269, -0.66516628,
       -0.6665