In [1]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import pandas as pd
import skopt
import sys
import os


In [14]:
SEARCH_PARAMS = {'learning_rate': 0.4,
                'max_depth': 15,
                'num_leaves': 32,
                'feature_fraction': 0.8,
                'subsample': 0.2}

FIXED_PARAMS={'objective': 'multiclass',
             'num_class':4,
             'metric': 'multi_logloss',
             'is_unbalance':True,
             'bagging_freq':5,
             'boosting':'dart',
             'num_boost_round':300,
             'early_stopping_rounds':30}

def train_evaluate(search_params):
   # you can download the dataset from this link(https://www.kaggle.com/c/santander-customer-transaction-prediction/data)
   # import Dataset to play with it
   data= pd.read_csv("../../../data/feature/cba_train.csv")
   X = data.iloc[:, :-1]
   y = data.iloc[:, -1]
   X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=1234)
   train_data = lgb.Dataset(X_train, label=y_train)
   valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

   params = {'metric':FIXED_PARAMS['metric'],
             'objective':FIXED_PARAMS['objective'],
             'num_class':FIXED_PARAMS['num_class'],
             **search_params}

   model = lgb.train(params, train_data,                     
                     valid_sets=[valid_data],
                     num_boost_round=FIXED_PARAMS['num_boost_round'],
                     early_stopping_rounds=FIXED_PARAMS['early_stopping_rounds'],
                     valid_names=['valid'])
   score = model.best_score['valid']['multi_logloss']
   return score

In [15]:
SPACE = [
   skopt.space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
   skopt.space.Integer(1, 30, name='max_depth'),
   skopt.space.Integer(10, 200, name='num_leaves'),
   skopt.space.Real(0.1, 1.0, name='feature_fraction', prior='uniform'),
   skopt.space.Real(0.1, 1.0, name='subsample', prior='uniform')
]
@skopt.utils.use_named_args(SPACE)
def objective(**params):
   return -1.0 * train_evaluate(params)

results = skopt.forest_minimize(objective, SPACE, 
                                n_calls=100, n_random_starts=10)
sk_utils.log_results(results)



ng] No further splits with positive gain, best gain: -inf
[173]	valid's multi_logloss: 0.679032
[174]	valid's multi_logloss: 0.679006
[175]	valid's multi_logloss: 0.679009
[176]	valid's multi_logloss: 0.679007
[177]	valid's multi_logloss: 0.678881
[178]	valid's multi_logloss: 0.67888
[179]	valid's multi_logloss: 0.678863
[180]	valid's multi_logloss: 0.678788
[181]	valid's multi_logloss: 0.678772
[182]	valid's multi_logloss: 0.678756
[183]	valid's multi_logloss: 0.678756
[184]	valid's multi_logloss: 0.678745
[185]	valid's multi_logloss: 0.678746
[186]	valid's multi_logloss: 0.678749
[187]	valid's multi_logloss: 0.678729
[188]	valid's multi_logloss: 0.678735
[189]	valid's multi_logloss: 0.678736
[190]	valid's multi_logloss: 0.67867
[191]	valid's multi_logloss: 0.678671
[192]	valid's multi_logloss: 0.678641
[193]	valid's multi_logloss: 0.678625
[194]	valid's multi_logloss: 0.678616
[195]	valid's multi_logloss: 0.678624
[196]	valid's multi_logloss: 0.678624
[197]	valid's multi_logloss: 0.6

NameError: name 'sk_utils' is not defined