In [6]:
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import pandas as pd
import skopt
import sys
import os


In [7]:
def start_category_from_zero(df,columns):
    for each in columns:
        df[each] = df[each] - 1

    return df

In [8]:
SEARCH_PARAMS = {'learning_rate': 0.4,
                'max_depth': 15,
                'num_leaves': 32,
                'feature_fraction': 0.8,
                'subsample': 0.2}

FIXED_PARAMS={'objective': 'multiclass',
             'num_class':4,
             'metric': 'multi_logloss',
             'is_unbalance':True,
             'bagging_freq':5,
             'boosting':'dart',
             'num_boost_round':300,
             'early_stopping_rounds':30}

def train_evaluate(search_params):
   # you can download the dataset from this link(https://www.kaggle.com/c/santander-customer-transaction-prediction/data)
   # import Dataset to play with it
   data= pd.read_csv("../../../data/feature/cba_train.csv")
   start_category_from_zero(data,['Reservation_Status'])
   print(data['Reservation_Status'].unique())
   X = data.iloc[:, :-1]
   y = data.iloc[:, -1]
   X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=1234)
   train_data = lgb.Dataset(X_train, label=y_train)
   valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

   params = {'metric':FIXED_PARAMS['metric'],
             'objective':FIXED_PARAMS['objective'],
             'num_class':FIXED_PARAMS['num_class'],
             **search_params}

   model = lgb.train(params, train_data,                     
                     valid_sets=[valid_data],
                     num_boost_round=FIXED_PARAMS['num_boost_round'],
                     early_stopping_rounds=FIXED_PARAMS['early_stopping_rounds'],
                     valid_names=['valid'])
   score = model.best_score['valid']['multi_logloss']
   return score

In [9]:
SPACE = [
   skopt.space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
   skopt.space.Integer(1, 30, name='max_depth'),
   skopt.space.Integer(10, 200, name='num_leaves'),
   skopt.space.Real(0.1, 1.0, name='feature_fraction', prior='uniform'),
   skopt.space.Real(0.1, 1.0, name='subsample', prior='uniform')
]
@skopt.utils.use_named_args(SPACE)
def objective(**params):
   return -1.0 * train_evaluate(params)

results = skopt.forest_minimize(objective, SPACE, 
                                n_calls=100, n_random_starts=10)
sk_utils.log_results(results)



[0 1 2]
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 534
[LightGBM] [Info] Number of data points in the train set: 21999, number of used features: 17
[LightGBM] [Info] Start training from score -0.256960
[LightGBM] [Info] Start training from score -1.902544
[LightGBM] [Info] Start training from score -2.558606
[LightGBM] [Info] Start training from score -34.538776
[1]	valid's multi_logloss: 0.682711
Training until validation scores don't improve for 30 rounds
[2]	valid's multi_logloss: 0.680119
[3]	valid's multi_logloss: 0.679287
[4]	valid's multi_logloss: 0.679406
[5]	valid's multi_logloss: 0.676288
[6]	valid's multi_logloss: 0.666256
[7]	valid's multi_logloss: 0.666449
[8]	valid's multi_logloss: 0.666636
[9]	valid's multi_logloss: 0.666508
[10]	valid's multi_logloss: 0.666878
[11]	valid's multi_logloss: 0.665995
[12]	valid's multi_logloss: 0.663547
[13]	valid's multi_logloss: 0.6

NameError: name 'sk_utils' is not defined