In [1]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
%matplotlib inline
rcParams['figure.figsize'] = 19, 12
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import collections
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

import xgboost as xgb
import lightgbm as lgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn import metrics, cross_validation



In [2]:
def parse_params(best, listed):
    best = best.copy()
    if len(listed) == 0:
        return best
    else :
        for key in best:
            if key in listed.keys():
                best[key] = listed[key][int(best[key])]
        return best

In [62]:
def optimize_LGB(x_train, y_train, params, max_evals):
    def objective(params):
        params['num_leaves'] = int(params['num_leaves'])
        params['bagging_freq'] = int(params['bagging_freq'])
        skf = cross_validation.StratifiedKFold(
            y_train, # Samples to split in K folds
            n_folds=5, # Number of folds. Must be at least 2.
            shuffle=True, # Whether to shuffle each stratification of the data before splitting into batches.
            random_state=423 # pseudo-random number generator state used for shuffling
        )
        boost_rounds = []
        score = []

        for train, test in skf:
            _train_x, _test_x, _train_y, _test_y = \
                x_train.iloc[train], x_train.iloc[test], y_train[train], y_train[test]
            
            train_lgb = lgb.Dataset(np.array(_train_x),np.array(_train_y))
            test_lgb = lgb.Dataset(np.array(_test_x),np.array(_test_y),reference=train_lgb)
            
            model = lgb.train(
                params,
                train_lgb,
                num_boost_round=10000,
                valid_sets=test_lgb,
                early_stopping_rounds=200
            )
            
            boost_rounds.append(model.best_iteration)
            score.append(model.best_score)
            #score.append(-verify_accuracy(binary_predict(model.predict(_test_x), 0.5), _test_y))
            
        # print('nb_trees={} val_loss={}'.format(boost_rounds, score))
        # print(len(score))
        mean_score = np.mean([list(score[k]['valid_0'].values())[0] 
                              for k in range(len(score))])
        #mean_score = np.mean(score)

        # print('average of best iteration:', np.average(boost_rounds))
        return {'loss': mean_score, 'status': STATUS_OK}
    
    trials = Trials()
    # minimize the objective over the space
    best_params = fmin(
        fn=objective,
        space=params,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals
    )
    print("Done")
    return {'best_params': best_params,
            'trials': trials}

In [4]:
username = 'takes'
username = 'morinibu'
data_directory = 'C://Users//' + username + '//GitHub//kaggle//data//home_credit'
main_directory = 'C://Users//' + username + '//GitHub//kaggle//home_credit'

In [6]:
data = pd.read_csv(main_directory + '//' + 'conbined_data.csv')

In [7]:
data.head()

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,...,CC_NAME_CONTRACT_STATUS_Signed_MAX,CC_NAME_CONTRACT_STATUS_Signed_MEAN,CC_NAME_CONTRACT_STATUS_Signed_SUM,CC_NAME_CONTRACT_STATUS_Signed_VAR,CC_NAME_CONTRACT_STATUS_nan_MIN,CC_NAME_CONTRACT_STATUS_nan_MAX,CC_NAME_CONTRACT_STATUS_nan_MEAN,CC_NAME_CONTRACT_STATUS_nan_SUM,CC_NAME_CONTRACT_STATUS_nan_VAR,CC_COUNT
0,24700.5,406597.5,351000.0,202500.0,0.0,0.0,0.0,0.0,0.0,1.0,...,,,,,,,,,,
1,35698.5,1293502.5,1129500.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,6750.0,135000.0,135000.0,67500.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,29686.5,312682.5,297000.0,135000.0,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
4,21865.5,513000.0,513000.0,121500.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [39]:
data_test = data[data.TARGET.isnull()].reset_index(drop=True)
data_train = data[[not(k) for k in data.TARGET.isnull()]].reset_index(drop=True)

In [40]:
ID = data_test.SK_ID_CURR
data_train.drop(columns=['SK_ID_CURR'], inplace=True)
data_test.drop(columns=['SK_ID_CURR'], inplace=True)

In [41]:
y = data_train.TARGET
X = data_train
X.drop(columns=['TARGET'], inplace=True)

In [65]:
y_submit = data_test.TARGET
X_submit = data_test
X_submit.drop(columns=['TARGET'], inplace=True)

In [42]:
y

0         1.0
1         0.0
2         0.0
3         0.0
4         0.0
5         0.0
6         0.0
7         0.0
8         0.0
9         0.0
10        0.0
11        0.0
12        0.0
13        0.0
14        0.0
15        0.0
16        0.0
17        0.0
18        0.0
19        0.0
20        0.0
21        0.0
22        0.0
23        0.0
24        0.0
25        0.0
26        1.0
27        0.0
28        0.0
29        0.0
         ... 
307470    1.0
307471    0.0
307472    0.0
307473    0.0
307474    0.0
307475    0.0
307476    0.0
307477    0.0
307478    1.0
307479    0.0
307480    0.0
307481    0.0
307482    0.0
307483    0.0
307484    0.0
307485    0.0
307486    0.0
307487    0.0
307488    0.0
307489    0.0
307490    0.0
307491    0.0
307492    0.0
307493    0.0
307494    0.0
307495    0.0
307496    0.0
307497    0.0
307498    1.0
307499    0.0
Name: TARGET, Length: 307500, dtype: float64

In [93]:
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0,
                                                   stratify=y)
listed = {
    'boosting_type': ['dart'],
    "objective": ['binary'],
    "metric": ['binary_error'],
    'eval_metric': ['auc']
}

params_LGB = {
    # 'task': 'train',
    #'boosting_type': hp.choice('boosting_type', listed['boosting_type']),
    'objective': hp.choice('objective', listed['objective']),
    #'metric': hp.choice('metric', listed['metric']),
    'eval_metric': hp.choice('eval_metric', listed['eval_metric']),
    'num_leaves': hp.quniform('num_leaves', 32,64,2),
    'max_depth': hp.quniform('max_depth', 3,12,1),
    'learning_rate': hp.loguniform('learning_rate', -6, -1),
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 0.85),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.6, 0.85),
    # "feature_fraction_seed": 30,
    "subsample": hp.uniform("subsample", 0.5, 0.8),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.6, 0.8),
    "lambda_l2": hp.uniform('lambda_l2', 5, 10),
    "lambda_l1": hp.uniform('lambda_l1', 5, 10),
    #"drop_rate": hp.uniform('drop_rate', 0.15, 0.4),
    'bagging_freq': hp.quniform('bagging_freq', 1,10,1),
    'min_split_gain': hp.uniform('min_split_gain', 0.001, 0.1),
    'min_child_weight'; hp.uniform('min_child_weight', 10, 50)
    'silent' : -1,
    'verbose': -1
}

results_LGB = optimize_LGB(x_train, y_train, params_LGB, 3)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]
  'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))


[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.657247
[3]	valid_0's binary_logloss: 0.657247
[4]	valid_0's binary_logloss: 0.657247
[5]	valid_0's binary_logloss: 0.657247
[6]	valid_0's binary_logloss: 0.657247
[7]	valid_0's binary_logloss: 0.657247
[8]	valid_0's binary_logloss: 0.657247
[9]	valid_0's binary_logloss: 0.657247
[10]	valid_0's binary_logloss: 0.657247
[11]	valid_0's binary_logloss: 0.657247
[12]	valid_0's binary_logloss: 0.657247
[13]	valid_0's binary_logloss: 0.657247
[14]	valid_0's binary_logloss: 0.657247
[15]	valid_0's binary_logloss: 0.657247
[16]	valid_0's binary_logloss: 0.657247
[17]	valid_0's binary_logloss: 0.657247
[18]	valid_0's binary_logloss: 0.657247
[19]	valid_0's binary_logloss: 0.657247
[20]	valid_0's binary_logloss: 0.657247
[21]	valid_0's binary_logloss: 0.657247
[22]	valid_0's binary_logloss: 0.657247
[23]	valid_0's binary_logloss: 0.657247
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.690115
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.687173
[3]	valid_0's binary_logloss: 0.684322
[4]	valid_0's binary_logloss: 0.684322
[5]	valid_0's binary_logloss: 0.684322
[6]	valid_0's binary_logloss: 0.684322
[7]	valid_0's binary_logloss: 0.684322
[8]	valid_0's binary_logloss: 0.684322
[9]	valid_0's binary_logloss: 0.684322
[10]	valid_0's binary_logloss: 0.684322
[11]	valid_0's binary_logloss: 0.684322
[12]	valid_0's binary_logloss: 0.684322
[13]	valid_0's binary_logloss: 0.684322
[14]	valid_0's binary_logloss: 0.684322
[15]	valid_0's binary_logloss: 0.684322
[16]	valid_0's binary_logloss: 0.684322
[17]	valid_0's binary_logloss: 0.684322
[18]	valid_0's binary_logloss: 0.684322
[19]	valid_0's binary_logloss: 0.684322
[20]	valid_0's binary_logloss: 0.684322
[21]	valid_0's binary_logloss: 0.684322
[22]	valid_0's binary_logloss: 0.684322
[23]	valid_0's binary_logloss: 0.684322
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

Done


In [94]:
param = parse_params(results_LGB['best_params'], listed)
param['bagging_freq'] = int(param['bagging_freq'])
param['num_leaves'] = int(param['num_leaves']) 
#param['max_depth'] = int(param['max_depth']) 

In [95]:
param_df = pd.DataFrame(list(param.items()),columns=['name','value'])
param_df.to_csv('result.csv', index=False)

In [84]:
train_lgb = lgb.Dataset(np.array(x_train),np.array(y_train))
test_lgb = lgb.Dataset(np.array(x_test),np.array(y_test),reference=train_lgb)

model = lgb.train(
    param,
    train_lgb,
    num_boost_round=10000,
    valid_sets=test_lgb,
    early_stopping_rounds=200
)

[1]	valid_0's binary_error: 0.0807154
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_error: 0.0806244
[3]	valid_0's binary_error: 0.0807545
[4]	valid_0's binary_error: 0.0807675
[5]	valid_0's binary_error: 0.0807935
[6]	valid_0's binary_error: 0.0807805
[7]	valid_0's binary_error: 0.0807285
[8]	valid_0's binary_error: 0.0807024
[9]	valid_0's binary_error: 0.0807285
[10]	valid_0's binary_error: 0.0806634
[11]	valid_0's binary_error: 0.0806504
[12]	valid_0's binary_error: 0.0806114
[13]	valid_0's binary_error: 0.0806244
[14]	valid_0's binary_error: 0.0805593
[15]	valid_0's binary_error: 0.0806114
[16]	valid_0's binary_error: 0.0807545
[17]	valid_0's binary_error: 0.0806634
[18]	valid_0's binary_error: 0.0805333
[19]	valid_0's binary_error: 0.0805333
[20]	valid_0's binary_error: 0.0804293
[21]	valid_0's binary_error: 0.0804033
[22]	valid_0's binary_error: 0.0804553
[23]	valid_0's binary_error: 0.0805333
[24]	valid_0's binary_error: 0.0805463
[25]	valid

[208]	valid_0's binary_error: 0.0827707
[209]	valid_0's binary_error: 0.0829138
[210]	valid_0's binary_error: 0.0827967
[211]	valid_0's binary_error: 0.0827187
[212]	valid_0's binary_error: 0.0827447
[213]	valid_0's binary_error: 0.0827317
[214]	valid_0's binary_error: 0.0828358
[215]	valid_0's binary_error: 0.0829138
[216]	valid_0's binary_error: 0.0829008
[217]	valid_0's binary_error: 0.0829008
[218]	valid_0's binary_error: 0.0827967
[219]	valid_0's binary_error: 0.0826927
[220]	valid_0's binary_error: 0.0826407
[221]	valid_0's binary_error: 0.0826146
Early stopping, best iteration is:
[21]	valid_0's binary_error: 0.0804033


In [85]:
from sklearn import metrics

fpr, tpr, thr = metrics.roc_curve(y_test, model.predict(x_test), pos_label=1.0)

In [86]:
metrics.auc(fpr, tpr)

0.7660747243095746

In [87]:
submission_data = pd.DataFrame([])
submission_data['SK_ID_CURR'] = ID
submission_data['TARGET'] = model.predict(X_submit)
submission_data.head()

Unnamed: 0,SK_ID_CURR,TARGET
0,100001,0.043679
1,100005,0.156628
2,100013,0.055508
3,100028,0.040551
4,100038,0.139693


In [88]:
submission_data.to_csv('submission.csv', index=False)