In [1]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
%matplotlib inline
rcParams['figure.figsize'] = 19, 12
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

import collections
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

import xgboost as xgb
import lightgbm as lgb
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn import metrics, cross_validation



In [2]:
def parse_params(best, listed):
    best = best.copy()
    if len(listed) == 0:
        return best
    else :
        for key in best:
            if key in listed.keys():
                best[key] = listed[key][int(best[key])]
        return best

In [3]:
def optimize_LGB(x_train, y_train, params, max_evals):
    def objective(params):
        params['num_leaves'] = int(params['num_leaves'])
        params['bagging_freq'] = int(params['bagging_freq'])
        skf = cross_validation.StratifiedKFold(
            y_train, # Samples to split in K folds
            n_folds=5, # Number of folds. Must be at least 2.
            shuffle=True, # Whether to shuffle each stratification of the data before splitting into batches.
            random_state=423 # pseudo-random number generator state used for shuffling
        )
        boost_rounds = []
        score = []

        for train, test in skf:
            _train_x, _test_x, _train_y, _test_y = \
                x_train.iloc[train], x_train.iloc[test], y_train[train], y_train[test]
            
            train_lgb = lgb.Dataset(np.array(_train_x),np.array(_train_y))
            test_lgb = lgb.Dataset(np.array(_test_x),np.array(_test_y),reference=train_lgb)
            
            model = lgb.train(
                params,
                train_lgb,
                num_boost_round=10000,
                valid_sets=test_lgb,
                early_stopping_rounds=200
            )
            
            boost_rounds.append(model.best_iteration)
            score.append(model.best_score)
            #score.append(-verify_accuracy(binary_predict(model.predict(_test_x), 0.5), _test_y))
            
        # print('nb_trees={} val_loss={}'.format(boost_rounds, score))
        # print(len(score))
        mean_score = np.mean([list(score[k]['valid_0'].values())[0] 
                              for k in range(len(score))])
        #mean_score = np.mean(score)

        # print('average of best iteration:', np.average(boost_rounds))
        return {'loss': mean_score, 'status': STATUS_OK}
    
    trials = Trials()
    # minimize the objective over the space
    best_params = fmin(
        fn=objective,
        space=params,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals
    )
    print("Done")
    return {'best_params': best_params,
            'trials': trials}

In [4]:
username = 'takes'
username = 'morinibu'
data_directory = 'C://Users//' + username + '//GitHub//kaggle//data//home_credit'
main_directory = 'C://Users//' + username + '//GitHub//kaggle//home_credit'

In [5]:
data = pd.read_csv(main_directory + '//' + 'conbined_data.csv')

In [6]:
data.head()

Unnamed: 0,AMT_ANNUITY,AMT_CREDIT,AMT_GOODS_PRICE,AMT_INCOME_TOTAL,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_YEAR,...,CC_NAME_CONTRACT_STATUS_Signed_MAX,CC_NAME_CONTRACT_STATUS_Signed_MEAN,CC_NAME_CONTRACT_STATUS_Signed_SUM,CC_NAME_CONTRACT_STATUS_Signed_VAR,CC_NAME_CONTRACT_STATUS_nan_MIN,CC_NAME_CONTRACT_STATUS_nan_MAX,CC_NAME_CONTRACT_STATUS_nan_MEAN,CC_NAME_CONTRACT_STATUS_nan_SUM,CC_NAME_CONTRACT_STATUS_nan_VAR,CC_COUNT
0,24700.5,406597.5,351000.0,202500.0,0.0,0.0,0.0,0.0,0.0,1.0,...,,,,,,,,,,
1,35698.5,1293502.5,1129500.0,270000.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,6750.0,135000.0,135000.0,67500.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,29686.5,312682.5,297000.0,135000.0,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0
4,21865.5,513000.0,513000.0,121500.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,


In [7]:
data_test = data[data.TARGET.isnull()].reset_index(drop=True)
data_train = data[[not(k) for k in data.TARGET.isnull()]].reset_index(drop=True)

In [8]:
ID = data_test.SK_ID_CURR
data_train.drop(columns=['SK_ID_CURR'], inplace=True)
data_test.drop(columns=['SK_ID_CURR'], inplace=True)

In [9]:
y = data_train.TARGET
X = data_train
X.drop(columns=['TARGET'], inplace=True)

In [10]:
y_submit = data_test.TARGET
X_submit = data_test
X_submit.drop(columns=['TARGET'], inplace=True)

In [93]:
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0,
                                                   stratify=y)
listed = {
    'boosting_type': ['dart'],
    "objective": ['binary'],
    "metric": ['binary_error'],
    'eval_metric': ['auc']
}

params_LGB = {
    # 'task': 'train',
    #'boosting_type': hp.choice('boosting_type', listed['boosting_type']),
    'objective': hp.choice('objective', listed['objective']),
    #'metric': hp.choice('metric', listed['metric']),
    'eval_metric': hp.choice('eval_metric', listed['eval_metric']),
    'num_leaves': hp.quniform('num_leaves', 32,64,2),
    'max_depth': hp.quniform('max_depth', 3,12,1),
    'learning_rate': hp.loguniform('learning_rate', -6, -1),
    'feature_fraction': hp.uniform('feature_fraction', 0.5, 0.85),
    'bagging_fraction': hp.uniform('bagging_fraction', 0.6, 0.85),
    # "feature_fraction_seed": 30,
    "subsample": hp.uniform("subsample", 0.5, 0.8),
    "colsample_bytree": hp.uniform("colsample_bytree", 0.6, 0.8),
    "lambda_l2": hp.uniform('lambda_l2', 5, 10),
    "lambda_l1": hp.uniform('lambda_l1', 5, 10),
    #"drop_rate": hp.uniform('drop_rate', 0.15, 0.4),
    'bagging_freq': hp.quniform('bagging_freq', 1,10,1),
    'min_split_gain': hp.uniform('min_split_gain', 0.001, 0.1),
    'min_child_weight'; hp.uniform('min_child_weight', 10, 50)
    'silent' : -1,
    'verbose': -1
}

results_LGB = optimize_LGB(x_train, y_train, params_LGB, 3)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]
  'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))


[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.657247
[3]	valid_0's binary_logloss: 0.657247
[4]	valid_0's binary_logloss: 0.657247
[5]	valid_0's binary_logloss: 0.657247
[6]	valid_0's binary_logloss: 0.657247
[7]	valid_0's binary_logloss: 0.657247
[8]	valid_0's binary_logloss: 0.657247
[9]	valid_0's binary_logloss: 0.657247
[10]	valid_0's binary_logloss: 0.657247
[11]	valid_0's binary_logloss: 0.657247
[12]	valid_0's binary_logloss: 0.657247
[13]	valid_0's binary_logloss: 0.657247
[14]	valid_0's binary_logloss: 0.657247
[15]	valid_0's binary_logloss: 0.657247
[16]	valid_0's binary_logloss: 0.657247
[17]	valid_0's binary_logloss: 0.657247
[18]	valid_0's binary_logloss: 0.657247
[19]	valid_0's binary_logloss: 0.657247
[20]	valid_0's binary_logloss: 0.657247
[21]	valid_0's binary_logloss: 0.657247
[22]	valid_0's binary_logloss: 0.657247
[23]	valid_0's binary_logloss: 0.657247
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.690115
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.687173
[3]	valid_0's binary_logloss: 0.684322
[4]	valid_0's binary_logloss: 0.684322
[5]	valid_0's binary_logloss: 0.684322
[6]	valid_0's binary_logloss: 0.684322
[7]	valid_0's binary_logloss: 0.684322
[8]	valid_0's binary_logloss: 0.684322
[9]	valid_0's binary_logloss: 0.684322
[10]	valid_0's binary_logloss: 0.684322
[11]	valid_0's binary_logloss: 0.684322
[12]	valid_0's binary_logloss: 0.684322
[13]	valid_0's binary_logloss: 0.684322
[14]	valid_0's binary_logloss: 0.684322
[15]	valid_0's binary_logloss: 0.684322
[16]	valid_0's binary_logloss: 0.684322
[17]	valid_0's binary_logloss: 0.684322
[18]	valid_0's binary_logloss: 0.684322
[19]	valid_0's binary_logloss: 0.684322
[20]	valid_0's binary_logloss: 0.684322
[21]	valid_0's binary_logloss: 0.684322
[22]	valid_0's binary_logloss: 0.684322
[23]	valid_0's binary_logloss: 0.684322
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

[1]	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 200 rounds.
[2]	valid_0's binary_logloss: 0.693147
[3]	valid_0's binary_logloss: 0.693147
[4]	valid_0's binary_logloss: 0.693147
[5]	valid_0's binary_logloss: 0.693147
[6]	valid_0's binary_logloss: 0.693147
[7]	valid_0's binary_logloss: 0.693147
[8]	valid_0's binary_logloss: 0.693147
[9]	valid_0's binary_logloss: 0.693147
[10]	valid_0's binary_logloss: 0.693147
[11]	valid_0's binary_logloss: 0.693147
[12]	valid_0's binary_logloss: 0.693147
[13]	valid_0's binary_logloss: 0.693147
[14]	valid_0's binary_logloss: 0.693147
[15]	valid_0's binary_logloss: 0.693147
[16]	valid_0's binary_logloss: 0.693147
[17]	valid_0's binary_logloss: 0.693147
[18]	valid_0's binary_logloss: 0.693147
[19]	valid_0's binary_logloss: 0.693147
[20]	valid_0's binary_logloss: 0.693147
[21]	valid_0's binary_logloss: 0.693147
[22]	valid_0's binary_logloss: 0.693147
[23]	valid_0's binary_logloss: 0.693147
[24]	valid_0's binary_logl

Done


In [94]:
param = parse_params(results_LGB['best_params'], listed)
param['bagging_freq'] = int(param['bagging_freq'])
param['num_leaves'] = int(param['num_leaves']) 
#param['max_depth'] = int(param['max_depth']) 

In [95]:
param_df = pd.DataFrame(list(param.items()),columns=['name','value'])
param_df.to_csv('result.csv', index=False)

In [11]:
param = {'n_estimators': 10000,
         'learning_rate': 0.02,
         'num_leaves': 34,
         'colsample_bytree': 0.9497036,
         'subsample': 0.8715623,
         'max_depth': 8,
         'reg_alpha': 0.041545473,
         'reg_lambda': 0.0735294,
         'min_split_gain': 0.0222415,
         'min_child_weight': 39.3259775,
         'metric': 'auc',
         'verbose': 0}

In [27]:
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0,
                                                    test_size=0.1,
                                                    shuffle=True,
                                                    stratify=y)

train_lgb = lgb.Dataset(np.array(x_train),np.array(y_train))
test_lgb = lgb.Dataset(np.array(x_test),np.array(y_test),reference=train_lgb)

model = lgb.train(
    param,
    train_lgb,
    num_boost_round=10000,
    valid_sets=test_lgb,
    early_stopping_rounds=300
)

[1]	valid_0's auc: 0.731345
Training until validation scores don't improve for 300 rounds.
[2]	valid_0's auc: 0.732268
[3]	valid_0's auc: 0.732466
[4]	valid_0's auc: 0.73927
[5]	valid_0's auc: 0.739845
[6]	valid_0's auc: 0.74017
[7]	valid_0's auc: 0.741031
[8]	valid_0's auc: 0.743147
[9]	valid_0's auc: 0.743594
[10]	valid_0's auc: 0.744928
[11]	valid_0's auc: 0.746249
[12]	valid_0's auc: 0.746701
[13]	valid_0's auc: 0.746737
[14]	valid_0's auc: 0.747046
[15]	valid_0's auc: 0.747443
[16]	valid_0's auc: 0.747642
[17]	valid_0's auc: 0.748184
[18]	valid_0's auc: 0.748615
[19]	valid_0's auc: 0.749001
[20]	valid_0's auc: 0.749086
[21]	valid_0's auc: 0.749664
[22]	valid_0's auc: 0.749783
[23]	valid_0's auc: 0.749775
[24]	valid_0's auc: 0.750085
[25]	valid_0's auc: 0.750287
[26]	valid_0's auc: 0.750588
[27]	valid_0's auc: 0.750798
[28]	valid_0's auc: 0.751071
[29]	valid_0's auc: 0.751723
[30]	valid_0's auc: 0.752035
[31]	valid_0's auc: 0.752224
[32]	valid_0's auc: 0.752573
[33]	valid_0's auc: 

[277]	valid_0's auc: 0.783313
[278]	valid_0's auc: 0.783373
[279]	valid_0's auc: 0.783435
[280]	valid_0's auc: 0.783518
[281]	valid_0's auc: 0.783554
[282]	valid_0's auc: 0.783574
[283]	valid_0's auc: 0.783625
[284]	valid_0's auc: 0.78368
[285]	valid_0's auc: 0.783724
[286]	valid_0's auc: 0.783826
[287]	valid_0's auc: 0.783868
[288]	valid_0's auc: 0.783931
[289]	valid_0's auc: 0.784002
[290]	valid_0's auc: 0.784036
[291]	valid_0's auc: 0.784128
[292]	valid_0's auc: 0.784156
[293]	valid_0's auc: 0.784176
[294]	valid_0's auc: 0.784208
[295]	valid_0's auc: 0.784247
[296]	valid_0's auc: 0.784249
[297]	valid_0's auc: 0.784332
[298]	valid_0's auc: 0.784397
[299]	valid_0's auc: 0.784444
[300]	valid_0's auc: 0.784461
[301]	valid_0's auc: 0.784525
[302]	valid_0's auc: 0.784561
[303]	valid_0's auc: 0.784603
[304]	valid_0's auc: 0.784612
[305]	valid_0's auc: 0.78464
[306]	valid_0's auc: 0.78469
[307]	valid_0's auc: 0.784708
[308]	valid_0's auc: 0.78474
[309]	valid_0's auc: 0.784791
[310]	valid_0'

[552]	valid_0's auc: 0.78974
[553]	valid_0's auc: 0.78975
[554]	valid_0's auc: 0.789808
[555]	valid_0's auc: 0.789815
[556]	valid_0's auc: 0.789813
[557]	valid_0's auc: 0.789806
[558]	valid_0's auc: 0.789795
[559]	valid_0's auc: 0.78982
[560]	valid_0's auc: 0.789826
[561]	valid_0's auc: 0.789822
[562]	valid_0's auc: 0.789817
[563]	valid_0's auc: 0.789799
[564]	valid_0's auc: 0.789813
[565]	valid_0's auc: 0.78979
[566]	valid_0's auc: 0.789797
[567]	valid_0's auc: 0.78982
[568]	valid_0's auc: 0.789834
[569]	valid_0's auc: 0.789842
[570]	valid_0's auc: 0.789837
[571]	valid_0's auc: 0.789849
[572]	valid_0's auc: 0.789837
[573]	valid_0's auc: 0.789846
[574]	valid_0's auc: 0.789876
[575]	valid_0's auc: 0.789904
[576]	valid_0's auc: 0.78993
[577]	valid_0's auc: 0.789952
[578]	valid_0's auc: 0.789944
[579]	valid_0's auc: 0.789945
[580]	valid_0's auc: 0.789938
[581]	valid_0's auc: 0.78993
[582]	valid_0's auc: 0.789957
[583]	valid_0's auc: 0.789954
[584]	valid_0's auc: 0.789962
[585]	valid_0's a

[826]	valid_0's auc: 0.79129
[827]	valid_0's auc: 0.791296
[828]	valid_0's auc: 0.791288
[829]	valid_0's auc: 0.791279
[830]	valid_0's auc: 0.791273
[831]	valid_0's auc: 0.791266
[832]	valid_0's auc: 0.791256
[833]	valid_0's auc: 0.791259
[834]	valid_0's auc: 0.791247
[835]	valid_0's auc: 0.791258
[836]	valid_0's auc: 0.791248
[837]	valid_0's auc: 0.791287
[838]	valid_0's auc: 0.791283
[839]	valid_0's auc: 0.791293
[840]	valid_0's auc: 0.791309
[841]	valid_0's auc: 0.791329
[842]	valid_0's auc: 0.791322
[843]	valid_0's auc: 0.791323
[844]	valid_0's auc: 0.791314
[845]	valid_0's auc: 0.791324
[846]	valid_0's auc: 0.79132
[847]	valid_0's auc: 0.791363
[848]	valid_0's auc: 0.791348
[849]	valid_0's auc: 0.791315
[850]	valid_0's auc: 0.791308
[851]	valid_0's auc: 0.791301
[852]	valid_0's auc: 0.791281
[853]	valid_0's auc: 0.791272
[854]	valid_0's auc: 0.791293
[855]	valid_0's auc: 0.791283
[856]	valid_0's auc: 0.791288
[857]	valid_0's auc: 0.791287
[858]	valid_0's auc: 0.791313
[859]	valid_

[1097]	valid_0's auc: 0.7917
[1098]	valid_0's auc: 0.791701
[1099]	valid_0's auc: 0.791695
[1100]	valid_0's auc: 0.79169
[1101]	valid_0's auc: 0.79169
[1102]	valid_0's auc: 0.791709
[1103]	valid_0's auc: 0.791712
[1104]	valid_0's auc: 0.791714
[1105]	valid_0's auc: 0.791727
[1106]	valid_0's auc: 0.791738
[1107]	valid_0's auc: 0.791729
[1108]	valid_0's auc: 0.791747
[1109]	valid_0's auc: 0.791748
[1110]	valid_0's auc: 0.791759
[1111]	valid_0's auc: 0.791752
[1112]	valid_0's auc: 0.791748
[1113]	valid_0's auc: 0.791755
[1114]	valid_0's auc: 0.79176
[1115]	valid_0's auc: 0.791755
[1116]	valid_0's auc: 0.791744
[1117]	valid_0's auc: 0.791778
[1118]	valid_0's auc: 0.791789
[1119]	valid_0's auc: 0.791775
[1120]	valid_0's auc: 0.791794
[1121]	valid_0's auc: 0.791806
[1122]	valid_0's auc: 0.791808
[1123]	valid_0's auc: 0.791803
[1124]	valid_0's auc: 0.791816
[1125]	valid_0's auc: 0.791806
[1126]	valid_0's auc: 0.791823
[1127]	valid_0's auc: 0.791843
[1128]	valid_0's auc: 0.791864
[1129]	valid_

[1363]	valid_0's auc: 0.792309
[1364]	valid_0's auc: 0.792312
[1365]	valid_0's auc: 0.792319
[1366]	valid_0's auc: 0.792304
[1367]	valid_0's auc: 0.7923
[1368]	valid_0's auc: 0.792311
[1369]	valid_0's auc: 0.792307
[1370]	valid_0's auc: 0.792302
[1371]	valid_0's auc: 0.792293
[1372]	valid_0's auc: 0.792291
[1373]	valid_0's auc: 0.792283
[1374]	valid_0's auc: 0.792271
[1375]	valid_0's auc: 0.792302
[1376]	valid_0's auc: 0.792287
[1377]	valid_0's auc: 0.792283
[1378]	valid_0's auc: 0.792291
[1379]	valid_0's auc: 0.792282
[1380]	valid_0's auc: 0.792308
[1381]	valid_0's auc: 0.792311
[1382]	valid_0's auc: 0.792326
[1383]	valid_0's auc: 0.792307
[1384]	valid_0's auc: 0.792303
[1385]	valid_0's auc: 0.79231
[1386]	valid_0's auc: 0.792318
[1387]	valid_0's auc: 0.792323
[1388]	valid_0's auc: 0.79234
[1389]	valid_0's auc: 0.792361
[1390]	valid_0's auc: 0.792357
[1391]	valid_0's auc: 0.792366
[1392]	valid_0's auc: 0.792361
[1393]	valid_0's auc: 0.79237
[1394]	valid_0's auc: 0.792373
[1395]	valid_

[1629]	valid_0's auc: 0.792261
[1630]	valid_0's auc: 0.792229
[1631]	valid_0's auc: 0.792217
[1632]	valid_0's auc: 0.792202
[1633]	valid_0's auc: 0.792216
[1634]	valid_0's auc: 0.792201
[1635]	valid_0's auc: 0.792199
[1636]	valid_0's auc: 0.792182
[1637]	valid_0's auc: 0.792183
[1638]	valid_0's auc: 0.792177
[1639]	valid_0's auc: 0.79217
[1640]	valid_0's auc: 0.792149
[1641]	valid_0's auc: 0.792134
[1642]	valid_0's auc: 0.792134
[1643]	valid_0's auc: 0.792111
[1644]	valid_0's auc: 0.792097
[1645]	valid_0's auc: 0.792101
[1646]	valid_0's auc: 0.792112
[1647]	valid_0's auc: 0.792101
[1648]	valid_0's auc: 0.792109
[1649]	valid_0's auc: 0.792114
[1650]	valid_0's auc: 0.79212
[1651]	valid_0's auc: 0.792138
[1652]	valid_0's auc: 0.792134
[1653]	valid_0's auc: 0.79214
[1654]	valid_0's auc: 0.792153
[1655]	valid_0's auc: 0.792153
[1656]	valid_0's auc: 0.79213
[1657]	valid_0's auc: 0.792112
[1658]	valid_0's auc: 0.792088
[1659]	valid_0's auc: 0.792065
[1660]	valid_0's auc: 0.792088
[1661]	valid

In [30]:
from sklearn import metrics

fpr, tpr, thr = metrics.roc_curve(y_test, model.predict(x_test), pos_label=1.0)

In [31]:
metrics.auc(fpr, tpr)

0.7924047766816221

In [34]:
submission_data = pd.DataFrame([])
submission_data['SK_ID_CURR'] = ID
submission_data['TARGET'] = (model.predict(X_submit))
submission_data.head()
submission_data['TARGET'] = [k if k > 0 else 0 for k in submission_data['TARGET']]

Unnamed: 0,SK_ID_CURR,TARGET
0,100001,0.01986
1,100005,0.150642
2,100013,0.051855
3,100028,0.049598
4,100038,0.133577


In [33]:
submission_data.to_csv('submission.csv', index=False)

In [35]:
submission_data[submission_data.TARGET < 0]


Unnamed: 0,SK_ID_CURR,TARGET
67,100512,-0.003649
75,100569,-0.005080
116,100931,-0.000575
137,101051,-0.018544
141,101064,-0.000184
143,101087,-0.003186
151,101126,-0.004572
159,101186,-0.026138
189,101318,-0.004748
191,101329,-0.011940
