In [112]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in

import numpy as np
import pandas as pd
import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import normalize
from sklearn.model_selection import GridSearchCV


#
# Prepare the data
#

train = pd.read_csv('train.csv',sep=';')
columns = ['cash_in_out','display_type','scanner_code_reader','atm_id']
train.drop(columns, inplace=True, axis=1)

# get the labels
y = train.target.values
train.drop(['target'], inplace=True, axis=1)

x = normalize(train.values)
#
# Create training and validation sets
#
x, x_test, y, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
#
# Create the LightGBM data containers
#

train_data = lightgbm.Dataset(x, label=y)
test_data = lightgbm.Dataset(x_test, label=y_test)

param_grid = {
    'num_leaves': [20, 31, 127],
    'reg_alpha': [0.1, 0.5],
    'min_data_in_leaf': [30, 50, 100, 300, 400],
    'lambda_l1': [0, 1, 1.5],
    'lambda_l2': [0, 1]
    }

estimator = lightgbm.LGBMClassifier(boosting_type='gbdt',  
                                   objective='binary',  
                                   learning_rate=0.01, 
                                   metric='auc')
gridsearch = GridSearchCV(estimator, param_grid)

model=gridsearch.fit(x, y,
        eval_set=[(x_test, y_test)],
        eval_metric=['auc', 'binary_logloss'],
early_stopping_rounds=5)


print(model.best_params_, model.best_score_)

#print('Save model...')
# save model to file
# AUC 0.67 - 70 
# AUC 0.65 - 65
#model.save_model('model.txt')

[1]	valid_0's binary_logloss: 0.692008	valid_0's auc: 0.636666
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's binary_logloss: 0.690902	valid_0's auc: 0.636666
[3]	valid_0's binary_logloss: 0.689835	valid_0's auc: 0.63945
[4]	valid_0's binary_logloss: 0.688783	valid_0's auc: 0.645231
[5]	valid_0's binary_logloss: 0.687772	valid_0's auc: 0.644283
[6]	valid_0's binary_logloss: 0.686872	valid_0's auc: 0.645449
[7]	valid_0's binary_logloss: 0.685849	valid_0's auc: 0.644931
[8]	valid_0's binary_logloss: 0.684922	valid_0's auc: 0.650114
[9]	valid_0's binary_logloss: 0.684124	valid_0's auc: 0.649636
[10]	valid_0's binary_logloss: 0.683288	valid_0's auc: 0.650896
[11]	valid_0's binary_logloss: 0.682517	valid_0's auc: 0.651009
[12]	valid_0's binary_logloss: 0.681727	valid_0's auc: 0.6526
[13]	valid_0's binary_logloss: 0.680868	valid_0's auc: 0.653224
[14]	valid_0's binary_logloss: 0.680154	valid_0's auc: 0.652844
[15]	valid_0's binary_logloss: 0.679441	valid_0's auc: 

[6]	valid_0's binary_logloss: 0.686813	valid_0's auc: 0.650643
[7]	valid_0's binary_logloss: 0.685836	valid_0's auc: 0.6542
[8]	valid_0's binary_logloss: 0.684967	valid_0's auc: 0.653592
[9]	valid_0's binary_logloss: 0.684045	valid_0's auc: 0.656036
[10]	valid_0's binary_logloss: 0.683319	valid_0's auc: 0.655357
[11]	valid_0's binary_logloss: 0.682528	valid_0's auc: 0.655883
[12]	valid_0's binary_logloss: 0.681659	valid_0's auc: 0.655973
[13]	valid_0's binary_logloss: 0.680793	valid_0's auc: 0.658276
[14]	valid_0's binary_logloss: 0.680116	valid_0's auc: 0.65769
[15]	valid_0's binary_logloss: 0.679357	valid_0's auc: 0.658023
[16]	valid_0's binary_logloss: 0.678596	valid_0's auc: 0.659644
[17]	valid_0's binary_logloss: 0.677955	valid_0's auc: 0.659844
[18]	valid_0's binary_logloss: 0.677263	valid_0's auc: 0.660332
[19]	valid_0's binary_logloss: 0.676493	valid_0's auc: 0.660705
[20]	valid_0's binary_logloss: 0.67577	valid_0's auc: 0.661544
[21]	valid_0's binary_logloss: 0.675152	valid_0'

[11]	valid_0's binary_logloss: 0.682627	valid_0's auc: 0.65441
[12]	valid_0's binary_logloss: 0.681819	valid_0's auc: 0.655902
[13]	valid_0's binary_logloss: 0.68105	valid_0's auc: 0.656588
[14]	valid_0's binary_logloss: 0.680398	valid_0's auc: 0.655872
[15]	valid_0's binary_logloss: 0.679598	valid_0's auc: 0.657269
[16]	valid_0's binary_logloss: 0.678744	valid_0's auc: 0.658018
[17]	valid_0's binary_logloss: 0.67797	valid_0's auc: 0.658755
[18]	valid_0's binary_logloss: 0.677277	valid_0's auc: 0.658837
[19]	valid_0's binary_logloss: 0.676531	valid_0's auc: 0.659633
[20]	valid_0's binary_logloss: 0.675857	valid_0's auc: 0.659822
[21]	valid_0's binary_logloss: 0.675209	valid_0's auc: 0.6596
[22]	valid_0's binary_logloss: 0.674533	valid_0's auc: 0.659687
[23]	valid_0's binary_logloss: 0.673883	valid_0's auc: 0.659524
[24]	valid_0's binary_logloss: 0.673219	valid_0's auc: 0.660096
[25]	valid_0's binary_logloss: 0.672634	valid_0's auc: 0.660054
[26]	valid_0's binary_logloss: 0.671999	valid

[61]	valid_0's binary_logloss: 0.656885	valid_0's auc: 0.666081
[62]	valid_0's binary_logloss: 0.656608	valid_0's auc: 0.666064
[63]	valid_0's binary_logloss: 0.656211	valid_0's auc: 0.666591
[64]	valid_0's binary_logloss: 0.655868	valid_0's auc: 0.666912
[65]	valid_0's binary_logloss: 0.655637	valid_0's auc: 0.666947
[66]	valid_0's binary_logloss: 0.655208	valid_0's auc: 0.667609
[67]	valid_0's binary_logloss: 0.655053	valid_0's auc: 0.667309
[68]	valid_0's binary_logloss: 0.654797	valid_0's auc: 0.667459
[69]	valid_0's binary_logloss: 0.654647	valid_0's auc: 0.667125
[70]	valid_0's binary_logloss: 0.654395	valid_0's auc: 0.667036
[71]	valid_0's binary_logloss: 0.654155	valid_0's auc: 0.667138
Early stopping, best iteration is:
[66]	valid_0's binary_logloss: 0.655208	valid_0's auc: 0.667609
[1]	valid_0's binary_logloss: 0.692113	valid_0's auc: 0.633397
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's binary_logloss: 0.691051	valid_0's auc: 0.645989
[3]	valid_

[1]	valid_0's binary_logloss: 0.692203	valid_0's auc: 0.621437
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's binary_logloss: 0.691112	valid_0's auc: 0.640494
[3]	valid_0's binary_logloss: 0.690199	valid_0's auc: 0.639769
[4]	valid_0's binary_logloss: 0.68914	valid_0's auc: 0.645709
[5]	valid_0's binary_logloss: 0.688239	valid_0's auc: 0.644201
[6]	valid_0's binary_logloss: 0.687343	valid_0's auc: 0.650652
[7]	valid_0's binary_logloss: 0.686348	valid_0's auc: 0.652624
[8]	valid_0's binary_logloss: 0.685403	valid_0's auc: 0.655465
[9]	valid_0's binary_logloss: 0.684585	valid_0's auc: 0.655357
[10]	valid_0's binary_logloss: 0.683625	valid_0's auc: 0.657943
[11]	valid_0's binary_logloss: 0.68271	valid_0's auc: 0.658615
[12]	valid_0's binary_logloss: 0.682003	valid_0's auc: 0.656784
[13]	valid_0's binary_logloss: 0.68117	valid_0's auc: 0.658032
[14]	valid_0's binary_logloss: 0.680374	valid_0's auc: 0.658332
[15]	valid_0's binary_logloss: 0.679591	valid_0's auc: 

[35]	valid_0's binary_logloss: 0.66766	valid_0's auc: 0.66271
[36]	valid_0's binary_logloss: 0.667173	valid_0's auc: 0.663296
[37]	valid_0's binary_logloss: 0.66674	valid_0's auc: 0.663195
[38]	valid_0's binary_logloss: 0.666265	valid_0's auc: 0.663456
[39]	valid_0's binary_logloss: 0.665882	valid_0's auc: 0.663581
[40]	valid_0's binary_logloss: 0.665402	valid_0's auc: 0.663695
[41]	valid_0's binary_logloss: 0.664833	valid_0's auc: 0.664848
[42]	valid_0's binary_logloss: 0.664434	valid_0's auc: 0.664959
[43]	valid_0's binary_logloss: 0.664088	valid_0's auc: 0.665117
[44]	valid_0's binary_logloss: 0.663557	valid_0's auc: 0.665799
[45]	valid_0's binary_logloss: 0.663027	valid_0's auc: 0.666308
[46]	valid_0's binary_logloss: 0.662685	valid_0's auc: 0.666271
[47]	valid_0's binary_logloss: 0.662346	valid_0's auc: 0.66606
[48]	valid_0's binary_logloss: 0.662041	valid_0's auc: 0.666047
[49]	valid_0's binary_logloss: 0.661675	valid_0's auc: 0.665917
[50]	valid_0's binary_logloss: 0.661264	vali

[5]	valid_0's binary_logloss: 0.688487	valid_0's auc: 0.638151
[6]	valid_0's binary_logloss: 0.687403	valid_0's auc: 0.645244
[7]	valid_0's binary_logloss: 0.68642	valid_0's auc: 0.648299
[8]	valid_0's binary_logloss: 0.685563	valid_0's auc: 0.646989
[9]	valid_0's binary_logloss: 0.684736	valid_0's auc: 0.64818
[10]	valid_0's binary_logloss: 0.683826	valid_0's auc: 0.649118
[11]	valid_0's binary_logloss: 0.683038	valid_0's auc: 0.648274
[12]	valid_0's binary_logloss: 0.682208	valid_0's auc: 0.651199
[13]	valid_0's binary_logloss: 0.681508	valid_0's auc: 0.650975
[14]	valid_0's binary_logloss: 0.680843	valid_0's auc: 0.650433
[15]	valid_0's binary_logloss: 0.679993	valid_0's auc: 0.65378
[16]	valid_0's binary_logloss: 0.679304	valid_0's auc: 0.653581
[17]	valid_0's binary_logloss: 0.678543	valid_0's auc: 0.654805
[18]	valid_0's binary_logloss: 0.677666	valid_0's auc: 0.657269
[19]	valid_0's binary_logloss: 0.677025	valid_0's auc: 0.657317
[20]	valid_0's binary_logloss: 0.676228	valid_0'

AttributeError: 'GridSearchCV' object has no attribute 'save_model'

In [116]:
#
# Create a submission
#
test = pd.read_csv('test.csv',sep=';')
ids = test['atm_id'].values
columns = ['cash_in_out','display_type','scanner_code_reader','atm_id']
test.drop(columns, inplace=True, axis=1)
x = test.values
#submission = pd.read_csv('out.csv')

#submission.drop('ATM_ID', inplace=True, axis=1)

y = model.predict(x)
y = [1 if i >=np.mean(y) else 0 for i in y]
output = pd.DataFrame({'ATM_ID': ids, 'PREDICT': y})
output.to_csv("out.csv", index=False)