In [36]:
import numpy as np
import pandas as pd
import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import normalize
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score
import sys,random
#
# Prepare the data
#
def main(argv):
    """Grid-search a LightGBM binary classifier on ``train.csv`` and report F1.

    Reads the semicolon-separated training file, drops the columns that are
    not used as features, scales the feature matrix with scikit-learn's
    ``normalize`` (default settings), holds out a stratified 20% split and
    grid-searches ``num_leaves`` / ``min_data_in_leaf`` with F1 scoring.
    The best parameters, the best cross-validation score and the F-score on
    the held-out split are printed.

    Args:
        argv: Command-line arguments. Accepted for the script entry point
            but not used.

    Returns:
        The fitted ``GridSearchCV`` object, so that code running after this
        function (e.g. the submission cell) can call ``predict`` on it.
    """
    train = pd.read_csv('train.csv', sep=';')
    unused_columns = ['cash_in_out', 'display_type', 'scanner_code_reader', 'atm_id']
    # Non-mutating drops keep every intermediate frame well defined on re-run.
    train = train.drop(unused_columns, axis=1)

    # Separate the labels from the features before scaling.
    y = train.target.values
    features = train.drop(['target'], axis=1)
    x = normalize(features.values)

    # Stratified hold-out split with a fixed seed for reproducibility.
    x, x_test, y, y_test = train_test_split(
        x, y, test_size=0.2, random_state=42, stratify=y)

    param_grid = {
        'num_leaves': [18, 19, 20, 21, 22],
        #'reg_alpha': [0.1, 0.5],
        'min_data_in_leaf': [30, 50],
        #'lambda_l1': [0, 1, 1.5],
        #'lambda_l2': [0, 1]
        }

    estimator = lightgbm.LGBMClassifier(
        boosting_type='gbdt',
        objective='binary',
        learning_rate=0.03,
        metric='auc')

    gridsearch = GridSearchCV(estimator, param_grid, scoring='f1')

    # NOTE(review): the hold-out split drives early stopping here and is also
    # used for the final F-score below, so that score is likely optimistic.
    # NOTE(review): newer LightGBM releases dropped `early_stopping_rounds`
    # as a fit() keyword in favour of callbacks - confirm against the
    # installed version before upgrading.
    model = gridsearch.fit(
        x, y,
        eval_set=[(x_test, y_test)],
        eval_metric=['auc'],
        early_stopping_rounds=2)

    print(model.best_params_, model.best_score_)

    print('Best score: %0.3f' % gridsearch.best_score_)
    print('Best parameters set:')
    best_parameters = gridsearch.best_estimator_.get_params()
    for param_name in sorted(param_grid.keys()):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))

    predictions = gridsearch.predict(x_test)
    print('F-score:', f1_score(y_test, predictions))

    return model

if __name__ == '__main__':
    # Bind the fitted search at module level: the submission code further
    # down reads a global named `model`.
    model = main(sys.argv[1:])

#print('Save model...')
# save model to file
# AUC 0.67 - 70 
# AUC 0.65 - 65
#model.save_model('model.txt')

[1]	valid_0's auc: 0.638955
Training until validation scores don't improve for 2 rounds.
[2]	valid_0's auc: 0.643067
[3]	valid_0's auc: 0.654098
[4]	valid_0's auc: 0.654285
[5]	valid_0's auc: 0.655602
[6]	valid_0's auc: 0.65597
[7]	valid_0's auc: 0.658023
[8]	valid_0's auc: 0.66063
[9]	valid_0's auc: 0.66298
[10]	valid_0's auc: 0.663866
[11]	valid_0's auc: 0.662107
[12]	valid_0's auc: 0.662657
Early stopping, best iteration is:
[10]	valid_0's auc: 0.663866
[1]	valid_0's auc: 0.624966
Training until validation scores don't improve for 2 rounds.
[2]	valid_0's auc: 0.631897
[3]	valid_0's auc: 0.638065
[4]	valid_0's auc: 0.635416
[5]	valid_0's auc: 0.640613
[6]	valid_0's auc: 0.642876
[7]	valid_0's auc: 0.644201
[8]	valid_0's auc: 0.646476
[9]	valid_0's auc: 0.646297
[10]	valid_0's auc: 0.646625
[11]	valid_0's auc: 0.64653
[12]	valid_0's auc: 0.645753
Early stopping, best iteration is:
[10]	valid_0's auc: 0.646625
[1]	valid_0's auc: 0.62698
Training until validation scores don't improve fo

In [8]:
#
# Create a submission
#
# Load the scoring set and keep the ATM ids for the output file.
test = pd.read_csv('test.csv', sep=';')
ids = test['atm_id'].values
columns = ['cash_in_out', 'display_type', 'scanner_code_reader', 'atm_id']
test_features = test.drop(columns, axis=1)
# The training features were passed through `normalize` before fitting, so
# the scoring features must get the same transformation; predicting on raw
# values would feed the model inputs on a different scale.
x = normalize(test_features.values)
#submission = pd.read_csv('out.csv')

#submission.drop('ATM_ID', inplace=True, axis=1)

# NOTE(review): assumes `model` is the fitted estimator produced by the
# training cell - confirm it is defined in the kernel before running this.
scores = np.asarray(model.predict(x))
# Compute the cut-off once instead of once per element, then label every
# score at or above the mean as 1 and the rest as 0.
threshold = scores.mean()
y = (scores >= threshold).astype(int)
output = pd.DataFrame({'ATM_ID': ids, 'PREDICT': y})
output.to_csv("out.csv", index=False)