In [65]:

import numpy as np
import pandas as pd
from datetime import datetime

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
from sklearn import preprocessing
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers

from xgboost import XGBRegressor, XGBRFRegressor, XGBClassifier

In [66]:
# Read the train and test CSVs in one pass; in both, the 'id' column becomes
# the row index.
train_data, test_data = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in ("data/train.csv", "data/test.csv")
)

In [67]:
# Mean-impute missing values. Both tables are filled from train_data's column
# means; the test fill reads those means from the already-imputed train_data.
train_data = train_data.pipe(lambda frame: frame.fillna(value=frame.mean()))
test_data = test_data.pipe(lambda frame: frame.fillna(value=train_data.mean()))


In [68]:
# Separate the training table into the feature frame (everything except
# 'claim') and the 'claim' column. Both are copies, so later edits to them
# leave train_data untouched.
X_train = train_data.drop(columns='claim').copy()
Y_train = train_data['claim'].copy()

In [69]:
# Fit the standardiser on the training features only, then apply that same
# fitted transform to the training features and to the test table.
scaler = preprocessing.StandardScaler().fit(X_train)

# Assigning through the column list writes the scaled values back in place
# while keeping each frame's index and column labels.
X_train[X_train.columns] = scaler.transform(X_train)
test_data[test_data.columns] = scaler.transform(test_data)


In [70]:
# Five folds, taken in the existing row order (no shuffling).
kfold = KFold(n_splits=5, shuffle=False)

# Hyper-parameters for the gradient-boosted tree classifier.
xgb_params = dict(
    n_estimators=4500,
    reg_alpha=26,
    reg_lambda=3,
    subsample=0.6,
    colsample_bytree=0.6,
    max_depth=9,
    min_child_weight=5,
    gamma=13.05,
    learning_rate=0.01,
    n_jobs=4,
    booster='gbtree',
    verbosity=1,
)
model = XGBClassifier(**xgb_params)

# NOTE(review): the same `model` object is fitted on every fold. Presumably
# each fit() call retrains from scratch, so only the last fold's fit is left
# in `model` after the loop and no per-fold score is kept — confirm.
for train_index, test_index in kfold.split(X_train, Y_train):
    # Evaluation pairs, in order: the fold's training rows, then its held-out
    # rows (these are what the validation_0 / validation_1 log columns show).
    fold_eval_set = [
        (X_train.iloc[rows], Y_train.iloc[rows])
        for rows in (train_index, test_index)
    ]

    # NOTE(review): early stopping is presumably judged on the last eval_set
    # entry (the held-out rows); passing early_stopping_rounds / eval_metric
    # to fit() may be version-dependent — check against the installed XGBoost.
    model.fit(
        X_train.iloc[train_index],
        Y_train.iloc[train_index],
        eval_set=fold_eval_set,
        eval_metric='auc',
        early_stopping_rounds=5,
        verbose=True,
    )




[0]	validation_0-auc:0.55377	validation_1-auc:0.54283
[1]	validation_0-auc:0.57906	validation_1-auc:0.56710
[2]	validation_0-auc:0.60418	validation_1-auc:0.59252
[3]	validation_0-auc:0.62485	validation_1-auc:0.61297


KeyboardInterrupt: 

In [None]:
# Score the test table with the positive-class probability (second column of
# predict_proba) rather than hard 0/1 labels: training early-stops on AUC, a
# ranking metric, and thresholded labels throw the ranking away.
# NOTE(review): assumes the submission is scored on AUC as well, and that
# class 1 is the positive 'claim' label — confirm against the task rules.
predicts = model.predict_proba(test_data)[:, 1]


In [None]:

# Assemble the submission table: a 'claim' column holding the predictions,
# followed by an 'id' column taken from the test table's index.
output = pd.DataFrame(predicts, columns=['claim']).assign(id=test_data.index)

# Write it out without the DataFrame's own row index.
output.to_csv('submission.csv', index=False)





