In [14]:
from utils import utils, gini
from constants import *
import os

import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization

from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback, CSVLogger
from keras.wrappers.scikit_learn import KerasClassifier

# Short tag for this model; it is used further down as the filename prefix for
# the CSV training log and the checkpoint file.
MODEL_NAME = 'nn'

In [10]:
# Load the raw train/test pair and hand it straight to the project's
# feature-engineering helper, which returns the pair used from here on.
train, test = utils.engineer_features(*utils.load_data())

Successfully loaded train and test data
Successfully engineer features train and test data


In [11]:
# Impute missing values before MinMax scaling. The medians are computed on the
# training frame only and reused for the test frame, so both sets are filled
# with identical values and no test-set statistics enter the preprocessing.
# Entries of the median Series that have no matching column in `test`
# (e.g. 'target') are simply ignored by fillna.
train_medians = train.median()
train = train.fillna(train_medians)
test = test.fillna(train_medians)

In [12]:
# Separate the label from the predictors; the 'id' column is removed from both
# feature matrices.
y_train = train['target']
X_train = train.drop(['id', 'target'], axis=1)
X_test = test.drop(['id'], axis=1)

In [13]:
# Fit the min-max scaler on the training matrix and apply the same fitted
# parameters to the test matrix.
# NOTE(review): the scaler is fitted before the hold-out split below, so the
# validation rows contribute to the fitted min/max -- confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Hold out 20% of the training rows for validation; SEED keeps the split
# reproducible.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SEED)

In [30]:
class gini_callback(Callback):
    """Keras callback that records Gini scores in the per-epoch logs.

    At the end of every epoch the current model is scored on the supplied
    training and validation arrays with ``gini.gini_sklearn`` and the results
    are written to ``logs['gini_tr']`` and ``logs['gini_val']``. Callbacks
    that come later in the callback list receive the same ``logs`` dict and
    can therefore monitor these keys, so this callback must be listed before
    any callback that reads them.

    All other hooks (train/epoch/batch begin, train/batch end) are inherited
    unchanged from ``Callback``, whose default implementations do nothing.

    Parameters
    ----------
    training_data : tuple
        ``(X, y)`` pair scored as the training set.
    validation_data : tuple
        ``(X, y)`` pair scored as the validation set.
    """

    def __init__(self, training_data, validation_data):
        # Run the base-class initializer so the attributes Keras manages on
        # every callback exist before training starts. The explicit two-argument
        # form of super() works on both Python 2 and Python 3.
        super(gini_callback, self).__init__()
        self.X_tr = training_data[0]
        self.y_tr = training_data[1]
        self.X_val = validation_data[0]
        self.y_val = validation_data[1]

    def on_epoch_end(self, epoch, logs=None):
        """Score the model on both data sets and store the results in ``logs``.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished (unused).
        logs : dict, optional
            Metric dict shared between callbacks for this epoch. A fresh dict
            is used when none is passed, instead of a shared mutable default.
        """
        if logs is None:
            logs = {}

        # An earlier version derived these scores as 2 * ROC AUC - 1; the
        # project's gini helper is used instead.
        y_pred_tr = self.model.predict_proba(self.X_tr)
        logs['gini_tr'] = gini.gini_sklearn(self.y_tr, y_pred_tr)

        y_pred_val = self.model.predict_proba(self.X_val)
        logs['gini_val'] = gini.gini_sklearn(self.y_val, y_pred_val)

        # NOTE(review): the second value is the validation-split score, even
        # though the message labels it "test set".
        print('Gini Score in training set: {},  test set: {}'.format(logs['gini_tr'], logs['gini_val']))
    
    
def create_model():
    """Build and compile the feed-forward binary classifier.

    The network stacks four hidden stages of 200, 100, 50 and 25 units. Each
    stage is Dense (glorot_normal init) -> ReLU -> BatchNormalization ->
    Dropout, with dropout rates 0.5, 0.25, 0.15 and 0.1 respectively, followed
    by a single sigmoid output unit. The input width is read from the
    notebook-level ``X_tr`` array.

    Returns
    -------
    A compiled ``Sequential`` model using the 'sgd' optimizer,
    binary cross-entropy loss and the accuracy metric.
    """
    # (units, dropout rate) for each hidden stage, in order.
    hidden_stages = ((200, 0.5), (100, 0.25), (50, 0.15), (25, 0.1))

    net = Sequential()
    for stage_idx, (units, drop_rate) in enumerate(hidden_stages):
        dense_kwargs = {'kernel_initializer': 'glorot_normal'}
        if stage_idx == 0:
            # Only the first layer needs to be told the input width.
            dense_kwargs['input_dim'] = X_tr.shape[1]
        net.add(Dense(units, **dense_kwargs))
        net.add(Activation('relu'))
        net.add(BatchNormalization())
        net.add(Dropout(drop_rate))

    net.add(Dense(1, activation='sigmoid'))

    net.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return net


# Both output files live under LOG_PATH and are prefixed with the model tag:
# the per-epoch CSV log and the checkpoint file.
log_path, checkpoint_path = (
    os.path.join(LOG_PATH, MODEL_NAME + suffix)
    for suffix in ('_log.csv', '_check.check')
)

In [31]:
# Training hyper-parameters. They are assigned here so the cell runs on a
# fresh kernel; previously these assignments were commented out and the names
# used below were undefined unless they survived from an earlier session.
epochs = 3
batch_size = 128
patience = 10
# Optimizer names noted as candidates (presumably for model.compile):
# ['sgd', 'adam', 'rmsprop', 'adagrad']

# Order matters: gini_callback writes 'gini_tr'/'gini_val' into the epoch logs,
# and the callbacks after it read 'gini_val' (early stopping, checkpointing) or
# dump the whole log dict (CSV logger).
callbacks = [
    gini_callback(training_data=(X_tr, y_tr), validation_data=(X_val, y_val)),
    EarlyStopping(monitor='gini_val', patience=patience, mode='max', verbose=1),
    CSVLogger(log_path, separator=',', append=False),
    ModelCheckpoint(checkpoint_path, monitor='gini_val', mode='max', save_best_only=True, verbose=1)
]


# Wrap the model factory in the scikit-learn style classifier and train it on
# the training split.
model = KerasClassifier(build_fn=create_model,
                        batch_size=batch_size,
                        epochs=int(epochs),
                        verbose=99,
                        shuffle=True,
                        callbacks=callbacks)
model.fit(X_tr, y_tr)

In [42]:
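# NOTE: disabled Bayesian-optimisation experiment, kept for reference.
# NOTE(review): BayesianOptimization maximizes the objective it is given, so
# returning -1 * gini_score would steer the search toward lower Gini -- confirm
# the sign before re-enabling.
# def kerasbayes(epochs):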
#     model = KerasClassifier(build_fn=create_model,
#                         batch_size=batch_size,
#                         epochs=int(epochs),
#                         verbose=99,
#                         shuffle=True,
#                         callbacks=callbacks)
#     model.fit(X_tr, y_tr)
    
#     pred = model.predict_proba(X_val)
#     gini_score = gini.gini_sklearn(y_val, pred)
    
#     return  (-1 * gini_score)

# kerasBO = BayesianOptimization(kerasbayes, 
#                                {'epochs':(1, 5)}
#                               )