In [1]:
from keras.utils import Sequence
import numpy as np
import pickle as pkl
import pandas as pd

Using TensorFlow backend.


# Load Data

In [2]:
def _load_pickle(path):
    """Return the object unpickled from ``path``.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling raises.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # produced by a trusted preprocessing step.
    with open(path, 'rb') as loadfile:
        return pkl.load(loadfile)


# Pre-built sequence data: train/test inputs, train labels, and the
# transaction IDs that are later paired with the test predictions.
X_Train_Seq = _load_pickle('data/X_Train_Seq')
X_Test_Seq = _load_pickle('data/X_Test_Seq')
Y_Train_Seq = _load_pickle('data/Y_Train_Seq')
TranID_Seq = _load_pickle('data/TranID_Seq')

# Model 1

In [3]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Masking, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils.generic_utils import get_custom_objects
import tensorflow as tf
import keras.backend as K

In [4]:
# Width of one timestep's feature vector, read from the first timestep of the
# first training sequence; used as the last dimension of the model's input shape.
# NOTE(review): assumes every timestep of every sequence has this same width — confirm against preprocessing.
feature_num = len(X_Train_Seq[0][0])

In [5]:
def modify_mse(y_true, y_pred):
    """Squared-error loss that weights label-1 (fraud) errors ten times more.

    Where ``y_true == 1`` the residual is ``1 - y_pred``; where ``y_true == 0``
    the residual is ``y_pred``. Each residual tensor is zero everywhere else,
    and each is squared and averaged over *all* elements (not only the elements
    of its own class) before the two means are combined.

    Returns a scalar tensor: ``10 * mean(fraud_residual**2) + mean(normal_residual**2)``.
    """
    ones = tf.ones_like(y_pred)
    zeros = tf.zeros_like(y_pred)

    is_fraud = tf.equal(y_true, 1)
    is_normal = tf.equal(y_true, 0)

    # Non-fraud positions are filled with 1 so that (1 - value) becomes 0 there.
    fraud_residual = ones - tf.where(is_fraud, y_pred, ones)
    # Positions whose label is not 0 contribute nothing to the normal term.
    normal_residual = tf.where(is_normal, y_pred, zeros)

    fraud_term = K.mean(fraud_residual * fraud_residual)
    normal_term = K.mean(normal_residual * normal_residual)
    return 10 * fraud_term + normal_term

In [6]:
# Per-timestep classifier: with return_sequences=True the LSTM emits one vector
# per timestep, so the Dense head produces one sigmoid score for each of the
# 3 timesteps in a sequence.
model = Sequential()
# -99 is the mask value handed to the Masking layer
# (presumably the padding marker — confirm against the preprocessing).
model.add(Masking(mask_value=-99, input_shape=(3, feature_num)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss=modify_mse, metrics=['acc'])
# summary() prints the table itself and returns None; wrapping it in print()
# additionally emitted a stray "None" line.
model.summary()




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_1 (Masking)          (None, 3, 263)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 263)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 3, 50)             62800     
_________________________________________________________________
dropout_2 (Dropout)          (None, 3, 50)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 3, 32)             1632      
____________________________________________________________

In [7]:
best_weights_filepath = "best1.hdf5"

# Checkpoint: overwrite the file only when validation loss improves.
saveBestModel = ModelCheckpoint(
    best_weights_filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Early stopping: give up after 10 epochs without a better validation loss.
callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="auto",
)

# Labels are reshaped to (samples, 3, 1) to line up with the model's
# one-score-per-timestep output.
model.fit(
    np.array(X_Train_Seq),
    np.array(Y_Train_Seq).reshape((-1, 3, 1)),
    validation_split=0.2,
    epochs=1000,
    batch_size=128,
    callbacks=[callback, saveBestModel],
)




Train on 246484 samples, validate on 61622 samples
Epoch 1/1000






Epoch 00001: val_loss improved from inf to 0.13712, saving model to best1.hdf5
Epoch 2/1000

Epoch 00002: val_loss improved from 0.13712 to 0.13214, saving model to best1.hdf5
Epoch 3/1000

Epoch 00003: val_loss improved from 0.13214 to 0.12824, saving model to best1.hdf5
Epoch 4/1000

Epoch 00004: val_loss did not improve from 0.12824
Epoch 5/1000

Epoch 00005: val_loss improved from 0.12824 to 0.12721, saving model to best1.hdf5
Epoch 6/1000

Epoch 00006: val_loss did not improve from 0.12721
Epoch 7/1000

Epoch 00007: val_loss did not improve from 0.12721
Epoch 8/1000

Epoch 00008: val_loss did not improve from 0.12721
Epoch 9/1000

Epoch 00009: val_loss did not improve from 0.12721
Epoch 10/1000

Epoch 00010: val_loss did not improve from 0.12721
Epoch 11/1000

Epoch 00011: val_loss did not improve from 0.12721
Epoch 12/1000

Epoch 00012: val_loss did not improve from 0.12721
Epoch 13/1000

Epoch 00013: val_lo


Epoch 00032: val_loss improved from 0.12425 to 0.12346, saving model to best1.hdf5
Epoch 33/1000

Epoch 00033: val_loss did not improve from 0.12346
Epoch 34/1000

Epoch 00034: val_loss did not improve from 0.12346
Epoch 35/1000

Epoch 00035: val_loss did not improve from 0.12346
Epoch 36/1000

Epoch 00036: val_loss did not improve from 0.12346
Epoch 37/1000

Epoch 00037: val_loss did not improve from 0.12346
Epoch 38/1000

Epoch 00038: val_loss did not improve from 0.12346
Epoch 39/1000

Epoch 00039: val_loss did not improve from 0.12346
Epoch 40/1000

Epoch 00040: val_loss did not improve from 0.12346
Epoch 41/1000

Epoch 00041: val_loss improved from 0.12346 to 0.12345, saving model to best1.hdf5
Epoch 42/1000

Epoch 00042: val_loss did not improve from 0.12345
Epoch 43/1000

Epoch 00043: val_loss did not improve from 0.12345
Epoch 44/1000

Epoch 00044: val_loss did not improve from 0.12345
Epoch 45/1000

Epoch 00045: val_loss did not improve from 0.12345
Epoch 46/1000

Epoch 00046

<keras.callbacks.History at 0x7fea35b9b320>

# Test

In [8]:
def subFile(filename,threshold):
    """Threshold the model's test predictions and write a submission CSV.

    Runs the notebook-global ``model`` on ``X_Test_Seq`` and labels every
    per-timestep score greater than ``threshold`` as 1, everything else as 0.
    The flattened labels are paired, in order, with the entries of the global
    ``TranID_Seq``; positions whose ID equals -1 are skipped (presumably
    padding — confirm against the preprocessing). The labels are then
    left-joined onto ``data/IEEE/sample_submission.csv`` by ``TransactionID``
    and written to ``submission/sub1_<filename>.csv``.

    Parameters
    ----------
    filename : str
        Suffix inserted into the output file name.
    threshold : float
        Scores strictly greater than this value are labelled 1.

    Raises
    ------
    IndexError
        If ``TranID_Seq`` has fewer entries than there are predicted timesteps.
    """
    predict = model.predict(np.array(X_Test_Seq)) > threshold
    # One boolean per (sequence, timestep), in the order nested iteration over
    # sequences and then timesteps would visit them.
    flat_predict = predict.reshape(-1)
    if len(TranID_Seq) < flat_predict.size:
        # zip() would silently truncate; fail loudly on a misaligned ID list.
        raise IndexError(
            'TranID_Seq has %d entries but there are %d predictions'
            % (len(TranID_Seq), flat_predict.size)
        )

    tran_ids = []
    labels = []
    for tran_id, is_fraud in zip(TranID_Seq, flat_predict):
        if tran_id != -1:
            tran_ids.append(int(tran_id))
            labels.append(1 if is_fraud else 0)

    # Explicit integer dtype keeps both columns integral even when nothing was
    # collected, so an empty result no longer crashes on 2-D indexing.
    ans = pd.DataFrame({'TransactionID': np.array(tran_ids, dtype=int),
                        'isFraud': np.array(labels, dtype=int)})

    sample = pd.read_csv("data/IEEE/sample_submission.csv")
    # Drop the placeholder column before merging so the predicted 'isFraud'
    # takes its place without the _x/_y suffix clean-up.
    sub = pd.merge(sample.drop(columns='isFraud'), ans, how="left", on="TransactionID")
    sub.to_csv('submission/sub1_'+filename+'.csv',index=False)

In [9]:
# Replace the in-memory weights with those stored in "best1.hdf5", the file the
# training cell's ModelCheckpoint (save_best_only=True) writes to, so that
# prediction does not use whatever weights the final training epoch left behind.
model.load_weights("best1.hdf5")

In [10]:
# Write one submission file per decision threshold. Each threshold is kept as a
# string because it doubles as the file-name suffix passed to subFile.
threshold_labels = ['.1', '.2', '.3', '.4', '.5', '.6']
for label in threshold_labels:
    subFile(label, float(label))