In [None]:
from keras.utils import Sequence
import numpy as np
import pickle as pkl
import pandas as pd

# Load Data

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in binary mode inside a ``with`` block, so the handle
    is closed even if unpickling raises.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point this at files written by a trusted preprocessing step.
    with open(path, 'rb') as loadfile:
        return pkl.load(loadfile)


# Training inputs, test inputs and training targets, already grouped into sequences.
X_Train_Seq = _load_pickle('data/X_Train_Seq')
X_Test_Seq = _load_pickle('data/X_Test_Seq')
Y_Train_Seq = _load_pickle('data/Y_Train_Seq')

# Transaction ids that are paired with the test predictions when the
# submission file is built.
TranID_Seq = _load_pickle('data/TranID_Seq')

# Model 1

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Masking, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils.generic_utils import get_custom_objects
import tensorflow as tf
import keras.backend as K

In [None]:
# Width of one timestep's feature vector, read off the first timestep of the
# first training sequence; the model's input layer is sized from this value.
_first_timestep = X_Train_Seq[0][0]
feature_num = len(_first_timestep)

In [None]:
def modify_mse(y_true, y_pred):
    """Class-weighted squared-error loss for 0/1 targets.

    Positions labelled 1 contribute ``(1 - y_pred) ** 2`` and positions
    labelled 0 contribute ``y_pred ** 2``; the label-0 term is scaled by 0.4,
    so errors on label-1 positions weigh more. Positions whose label is
    neither 0 nor 1 add nothing to either sum, but both means are still taken
    over every element of ``y_pred``.

    Args:
        y_true: tensor of target labels.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor: ``mean(fraud_err ** 2) + 0.4 * mean(normal_err ** 2)``.
    """
    ones = tf.ones_like(y_pred)
    zeros = tf.zeros_like(y_pred)

    # Shortfall from 1 where the label is 1; exactly 0 everywhere else
    # (the filler value 1 cancels against `ones`).
    fraud_err = ones - tf.where(tf.equal(y_true, 1), y_pred, ones)

    # The raw prediction where the label is 0; exactly 0 everywhere else.
    normal_err = tf.where(tf.equal(y_true, 0), y_pred, zeros)

    fraud_term = K.mean(fraud_err * fraud_err)
    normal_term = K.mean(normal_err * normal_err)
    return fraud_term + 0.4 * normal_term

In [None]:
# Per-timestep classifier: every input sequence has 3 timesteps of
# `feature_num` features, and the network emits one sigmoid score per timestep
# (the LSTM keeps its full output sequence via return_sequences=True).
model = Sequential()

# Timesteps filled with the sentinel value -99 are masked so that downstream
# layers skip them.
model.add(Masking(mask_value=-99, input_shape=(3, feature_num)))
model.add(Dropout(0.2))

model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))

# The Dense layers are applied to each timestep's LSTM output.
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss=modify_mse, metrics=['acc'])

# summary() prints the layer table itself and returns None; wrapping it in
# print() would add a stray "None" line after the table.
model.summary()

In [None]:
# File that receives the weights of the best epoch seen so far.
best_weights_filepath = "best1.hdf5"

# Stop training once val_loss has failed to improve for 10 consecutive epochs.
callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="auto",
)

# Overwrite the checkpoint only when val_loss improves.
saveBestModel = ModelCheckpoint(
    best_weights_filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Targets are reshaped to (n_sequences, 3, 1) to line up with the model's
# one-score-per-timestep output. 20% of the data is held out for validation;
# the epoch count is only an upper bound because early stopping ends the run.
model.fit(
    np.array(X_Train_Seq),
    np.array(Y_Train_Seq).reshape((-1,3,1)),
    validation_split=0.2,
    epochs=1000,
    batch_size=128,
    callbacks=[callback,saveBestModel],
)

# Test

In [None]:
def subFile(filename,threshold):
    """Threshold the model's test predictions and write them as a submission CSV.

    Relies on the notebook globals ``model``, ``X_Test_Seq`` and ``TranID_Seq``.
    Predictions are walked sequence by sequence, timestep by timestep; the
    k-th prediction in that walk is paired with ``TranID_Seq[k]``, and entries
    whose id equals -1 are dropped. The remaining (id, 0/1) pairs are
    left-joined onto ``data/IEEE/sample_submission.csv`` by ``TransactionID``,
    replacing that file's own ``isFraud`` column.

    Args:
        filename: string inserted into the output file name.
        threshold: scores strictly greater than this are labelled 1, else 0.

    Returns:
        None. The result is written to disk.

    NOTE(review): the output directory literal is 'ubmission/', which may be
    a typo for 'submission/'. It is left unchanged here; confirm against the
    actual directory layout before renaming.
    """
    is_fraud = model.predict(np.array(X_Test_Seq)) > threshold

    # Flatten (sequence, timestep) into one running position so each
    # prediction can be matched with its transaction id.
    flat_predictions = (tran for seq in is_fraud for tran in seq)
    rows = []
    for position, tran in enumerate(flat_predictions):
        tran_id = TranID_Seq[position]
        if tran_id != -1:
            rows.append([int(tran_id), 1 if tran else 0])
    rows = np.array(rows)

    predicted = pd.DataFrame({
        'TransactionID': rows[:, 0],
        'isFraud': rows[:, 1],
    })

    # Both frames carry an `isFraud` column, so the merge suffixes them
    # _x (template) and _y (ours); keep ours under the original name.
    template = pd.read_csv("data/IEEE/sample_submission.csv")
    merged = pd.merge(template, predicted, how="left", on="TransactionID")
    merged = merged.drop(columns=['isFraud_x'])
    merged = merged.rename(columns={'isFraud_y':'isFraud'})
    merged.to_csv('ubmission/sum1_'+filename+'.csv',index=False)

In [None]:
# Restore the checkpointed weights from disk so that the predictions below
# come from the saved best epoch rather than from the last training epoch.
model.load_weights(filepath="best1.hdf5")

In [None]:
# Write one submission file per decision threshold. Each label is used twice:
# as text in the output file name and, parsed to float, as the cut-off.
for threshold_label in ('.1', '.2', '.3', '.4', '.5', '.6'):
    cutoff = float(threshold_label)
    subFile(threshold_label, cutoff)