In [1]:
import numpy as np
import pickle as pkl
import pandas as pd

In [2]:
def _load_pickle(path):
    """Unpickle and return the single object stored in the file at ``path``.

    The file is opened in a ``with`` block so the handle is released even
    when ``pkl.load`` raises (the manual open/close pattern would leak it).
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by a trusted preprocessing step.
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Inputs/targets for training, inputs for inference, and the per-prediction
# transaction ids used when building the submission file.
X_Train_Seq = _load_pickle('data/X_Train_Seq')
X_Test_Seq = _load_pickle('data/X_Test_Seq')
Y_Train_Seq = _load_pickle('data/Y_Train_Seq')
TranID_Seq = _load_pickle('data/TranID_Seq')

# Model Two

In [3]:
from keras.utils import Sequence
from keras.models import Model
from keras.layers import Dense, Activation, LSTM, Masking, Dropout, BatchNormalization, Input, Lambda
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.utils.generic_utils import get_custom_objects
import tensorflow as tf
import keras.backend as K
from keras import activations

Using TensorFlow backend.


In [4]:
# Number of features per timestep, read off the first timestep of the first
# training sequence.
first_timestep = X_Train_Seq[0][0]
feature_num = len(first_timestep)

In [5]:
def modify_mse(y_true, y_pred):
    """Class-weighted squared-error loss.

    Elements labelled 1 contribute ``(1 - y_pred)**2`` to the first term and
    elements labelled 0 contribute ``y_pred**2`` to the second term; an
    element with any other label contributes 0 to both. Each term is averaged
    over *all* elements of ``y_pred`` and the label-1 term is weighted 10x.

    Args:
        y_true: Label tensor compared element-wise against 1 and 0.
        y_pred: Prediction tensor, same shape as ``y_true``.

    Returns:
        Scalar loss tensor.
    """
    ones = tf.ones_like(y_pred)
    zeros = tf.zeros_like(y_pred)

    is_fraud = tf.equal(y_true, 1)
    is_normal = tf.equal(y_true, 0)

    # Substituting 1 for non-fraud positions makes their error exactly 0.
    fraud_err = ones - tf.where(is_fraud, y_pred, ones)
    # Target for normal positions is 0, so the prediction itself is the error.
    normal_err = tf.where(is_normal, y_pred, zeros)

    fraud_term = K.mean(fraud_err * fraud_err)
    normal_term = K.mean(normal_err * normal_err)
    return 10 * fraud_term + normal_term

In [6]:
def attention(vecs):
    """Dot-product attention of a state vector over a sequence of vectors.

    Args:
        vecs: Pair ``(all_h, c)``. The indexing below requires ``all_h`` to be
            rank 3 and ``c`` to be rank 2 with a matching last dimension
            (presumably ``(batch, steps, units)`` and ``(batch, units)`` --
            confirm against the caller).

    Returns:
        The attention-weighted sum of ``all_h`` over axis 1, repeated 3 times
        along axis 1 (the repeat count is hard-coded).
    """
    hidden_seq, state = vecs

    # Broadcast the state across the step axis and score each step by its
    # dot product with the state.
    query = tf.expand_dims(state, 1)
    scores = K.sum(hidden_seq * query, axis=-1)

    # Normalise scores over the step axis, then restore a trailing axis so
    # the weights broadcast against the hidden vectors.
    alpha = tf.expand_dims(activations.softmax(scores), 2)

    context = K.sum(alpha * hidden_seq, axis=1, keepdims=True)
    return tf.tile(context, [1, 3, 1])

In [7]:
# Per-timestep regressor over length-3 sequences:
# Masking -> Dropout -> LSTM -> attention context -> Dense(32) -> Dense(1).
input_shape = (3, feature_num)
inputs = Input(shape=input_shape)

# -99 is the padding value: a timestep is masked when all of its features
# equal mask_value.
x = Masking(mask_value=-99, input_shape=input_shape)(inputs)
x = Dropout(0.2)(x)

# return_sequences gives the per-step outputs; return_state additionally
# returns the two final LSTM states, of which only the last one is used.
all_h, _, c = LSTM(50, return_sequences=True, return_state=True)(x)

# The attention context is tiled back to 3 steps, so the dense layers below
# still emit one prediction per timestep.
x = Lambda(attention)([all_h, c])
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.2)(x)

# NOTE(review): a relu output is unbounded above and can emit exact zeros;
# confirm this is intended given that predictions are later thresholded.
predictions = Dense(1, activation='relu')(x)

model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer='adam', loss=modify_mse, metrics=['acc'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 3, 263)       0                                            
__________________________________________________________________________________________________
masking_1 (Masking)             (None, 3, 263)       0           input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 3, 263)       0           masking_1[0][0]                  
_____________________________________________________________________________________________

In [8]:
# Weights of the best epoch (lowest validation loss) are checkpointed here.
best_weights_filepath = "best2.hdf5"

# Stop once val_loss has failed to improve for 10 consecutive epochs.
callback = EarlyStopping(
    monitor="val_loss",
    patience=10,
    verbose=1,
    mode="auto",
)

# Overwrite the checkpoint only when val_loss improves.
saveBestModel = ModelCheckpoint(
    filepath=best_weights_filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# Targets are reshaped to (samples, 3, 1) to line up with the model's
# one-value-per-timestep output. epochs=1000 is only an upper bound; early
# stopping ends the run.
model.fit(
    x=np.array(X_Train_Seq),
    y=np.array(Y_Train_Seq).reshape(-1, 3, 1),
    validation_split=0.2,
    epochs=1000,
    batch_size=128,
    callbacks=[callback, saveBestModel],
)




Train on 246484 samples, validate on 61622 samples
Epoch 1/1000






Epoch 00001: val_loss improved from inf to 0.14406, saving model to best2.hdf5
Epoch 2/1000

Epoch 00002: val_loss improved from 0.14406 to 0.13315, saving model to best2.hdf5
Epoch 3/1000

Epoch 00003: val_loss improved from 0.13315 to 0.12656, saving model to best2.hdf5
Epoch 4/1000

Epoch 00004: val_loss did not improve from 0.12656
Epoch 5/1000

Epoch 00005: val_loss improved from 0.12656 to 0.12646, saving model to best2.hdf5
Epoch 6/1000

Epoch 00006: val_loss improved from 0.12646 to 0.12585, saving model to best2.hdf5
Epoch 7/1000

Epoch 00007: val_loss improved from 0.12585 to 0.12402, saving model to best2.hdf5
Epoch 8/1000

Epoch 00008: val_loss did not improve from 0.12402
Epoch 9/1000

Epoch 00009: val_loss did not improve from 0.12402
Epoch 10/1000

Epoch 00010: val_loss did not improve from 0.12402
Epoch 11/1000

Epoch 00011: val_loss did not improve from 0.12402
Epoch 12/1000

Epoch 00012: val_loss

<keras.callbacks.History at 0x7f3633f5f5c0>

# Test

In [9]:
def subFile(filename,threshold):
    """Write a thresholded submission CSV for the test sequences.

    Runs the global ``model`` on ``X_Test_Seq``, labels every prediction
    strictly greater than ``threshold`` as 1 and the rest as 0, and pairs the
    flattened labels positionally with ``TranID_Seq``. Positions whose id is
    -1 are skipped. The labels are then left-joined onto the rows of
    ``data/IEEE/sample_submission.csv`` and written to
    ``submission/sub2_<filename>.csv``.

    Args:
        filename: Suffix inserted into the output file name.
        threshold: Cut-off applied to the raw model outputs.

    Raises:
        IndexError: If ``TranID_Seq`` has fewer entries than there are
            predictions.
        KeyError: If the sample submission has no ``isFraud`` column.
    """
    import os

    # One boolean per (sequence, timestep), flattened in row-major order so
    # that position i lines up with TranID_Seq[i].
    flags = (model.predict(np.array(X_Test_Seq)) > threshold).reshape(-1)

    tran_ids = []
    is_fraud = []
    for position, flag in enumerate(flags):
        tran_id = TranID_Seq[position]
        if tran_id != -1:
            tran_ids.append(int(tran_id))
            is_fraud.append(int(flag))

    # Explicit dtype keeps the merge key integral even if nothing was kept.
    ans = pd.DataFrame({'TransactionID': tran_ids, 'isFraud': is_fraud}, dtype='int64')

    sample = pd.read_csv("data/IEEE/sample_submission.csv")
    # Drop the placeholder column up front instead of deleting/renaming the
    # suffixed columns after the merge; the resulting frame is the same.
    # NOTE(review): ids present in the sample but absent from `ans` end up
    # with a missing isFraud value because of the left join.
    sub = sample.drop(columns='isFraud').merge(ans, how="left", on="TransactionID")

    # Create the output directory so a missing folder does not throw away
    # the prediction work at the very last step.
    os.makedirs('submission', exist_ok=True)
    sub.to_csv('submission/sub2_'+filename+'.csv',index=False)

In [10]:
# Restore the checkpointed weights before predicting, rather than using
# whatever weights the model currently holds.
model.load_weights(filepath="best2.hdf5")

In [11]:
# Write one submission file per decision threshold. The textual form of the
# threshold doubles as the file-name suffix.
threshold_labels = ('.1', '.2', '.3', '.4', '.5', '.6')
for label in threshold_labels:
    subFile(label, float(label))