# Train using Keras and DNN architecture

Reference for the choice of weight initializer: https://blog.usejournal.com/credit-card-fraud-detection-by-neural-network-in-keras-4bd81cc9e7fe

In [89]:
import pandas as pd
import numpy as np
import keras
from keras.layers import Dense, Dropout, BatchNormalization

from sklearn.metrics import roc_auc_score

!python -c 'import tensorflow as tf; print(tf.version)'

<module 'tensorflow._api.v1.version' from '/home/ec2-user/anaconda3/lib/python3.7/site-packages/tensorflow/_api/v1/version/__init__.py'>


# Read/Preprocess Dataset

In [2]:
def read_data(path='./datasets/kfold/transaction_fold_0_0_0.csv'):
    """Load a transaction CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the fold file this notebook
        was originally written against, so ``read_data()`` behaves as before.

    Returns
    -------
    pandas.DataFrame
        The parsed file, exactly as returned by ``pd.read_csv``.
    """
    return pd.read_csv(path)


def get_string_features(df):
    """Return the names of the columns of ``df`` whose dtype is ``object``.

    The names are listed in the same order as they appear in ``df.columns``.
    """
    object_dtype = np.dtype('object')
    return [name for name in df.columns if df[name].dtype == object_dtype]

    
def preprocess(df):
    """Split a raw transaction frame into a feature matrix and a target.

    Object-dtype columns (as reported by ``get_string_features``) are
    dropped first. From what remains, the ``isFraud``, ``TransactionID`` and
    ``TransactionDT`` columns are removed to form the features, and the
    ``isFraud`` column is returned separately as the target.

    Returns
    -------
    tuple
        ``(features, target)``: a DataFrame and the ``isFraud`` Series.
    """
    numeric_df = df.drop(columns=get_string_features(df))

    features = numeric_df.drop(columns=['isFraud', 'TransactionID', 'TransactionDT'])
    target = numeric_df['isFraud']

    return features, target

    
# Load the raw fold; `train` keeps every original column (including the target).
train = read_data()

# X: numeric feature columns only; y: the `isFraud` column.
X, y = preprocess(train)


In [9]:
# Replace every NaN in the feature matrix with the sentinel value -999.
# NOTE(review): no feature scaling is applied in the visible cells, so the
# neural network receives -999 as an ordinary magnitude — confirm this is intended.
X = X.fillna(-999)

In [3]:
# Sanity check: raw frame shape, its memory footprint, and the feature/target shapes.
print(train.shape)
# memory_usage() is called with its defaults here; the sum is printed with
# thousands separators.
print('{:,}'.format(train.memory_usage().sum()))
print(X.shape)
print(y.shape)

(73816, 394)
232,668,112
(73816, 377)
(73816,)


In [24]:
# Preview the first rows of the feature matrix; -999.0 entries are the filled-in missing values.
X.head()

Unnamed: 0,TransactionAmt,card1,card2,card3,card5,addr1,addr2,dist1,dist2,C1,...,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339
0,29.0,2755,404.0,150.0,102.0,325.0,87.0,-999.0,-999.0,1.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
1,49.0,5937,555.0,150.0,226.0,272.0,87.0,36.0,-999.0,1.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
2,422.5,12695,490.0,150.0,226.0,325.0,87.0,-999.0,-999.0,1.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
3,10.5,11839,490.0,150.0,226.0,226.0,87.0,-999.0,-999.0,1.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0
4,159.95,11401,543.0,150.0,117.0,204.0,87.0,-999.0,-999.0,127.0,...,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0,-999.0


In [28]:
# Cast the `isFraud` target to a 32-bit integer dtype before it is passed to the models.
y = y.astype('int32')

In [None]:
# del train

# import gc
# gc.collect()

# Build Model

In [91]:
def get_model(input_dim, lr=1e-3, hidden_units=(256, 128, 64, 32, 16, 8), dropout_rate=0.2):
    """Build and compile a fully connected binary classifier.

    Every hidden stage is ``Dense(relu) -> BatchNormalization -> Dropout``;
    the stack ends in a single sigmoid unit. With the default arguments the
    architecture is: 256-128-64-32-16-8 hidden units, dropout 0.2 after each
    stage, and the ``'uniform'`` kernel initializer on every Dense layer.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    lr : float, optional
        Learning rate handed to the Adam optimizer.
    hidden_units : sequence of int, optional
        Width of each hidden Dense layer, in order. May be empty, in which
        case the model is just the sigmoid output layer.
    dropout_rate : float, optional
        Rate passed to the Dropout layer that follows each hidden stage.

    Returns
    -------
    keras.models.Sequential
        Model compiled with binary cross-entropy loss and the accuracy metric.
    """
    model = keras.models.Sequential()

    # Only the first layer added to the model is told the input size; the
    # dict is emptied after its first use so later layers do not receive it.
    first_layer_kwargs = {'input_dim': input_dim}

    for units in hidden_units:
        model.add(Dense(units, activation='relu', kernel_initializer='uniform',
                        **first_layer_kwargs))
        first_layer_kwargs = {}
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='sigmoid', kernel_initializer='uniform',
                    **first_layer_kwargs))

    # NOTE(review): `lr` is the argument name of the Keras release this
    # notebook was run with; newer releases call it `learning_rate` — adjust
    # when upgrading.
    optimizer = keras.optimizers.Adam(lr=lr)

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])

    return model


# Size the input layer from the number of feature columns. The learning rate
# is set to 1e-4 here rather than the function's default of 1e-3.
model = get_model(X.shape[1], lr=1e-4)

# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_84 (Dense)             (None, 256)               96768     
_________________________________________________________________
batch_normalization_2 (Batch (None, 256)               1024      
_________________________________________________________________
dropout_26 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_85 (Dense)             (None, 128)               32896     
_________________________________________________________________
batch_normalization_3 (Batch (None, 128)               512       
_________________________________________________________________
dropout_27 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_86 (Dense)             (None, 64)                8256      
__________

# Train the Model

In [98]:
# Training hyper-parameters for the Keras model.
epochs = 10
batch_size = 256

# NOTE(review): the model is fit on all of X/y and no validation data is
# passed, so nothing in this cell measures generalization.
model.fit(X, y,
         epochs=epochs,
         batch_size=batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc5cd127748>

# Calculate ROC-AUC score (on the training data)

In [99]:
# X and y are the same data the model was fit on, so this is a training-set
# ROC-AUC, not a held-out estimate.
# The model ends in a sigmoid unit, so `predict` already yields probabilities;
# `Sequential.predict_proba` is deprecated/removed in newer Keras releases.
pred_prob = model.predict(X)

score = roc_auc_score(y, pred_prob)

print('roc-auc score={}'.format(score))


roc-auc score=0.828473684500645


In [102]:
# Score the Keras model on X/y — the same data it was trained on — using
# `predict`, whose output for the sigmoid unit is used directly as the score.
pred_prob = model.predict(X)

score = roc_auc_score(y, pred_prob)

print('roc-auc score={}'.format(score))


roc-auc score=0.828473684500645


# Train using XGBoost

In [61]:
import xgboost as xgb

In [63]:
# XGBoost baseline built with no arguments, i.e. the library's default
# hyper-parameters (echoed in the estimator repr in the cell output).
model_xgb = xgb.XGBClassifier()

# Fit on the same X/y as the Keras model; %time reports how long the fit takes.
%time model_xgb.fit(X, y)

CPU times: user 47.9 s, sys: 648 ms, total: 48.5 s
Wall time: 48.5 s


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
       max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
       n_estimators=100, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=None,
       subsample=1, verbosity=1)

In [68]:
# Score the XGBoost model on X/y — the data it was fit on, so this is a
# training-set figure.
pred_prob = model_xgb.predict_proba(X)

# Column 1 of the probability matrix is used as the score
# (presumably the fraud / positive class — confirm against model_xgb.classes_).
score = roc_auc_score(y, pred_prob[:, 1])

print('roc-auc score={}'.format(score))

roc-auc score=0.9001997158433197
