## Mobile Payments Fraud Detection Using Arificial Neural Networks

In [1]:


from keras import Sequential
from keras.layers import Dense, Dropout, InputLayer
from keras.callbacks import ModelCheckpoint
from keras.models import clone_model


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
import pandas as pd
import numpy as np
from matplotlib import style
style.use('ggplot') # optional dark_background for dark theme
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE

In [3]:
def plot_learningCurve(history, epoch, metric='binary_accuracy'):
    metric_title = metric.replace('_',' ').title()
    # Plot training & validation accuracy values
    epoch_range = range(1, epoch+1)
    plt.plot(epoch_range, history.history[metric])
    plt.plot(epoch_range, history.history['val_'+metric])
    plt.title('Model '+metric_title)
    plt.ylabel(metric_title)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()
    # Plot training & validation loss values
    plt.plot(epoch_range, history.history['loss'])
    plt.plot(epoch_range, history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

In [4]:
from sklearn.metrics import confusion_matrix
def print_confusion_matrix(testset,predictions):
    conmat=pd.DataFrame(confusion_matrix(testset, predictions, labels=[1,0]),
                        index=['Actual Fraud','Actual Non-Fraud'],
                        columns=['Pred Fraud','Pred Non-Fraud'])
    TP=conmat.iloc[0,0]/(conmat.iloc[0,0]+conmat.iloc[0,1])
    FN=conmat.iloc[0,1]/(conmat.iloc[0,0]+conmat.iloc[0,1])
    TN=conmat.iloc[1,1]/(conmat.iloc[1,0]+conmat.iloc[1,1])
    FP=conmat.iloc[1,0]/(conmat.iloc[1,0]+conmat.iloc[1,1])
    print('Percent of true positives: {:.2%}'.format(TP))
    print('Percent of false negatives: {:.2%}'.format(FN))
    print('Percent of true negatives: {:.2%}'.format(TN))
    print('Percent of false positives: {:.2%}'.format(FP))
    return conmat

In [5]:
# below are some constant settings for all the models which our team experimented on
batch_bin=32
num_epochs=5
train_pct=0.8
validate_pct=0.2
loss_function='binary_crossentropy'
optimization_function='RMSProp'
metric='binary_accuracy'

### PaySim EDA

In [6]:
data = pd.read_csv('PS_20174392719_1491204439457_log.csv')
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [7]:
data.columns

Index(['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud'],
      dtype='object')

In [8]:
data.shape

(6362620, 11)

In [9]:
data.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6362620 entries, 0 to 6362619
Data columns (total 11 columns):
step              int64
type              object
amount            float64
nameOrig          object
oldbalanceOrg     float64
newbalanceOrig    float64
nameDest          object
oldbalanceDest    float64
newbalanceDest    float64
isFraud           int64
isFlaggedFraud    int64
dtypes: float64(5), int64(3), object(3)
memory usage: 534.0+ MB


In [11]:
data[['step','type','nameOrig','nameDest','isFraud','isFlaggedFraud']].nunique()

step                  743
type                    5
nameOrig          6353307
nameDest          2722362
isFraud                 2
isFlaggedFraud          2
dtype: int64

In [12]:
data.groupby(['type','isFraud','isFlaggedFraud'])[['isFraud']].count().rename(columns={'isFraud':'count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
type,isFraud,isFlaggedFraud,Unnamed: 3_level_1
CASH_IN,0,0,1399284
CASH_OUT,0,0,2233384
CASH_OUT,1,0,4116
DEBIT,0,0,41432
PAYMENT,0,0,2151495
TRANSFER,0,0,528812
TRANSFER,1,0,4081
TRANSFER,1,1,16


### Data Preprocessing

In [13]:
# one-hot encoding for the categorical 'type' variable
dummies = pd.get_dummies(data['type'])
data2 = pd.concat([data[['step','amount','oldbalanceOrg','newbalanceOrig','oldbalanceDest','newbalanceDest',
                        'isFraud']],dummies],axis=1)

In [14]:
# split data 80-20 across training and testing datasets
x_train, x_test, y_train, y_test = train_test_split(data2.loc[:,data2.columns!='isFraud'],
                                                    data2.loc[:,data2.columns=='isFraud'],
                                                    test_size=validate_pct,train_size=train_pct)

In [15]:
# rescale datasets using StandardScaler on the training datasets
standard = StandardScaler()
standard_x_train = standard.fit_transform(x_train)
standard_x_test = standard.transform(x_test)

In [16]:
# rescale datasets using MinMaxScaler on the training datasets
minmax = MinMaxScaler()
minmax_x_train = minmax.fit_transform(x_train)
minmax_x_test = minmax.transform(x_test)

In [17]:
# reset y variable to numpy array
y_train, y_test = np.array(y_train), np.array(y_test)

In [18]:
# count how many non-fraud and fraud transactions are in the test set
unique,counts = np.unique(y_test,return_counts=True)
dict(zip(unique,counts))

{0: 1270896, 1: 1628}

In [19]:
# separate the data into non-fraud and fraud dataframes
non_fraud = data2[data2['isFraud']==0]
fraud = data2[data2['isFraud']==1]

In [20]:
non_fraud.shape, fraud.shape

((6354407, 12), (8213, 12))

In [21]:
non_fraud = non_fraud.sample(fraud.shape[0])
non_fraud.shape

(8213, 12)

In [22]:
rand_balanced_data = fraud.append(non_fraud, ignore_index=True)
rand_balanced_data.head()

Unnamed: 0,step,amount,oldbalanceOrg,newbalanceOrig,oldbalanceDest,newbalanceDest,isFraud,CASH_IN,CASH_OUT,DEBIT,PAYMENT,TRANSFER
0,1,181.0,181.0,0.0,0.0,0.0,1,0,0,0,0,1
1,1,181.0,181.0,0.0,21182.0,0.0,1,0,1,0,0,0
2,1,2806.0,2806.0,0.0,0.0,0.0,1,0,0,0,0,1
3,1,2806.0,2806.0,0.0,26202.0,0.0,1,0,1,0,0,0
4,1,20128.0,20128.0,0.0,0.0,0.0,1,0,0,0,0,1


In [23]:
rand_balanced_data['isFraud'].value_counts()

1    8213
0    8213
Name: isFraud, dtype: int64

In [24]:
# create a randomly balanced datasets for training and testing the feed forward neural netowrk on
rand_x_train,rand_x_test,rand_y_train,rand_y_test=train_test_split(rand_balanced_data.loc[:,non_fraud.columns!='isFraud'],
                                                                   rand_balanced_data.loc[:,non_fraud.columns=='isFraud'],
                                                                   train_size=train_pct,test_size=validate_pct)
rand_y_train=np.array(rand_y_train)
rand_y_test=np.array(rand_y_test)

In [25]:
# rescale datasets using StandardScaler on the training datasets
rand_standard = StandardScaler()
rand_standard_x_train = standard.fit_transform(rand_x_train)
rand_standard_x_test = standard.transform(rand_x_test)

In [26]:
# rescale datasets using MinMaxScaler on the training datasets
rand_minmax = MinMaxScaler()
rand_minmax_x_train = minmax.fit_transform(rand_x_train)
rand_minmax_x_test = minmax.transform(rand_x_test)

In [27]:
# create a SMOTE balanced version of the standard training dataset
standard_smt = SMOTE()
standard_x_train_sm, standard_y_train_sm = standard_smt.fit_sample(standard_x_train,y_train)

  y = column_or_1d(y, warn=True)


In [28]:
np.bincount(standard_y_train_sm)

array([5083511, 5083511])

In [31]:
# create a SMOTE balanced version of the standard training dataset
minmax_smt = SMOTE()
minmax_x_train_sm, minmax_y_train_sm = minmax_smt.fit_sample(minmax_x_train,y_train)

  y = column_or_1d(y, warn=True)


In [32]:
np.bincount(standard_y_train_sm)

array([5083511, 5083511])

In [34]:
# create a test-train split for the autoencoder using only non-fraud data
x_train_ae, x_test_ae, y_train_ae, y_test_ae = train_test_split(non_fraud.loc[:,non_fraud.columns!='isFraud'],
                                                                non_fraud.loc[:,non_fraud.columns=='isFraud'],
                                                                train_size=train_pct,test_size=validate_pct)

In [35]:
# using standard scaling to rescale the datasets' independent features
standard_ae = StandardScaler()
standard_x_train_ae = standard_ae.fit_transform(x_train_ae)
standard_x_test_ae = standard_ae.transform(x_test_ae)

In [36]:
# using normalized scaling to rescale the datasets' independent features
minmax_ae = MinMaxScaler()
minmax_x_train_ae = minmax_ae.fit_transform(x_train_ae)
minamx_x_test_ae = minmax_ae.transform(x_test_ae)

### Feedforward Neural Network with Standardized Imbalanced Big Dataset

In [27]:
imbalanced_ann = Sequential()

imbalanced_ann.add(InputLayer(input_shape=(11,)))
imbalanced_ann.add(Dense(64,activation='relu'))
imbalanced_ann.add(Dropout(0.2))
imbalanced_ann.add(Dense(32,activation='relu'))
imbalanced_ann.add(Dropout(0.1))
imbalanced_ann.add(Dense(1,activation='sigmoid'))

In [28]:
imbalanced_ann.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                768       
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 2,881
Trainable params: 2,881
Non-trainable params: 0
_________________________________________________________________


In [29]:
imbalanced_ann.compile(loss=loss_function,optimizer=optimization_function,metrics=[metric])

In [38]:
filepath="models/imbalanced-ann-weights-improvement-{epoch:02d}-{val_"+metric+":.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_'+metric, verbose=1, save_best_only=True, mode='max')
callbacks_list_imbalanced_ann = [checkpoint]

In [39]:
standard_imbalaned_ann_fit = imbalanced_ann.fit(standard_x_train,y_train,callbacks=callbacks_list_imbalanced_ann,
                                                batch_size=batch_bin,epochs=num_epochs,validation_split=validate_pct)

Train on 4072076 samples, validate on 1018020 samples
Epoch 1/5
Epoch 00001: val_binary_accuracy improved from -inf to 0.99905, saving model to models/imbalanced-ann-weights-improvement-01-1.00.hdf5
Epoch 2/5
Epoch 00002: val_binary_accuracy improved from 0.99905 to 0.99911, saving model to models/imbalanced-ann-weights-improvement-02-1.00.hdf5
Epoch 3/5
Epoch 00003: val_binary_accuracy improved from 0.99911 to 0.99921, saving model to models/imbalanced-ann-weights-improvement-03-1.00.hdf5
Epoch 4/5
Epoch 00004: val_binary_accuracy did not improve from 0.99921
Epoch 5/5
Epoch 00005: val_binary_accuracy did not improve from 0.99921


In [81]:
standardized_imbalanced_y_pred = imbalanced_ann.predict_classes(standard_x_test)

In [82]:
print_confusion_matrix(y_test,standardized_imbalanced_y_pred)

Percent of true positives: 30.27%
Percent of false negatives: 69.73%
Percent of true negatives: 100.00%
Percent of false positives: 0.00%


Unnamed: 0,Pred Fraud,Pred Non-Fraud
Actual Fraud,490,1129
Actual Non-Fraud,1,1270904


In [80]:
# save standardized imbalanced_ann model
# serialize model to JSON
with open("models/standardized_imbalaned_ann_fraud_detector.json", "w") as json_file:
    json_file.write(imbalanced_ann.to_json())
# serialize weights to HDF5
imbalanced_ann.save_weights("models/standardized_imbalaned_ann_fraud_detector.h5")

### Feedforward Neural Network with Normalized Imbalanced Big Dataset

In [75]:
filepath="models/imbalanced-minmax-ann-weights-improvement-{epoch:02d}-{val_"+metric+":.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_'+metric, verbose=1, save_best_only=True, mode='max')
callbacks_list_imbalanced_ann = [checkpoint]

In [77]:
minmax_imbalanced_ann = clone_model(imbalanced_ann)
minmax_imbalanced_ann.compile(loss=loss_function,optimizer=optimization_function,metrics=[metric])
minamx_imbalaned_ann_fit = minmax_imbalanced_ann.fit(minmax_x_train,y_train,callbacks=callbacks_list_imbalanced_ann,
                                                     batch_size=batch_bin,epochs=num_epochs,validation_split=validate_pct)

Train on 4072076 samples, validate on 1018020 samples
Epoch 1/5
Epoch 00001: val_binary_accuracy improved from -inf to 0.99871, saving model to models/imbalanced-minmax-ann-weights-improvement-01-1.00.hdf5
Epoch 2/5
Epoch 00002: val_binary_accuracy did not improve from 0.99871
Epoch 3/5
Epoch 00003: val_binary_accuracy did not improve from 0.99871
Epoch 4/5
Epoch 00004: val_binary_accuracy did not improve from 0.99871
Epoch 5/5
Epoch 00005: val_binary_accuracy improved from 0.99871 to 0.99872, saving model to models/imbalanced-minmax-ann-weights-improvement-05-1.00.hdf5


In [84]:
normalized_imbalanced_y_pred = minmax_imbalanced_ann.predict_classes(minmax_x_test)

In [85]:
print_confusion_matrix(y_test,normalized_imbalanced_y_pred)

Percent of true positives: 0.31%
Percent of false negatives: 99.69%
Percent of true negatives: 100.00%
Percent of false positives: 0.00%


Unnamed: 0,Pred Fraud,Pred Non-Fraud
Actual Fraud,5,1614
Actual Non-Fraud,1,1270904


In [86]:
# save nornalized imbalanced_ann model
# serialize model to JSON
with open("models/normalized_imbalaned_ann_fraud_detector.json", "w") as json_file:
    json_file.write(minmax_imbalanced_ann.to_json())
# serialize weights to HDF5
minmax_imbalanced_ann.save_weights("models/normalized_imbalaned_ann_fraud_detector.h5")

### Feedforward with Randomly Balanced Small Dataset

In [73]:
balanced_ann = Sequential()

balanced_ann.add(InputLayer(input_shape=(11,)))
balanced_ann.add(Dense(64,activation='relu'))
balanced_ann.add(Dropout(0.2))
balanced_ann.add(Dense(32,activation='relu'))
balanced_ann.add(Dropout(0.1))
balanced_ann.add(Dense(1,activation='sigmoid'))

In [74]:
balanced_ann.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 64)                768       
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_3 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 33        
Total params: 2,881
Trainable params: 2,881
Non-trainable params: 0
_________________________________________________________________


In [91]:
balanced_ann.compile(loss=loss_function,optimizer=optimization_function,metrics=[metric])

In [92]:
filepath="models/balanced-ann-weights-improvement-{epoch:02d}-{val_"+metric+":.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_'+metric, verbose=1, save_best_only=True, mode='max')
callbacks_list_balanced_ann = [checkpoint]

In [96]:
standard_balaned_ann_fit = balanced_ann.fit(rand_standard_x_train,rand_y_train,
                                            callbacks=callbacks_list_balanced_ann,
                                            batch_size=batch_bin,epochs=num_epochs,
                                            validation_split=validate_pct)

Train on 10512 samples, validate on 2628 samples
Epoch 1/5
Epoch 00001: val_binary_accuracy improved from -inf to 0.51560, saving model to models/balanced-ann-weights-improvement-01-0.52.hdf5
Epoch 2/5
Epoch 00002: val_binary_accuracy did not improve from 0.51560
Epoch 3/5
Epoch 00003: val_binary_accuracy improved from 0.51560 to 0.51598, saving model to models/balanced-ann-weights-improvement-03-0.52.hdf5
Epoch 4/5
Epoch 00004: val_binary_accuracy did not improve from 0.51598
Epoch 5/5
Epoch 00005: val_binary_accuracy did not improve from 0.51598


In [97]:
standardized_balanced_y_pred = balanced_ann.predict_classes(rand_standard_x_test)

In [99]:
print_confusion_matrix(rand_y_test,standardized_balanced_y_pred)

Percent of true positives: 98.72%
Percent of false negatives: 1.28%
Percent of true negatives: 1.45%
Percent of false positives: 98.55%


Unnamed: 0,Pred Fraud,Pred Non-Fraud
Actual Fraud,1615,21
Actual Non-Fraud,1626,24


### Feed Forward with SMOTE Big Dataset

In [103]:
smote_ann = Sequential()

smote_ann.add(InputLayer(input_shape=(11,)))
smote_ann.add(Dense(64,activation='relu'))
smote_ann.add(Dropout(0.2))
smote_ann.add(Dense(32,activation='relu'))
smote_ann.add(Dropout(0.1))
smote_ann.add(Dense(1,activation='sigmoid'))

In [104]:
smote_ann.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 64)                768       
_________________________________________________________________
dropout_4 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_5 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 33        
Total params: 2,881
Trainable params: 2,881
Non-trainable params: 0
_________________________________________________________________


In [105]:
smote_ann.compile(loss=loss_function,optimizer=optimization_function,metrics=[metric])

In [106]:
filepath="models/smote-balanced-ann-weights-improvement-{epoch:02d}-{val_"+metric+":.2f}.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_'+metric, verbose=1, save_best_only=True, mode='max')
callbacks_list_smote_balanced_ann = [checkpoint]

In [None]:
sandardized_smote_fit=smote_ann.fit(standard_x_train_sm,standard_y_train_sm,
                                    callbacks=callbacks_list_smote_balanced_ann,
                                    batch_size=batch_bin,epochs=num_epochs,validation_split=validate_pct)

In [None]:
standardized_smote_y_pred = smote_ann.predict_classes(standard_x_test_sm)

In [None]:
print_confusion_matrix(standard_y_test_sm,standardized_smote_y_pred)

In [None]:
# save nornalized imbalanced_ann model
# serialize model to JSON
with open("models/standardized_smote_balaned_ann_fraud_detector.json", "w") as json_file:
    json_file.write(smote_ann.to_json())
# serialize weights to HDF5
smote_ann.save_weights("models/standardized_smote_balaned_ann_fraud_detector.h5")

In [36]:
cm

Unnamed: 0,Pred Non-Fraud,Pred Fraud
Actual Non-Fraud,1251919,18944
Actual Fraud,30,1631


### Autoencoder Neural Network

#### main idea : we utilize the replication function of autoencoder nueral network. We train the model only by non-fraud dataset, so that the model can well replicate the features in non-fraud dataset but fails to give good replication for fraud dataset. If we have new unknown data to input to the model, it is classifed as fruad if it return high error, vice versa..

In [38]:
# build up autoencoder model
model1 = Sequential()

model1.add(InputLayer(input_shape=(11,)))
model1.add(Dense(8,activation='relu'))
model1.add(Dense(4,activation='relu'))
model1.add(Dense(8,activation='relu'))
model1.add(Dense(11,activation='linear'))

In [39]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 8)                 96        
_________________________________________________________________
dense_6 (Dense)              (None, 4)                 36        
_________________________________________________________________
dense_7 (Dense)              (None, 8)                 40        
_________________________________________________________________
dense_8 (Dense)              (None, 11)                99        
Total params: 271
Trainable params: 271
Non-trainable params: 0
_________________________________________________________________


In [40]:
# set paramater of the model
model1.compile(loss='mae',optimizer='RMSProp')

In [54]:
# split dataset, one contains only non-fraud data and one contains only fraud data
non_fraud = data2[data2['isFraud']==0]
fraud = data2[data2['isFraud']==1]

In [55]:
# since we only use non-fraud data to train the model, we split training set and test set from non-fraud data set
x_train_ae, x_test_ae, y_train_ae, y_test_ae = train_test_split(non_fraud.loc[:,non_fraud.columns!='isFraud'],
                                                    non_fraud.loc[:,non_fraud.columns=='isFraud'],
                                                    test_size=0.2,train_size=0.8)

In [59]:
# standardize the value
standard = StandardScaler()
standard_x_train_ae = standard.fit_transform(x_train_ae)
standard_x_test_ae = standard.transform(x_test_ae)

In [60]:
# train model (note: we are trying to replicate the features of non-fraud dataset, so both explanatory and target variables are standard_x_train_ae)
standardfit = model1.fit(standard_x_train_ae,standard_x_train_ae, batch_size=100, epochs=5, validation_split=0.2)

Train on 4066820 samples, validate on 1016705 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [66]:
# evaluate the performance of replicating non-fraud dataset by test set
eva = model1.evaluate(standard_x_test_ae,standard_x_test_ae)



In [67]:
eva

0.0661094989341098

In [62]:
standard_fraud = standard.fit_transform(fraud.loc[:,fraud.columns!='isFraud'])

In [63]:
# evaluate the performace of replicating fraud dataset
fraud_test=model1.evaluate(standard_fraud, standard_fraud)



In [65]:
fraud_test

0.4029295332652707

##### conclusion: the model is good, the error for fraud dataset is significantly higher than non-fraud dataset. fraud_test>>>eva. See the report for more detail