## First, vectorize the CSV data

In [22]:
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

# Pin NumPy's global random state so NumPy-driven randomness repeats between runs.
SEED = 123
np.random.seed(SEED)

# Read the transactions table; the relative path is resolved against the
# kernel's current working directory.
CSV_PATH = 'creditcard.csv'
df = pd.read_csv(CSV_PATH)

# Preview the first ten rows as a sanity check on the parsed columns.
df.head(10)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0
5,2.0,-0.425966,0.960523,1.141109,-0.168252,0.420987,-0.029728,0.476201,0.260314,-0.568671,...,-0.208254,-0.559825,-0.026398,-0.371427,-0.232794,0.105915,0.253844,0.08108,3.67,0
6,4.0,1.229658,0.141004,0.045371,1.202613,0.191881,0.272708,-0.005159,0.081213,0.46496,...,-0.167716,-0.27071,-0.154104,-0.780055,0.750137,-0.257237,0.034507,0.005168,4.99,0
7,7.0,-0.644269,1.417964,1.07438,-0.492199,0.948934,0.428118,1.120631,-3.807864,0.615375,...,1.943465,-1.015455,0.057504,-0.649709,-0.415267,-0.051634,-1.206921,-1.085339,40.8,0
8,7.0,-0.894286,0.286157,-0.113192,-0.271526,2.669599,3.721818,0.370145,0.851084,-0.392048,...,-0.073425,-0.268092,-0.204233,1.011592,0.373205,-0.384157,0.011747,0.142404,93.2,0
9,9.0,-0.338262,1.119593,1.044367,-0.222187,0.499361,-0.246761,0.651583,0.069539,-0.736727,...,-0.246914,-0.633753,-0.120794,-0.38505,-0.069733,0.094199,0.246219,0.083076,3.68,0


In [23]:
# Number of rows per value of the 'Class' label, to gauge class balance.
class_counts = df.groupby('Class').size()
class_counts

Class
0    284315
1       492
dtype: int64

In [24]:
# Model inputs: every column from 'Time' through 'Amount' (label slices are inclusive).
feature_columns = df.loc[:, 'Time':'Amount']
# Labels: open-ended slice starting at 'Class'. It keeps the result two-dimensional
# and would also pick up any column positioned after 'Class'.
target_columns = df.loc[:, 'Class':]

features = feature_columns.to_numpy()
targets = target_columns.to_numpy()

# Report (rows, columns) for the inputs first, then the labels.
for array in (features, targets):
    print(array.shape)

(284807, 30)
(284807, 1)


## Prepare a validation set

In [25]:
# Hold out the trailing fraction of rows for validation; rows are not shuffled,
# so the split follows the order in which they appear in the file.
VAL_FRACTION = 0.2
num_val_samples = int(len(features) * VAL_FRACTION)

# One pair of slice objects so inputs and labels are cut at the same row.
train_rows = slice(None, -num_val_samples)
val_rows = slice(-num_val_samples, None)

train_features, train_targets = features[train_rows], targets[train_rows]
val_features, val_targets = features[val_rows], targets[val_rows]

print("Number of training samples:", len(train_features))
print("Number of validation samples:", len(val_features))

Number of training samples: 227846
Number of validation samples: 56961


## Normalize the data using training set statistics

In [26]:
# Standardise each feature column using statistics from the training rows only,
# and apply the same shift/scale to the validation rows.
# The updates are in place: both splits were obtained by slicing `features`,
# so the underlying `features` array is modified as well.
mean = train_features.mean(axis=0)
for split in (train_features, val_features):
    split -= mean

# Standard deviation is taken after centring, again from the training rows.
std = train_features.std(axis=0)
for split in (train_features, val_features):
    split /= std

## Build a binary classification model

In [27]:
import random

import tensorflow as tf
from tensorflow import keras

# Reproducibility: only NumPy is seeded when the data is loaded, which does not
# cover Keras weight initialisation or dropout. Seeding Python's `random` and
# TensorFlow here mirrors what `keras.utils.set_random_seed` does, without
# requiring a TensorFlow release that ships that helper.
# NOTE: some GPU kernels can still be nondeterministic, so results may not be
# bit-for-bit identical across machines.
MODEL_SEED = 123  # same value as the NumPy seed used at the top of the notebook
random.seed(MODEL_SEED)
tf.random.set_seed(MODEL_SEED)

# Fully connected binary classifier:
#   two 256-unit ReLU layers -> dropout (rate 0.3) -> single sigmoid output.
# The input width is taken from the last axis of the training feature matrix.
model = keras.Sequential([keras.layers.Dense(256, activation="relu",
                                             input_shape=(train_features.shape[-1],)),
                          keras.layers.Dense(256, activation="relu"),
                          keras.layers.Dropout(0.3),
                          keras.layers.Dense(1, activation="sigmoid")])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 256)               7936      
                                                                 
 dense_10 (Dense)            (None, 256)               65792     
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_11 (Dense)            (None, 1)                 257       
                                                                 
Total params: 73,985
Trainable params: 73,985
Non-trainable params: 0
_________________________________________________________________


## Train the model

In [28]:
import tensorflow_addons as tfa

# Metrics reported each epoch: raw confusion-matrix counts, precision/recall,
# and an F1 score computed at a 0.5 decision threshold.
named_keras_metrics = (
    ("fn", keras.metrics.FalseNegatives),
    ("fp", keras.metrics.FalsePositives),
    ("tn", keras.metrics.TrueNegatives),
    ("tp", keras.metrics.TruePositives),
    ("precision", keras.metrics.Precision),
    ("recall", keras.metrics.Recall),
)
metrics = [metric_cls(name=label) for label, metric_cls in named_keras_metrics]
metrics.append(tfa.metrics.F1Score(num_classes=1, threshold=0.5))

# Focal loss from TensorFlow Addons; alpha and gamma are its balancing and
# focusing parameters (see the TFA documentation for their exact definitions).
loss_func = tfa.losses.SigmoidFocalCrossEntropy(alpha=0.99, gamma=3.0)

optimizer = keras.optimizers.Adam(1e-2)
model.compile(optimizer=optimizer, loss=loss_func, metrics=metrics)

# verbose=2 prints one summary line per epoch; the held-out split is scored
# at the end of every epoch.
fit_options = dict(
    batch_size=2048,
    epochs=20,
    verbose=2,
    validation_data=(val_features, val_targets),
)
model.fit(train_features, train_targets, **fit_options)

Epoch 1/20
112/112 - 3s - loss: 6.7079e-04 - fn: 55.0000 - fp: 6166.0000 - tn: 221263.0000 - tp: 362.0000 - precision: 0.0555 - recall: 0.8681 - f1_score: 0.1042 - val_loss: 1.4297e-04 - val_fn: 10.0000 - val_fp: 294.0000 - val_tn: 56592.0000 - val_tp: 65.0000 - val_precision: 0.1811 - val_recall: 0.8667 - val_f1_score: 0.2995 - 3s/epoch - 24ms/step
Epoch 2/20
112/112 - 2s - loss: 2.8157e-04 - fn: 47.0000 - fp: 2397.0000 - tn: 225032.0000 - tp: 370.0000 - precision: 0.1337 - recall: 0.8873 - f1_score: 0.2324 - val_loss: 1.4567e-04 - val_fn: 12.0000 - val_fp: 185.0000 - val_tn: 56701.0000 - val_tp: 63.0000 - val_precision: 0.2540 - val_recall: 0.8400 - val_f1_score: 0.3901 - 2s/epoch - 15ms/step
Epoch 3/20
112/112 - 2s - loss: 3.1336e-04 - fn: 46.0000 - fp: 1973.0000 - tn: 225456.0000 - tp: 371.0000 - precision: 0.1583 - recall: 0.8897 - f1_score: 0.2687 - val_loss: 2.7934e-04 - val_fn: 13.0000 - val_fp: 89.0000 - val_tn: 56797.0000 - val_tp: 62.0000 - val_precision: 0.4106 - val_recall

<keras.callbacks.History at 0x219052f1fa0>