In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('creditcard.csv')

print(df.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [3]:
genuine = df[df['Class'] == 0]
fraud = df[df['Class'] == 1]

In [4]:
def normalize_columns(df):
    for column in df:
        min_val = df[column].min()
        max_val = df[column].max()
        df[column] = (df[column] - min_val)/(max_val - min_val)
    return df

In [5]:
genuine_features = genuine.iloc[:, :-1]
genuine_features = normalize_columns(genuine_features)

In [6]:
fraud_features = fraud.iloc[:, :-1]
fraud_features = normalize_columns(fraud_features)

In [7]:
genuine_features_train = genuine_features.iloc[:-500, :].to_numpy()

In [8]:
genuine_features_test = genuine_features.iloc[-500:, :].to_numpy()
fraud_features_test = fraud_features.to_numpy()

In [9]:
print(genuine_features_train.shape)
print(genuine_features_test.shape)
print(fraud_features_test.shape)

(283815, 30)
(500, 30)
(492, 30)


In [10]:
model = tf.keras.Sequential()

model.add(tf.keras.layers.Input(shape=[30]))
model.add(tf.keras.layers.Dense(14, activation='relu'))
model.add(tf.keras.layers.Dense(7, activation='relu'))
model.add(tf.keras.layers.Dense(14, activation='relu'))
model.add(tf.keras.layers.Dense(30, activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 14)                434       
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 105       
_________________________________________________________________
dense_2 (Dense)              (None, 14)                112       
_________________________________________________________________
dense_3 (Dense)              (None, 30)                450       
Total params: 1,101
Trainable params: 1,101
Non-trainable params: 0
_________________________________________________________________


In [11]:
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['accuracy'])

In [12]:
model.fit(genuine_features_train, genuine_features_train, epochs=10)

Train on 283815 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f329a0de518>

In [13]:
reconstruction = model.predict(genuine_features_test)

In [14]:
reconstruction_error = np.mean(np.power(genuine_features_test - reconstruction, 2), axis=1)

In [15]:
print(reconstruction_error.min())
print(reconstruction_error.max())

0.00047686437590789104
0.009273863549461366


In [16]:
reconstruction = model.predict(fraud_features_test)

In [17]:
reconstruction_error = np.mean(np.power(fraud_features_test - reconstruction, 2), axis=1)

In [18]:
print(reconstruction_error.min())
print(reconstruction_error.max())

0.04148048200763165
0.18242503113790112
