In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Model,Sequential
from tensorflow.keras.layers import Dense,Dropout
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
data=pd.read_csv('creditcard.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
data['Time']=data['Time'].apply(lambda t:(t/3600)%24)

In [7]:
good_data=data[data['Class']==0].sample(1000)
fraud_data=data[data['Class']==1]
reduced=good_data.append(fraud_data).reset_index(drop=True)
reduced.shape

(1492, 31)

In [9]:
features=reduced.drop('Class',axis=1)
target=reduced['Class']
xtrain,xtest,ytrain,ytest=train_test_split(features,target,test_size=0.2)
train_index=ytrain[ytrain==1].index
train_data=xtrain.loc[train_index]

In [10]:
minmaxscaler=MinMaxScaler()
x_train_scaled=minmaxscaler.fit_transform(train_data.copy())
x_test_scaled=minmaxscaler.transform(xtest.copy())

In [13]:
class AutoEncoder(Model):
    def __init__(self,output_units,ldim=8):
        super().__init__()
        self.encoder=Sequential([
            Dense(64,activation="relu"),
            Dropout(0.1),
            Dense(32,activation='relu'),
            Dropout(0.1),
            Dense(16,activation='relu'),
            Dropout(0.1),
            Dense(ldim,activation='relu')
        ])
        
        self.decoder=Sequential([
            Dense(16,activation='relu'),
            Dropout(0.1),
            Dense(32,activation="relu"),
            Dropout(0.1),
            Dense(64,activation='relu'),
            Dropout(0.1),
            Dense(output_units,activation="sigmoid")
            
        ])
    
    def call(self,input):
        encoded=self.encoder(input)
        decoded=self.decoder(input)
        return decoded

In [14]:
model=AutoEncoder(output_units=x_train_scaled.shape[1])
model.compile(optimizer="adam",loss="msle",metrics=['mse'])

history = model.fit(
    x_train_scaled, x_train_scaled,
    epochs=20, batch_size=512,
    validation_data=(x_test_scaled, x_test_scaled),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
def find_threshold(model,x_train_scaled):
    reconstruction=model.predict(x_train_scaled)
    reconstruction_errors=tf.keras.losses.msle(reconstruction,x_train_scaled)
    threshold=np.mean(reconstruction_errors.numpy())+0.001*np.std(reconstruction_errors.numpy())
    return threshold

def get_prediction(model,x_test_scaled,threshold):
    reconstruction=model.predict(x_test_scaled)
    error=tf.keras.losses.msle(reconstruction,x_test_scaled)
    anomaly_mask=pd.Series(error)>threshold
    pred=anomaly_mask.map(lambda x:0.0 if x==True else 1.0)
    return pred
    

In [17]:
threshold=find_threshold(model,x_train_scaled)
threshold



0.02276748748150065

In [19]:
predictions=get_prediction(model,x_test_scaled,threshold)
accuracy_score(predictions,ytest)



0.882943143812709