In [1]:
from NeuralNet import Value, MLP

In [2]:
import numpy as np
import pandas as pd

In [3]:
class AutoEncoder:
    def __init__(self, nin):
        self.encoder = MLP(nin, [16, 8])
        self.decoder = MLP(8, [16, nin])

    def __call__(self, x):
        z = self.encoder(x)
        out = self.decoder(z)
        return out

    def parameters(self):
        return self.encoder.parameters() + self.decoder.parameters()


In [4]:
def numpy_row_to_value_list(x):
    return [Value(float(v)) for v in x]


In [5]:
df = pd.read_csv("Creditcard.csv")
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [6]:
X = df.drop("Class", axis=1)
y = df["Class"]

In [7]:
X["Time"] = (X["Time"] - X["Time"].mean()) / X["Time"].std()
X["Amount"] = (X["Amount"] - X["Amount"].mean()) / X["Amount"].std()


In [8]:
X_normal = X[y == 0]
X_fraud  = X[y == 1]

print("Normal:", len(X_normal))
print("Fraud:", len(X_fraud))


Normal: 284315
Fraud: 492


In [9]:
X_normal_np = X_normal.values
X_fraud_np  = X_fraud.values


In [10]:
train_np = X_normal_np[:2000]   # use 2000 normal samples
train_data = [numpy_row_to_value_list(x) for x in train_np]


In [11]:
model = AutoEncoder(30)
params = model.parameters()


In [12]:
def mse_loss(y_true, y_pred):
    loss = Value(0.0)
    for yt, yp in zip(y_true, y_pred):
        loss += (yt - yp) ** 2
    return loss * (1.0 / len(y_true))
def zero_grad(params):
    for p in params:
        p.grad = 0

def step(params, lr=0.01):
    for p in params:
        p.data -= lr * p.grad


In [13]:
epochs = 10
lr = 0.01

for epoch in range(epochs):
    total_loss = 0

    for x in train_data:
        out = model(x)
        loss = mse_loss(x, out)

        zero_grad(params)
        loss.backward()
        step(params, lr)

        total_loss += loss.data

    print(f"Epoch {epoch+1}, Loss = {total_loss/len(train_data)}")


Epoch 1, Loss = 1.0686498921505134
Epoch 2, Loss = 0.8060392674571363
Epoch 3, Loss = 0.7303113010265411
Epoch 4, Loss = 0.7064809137315045
Epoch 5, Loss = 0.6896501985382077
Epoch 6, Loss = 0.6753484655080975
Epoch 7, Loss = 0.6618224745696895
Epoch 8, Loss = 0.6412450840309493
Epoch 9, Loss = 0.6254621324137907
Epoch 10, Loss = 0.6165209983985036


In [14]:
def reconstruction_error(x):
    x_val = numpy_row_to_value_list(x)
    out = model(x_val)
    return sum((xi.data - oi.data)**2 for xi, oi in zip(x_val, out)) / len(x)


In [15]:
train_errors = [reconstruction_error(x) for x in train_np]
threshold = np.percentile(train_errors, 99.5)

print("Threshold:", threshold)


Threshold: 5.049237858222057


In [20]:
# Build test set
X_test = np.vstack([
    X_normal_np[:1000],
    X_fraud_np[:200]
])

y_test = np.array([0]*1000 + [1]*200)



y_pred = []
for x in X_test:
    err = reconstruction_error(x)
    y_pred.append(1 if err > threshold else 0)


In [22]:
from sklearn.metrics import confusion_matrix, classification_report

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


[[997   3]
 [ 51 149]]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1000
           1       0.98      0.74      0.85       200

    accuracy                           0.95      1200
   macro avg       0.97      0.87      0.91      1200
weighted avg       0.96      0.95      0.95      1200

