In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Split the training rows by label. The autoencoder below is fitted only on
# the label-0 rows (named "legit" here), so that rows it reconstructs poorly
# can be treated as anomalies.
X_legit = X_train[y_train == 0]
X_fraud = X_train[y_train == 1]  # not used in this cell; kept in case later cells read it

# Hold out 20% of the label-0 rows: used for validation loss during training
# and, further down, for calibrating the anomaly threshold.
X_legit_train, X_legit_val = train_test_split(X_legit, test_size=0.2, random_state=42)

# The split above is seeded, but weight initialisation and batch shuffling
# are not. Seed the Python, NumPy and TensorFlow RNGs so a fresh
# Restart & Run All reproduces the same model.
set_random_seed(42)

input_dim = X_legit_train.shape[1]
encoding_dim = 8  # width of the bottleneck layer

# Symmetric dense autoencoder:
#   input_dim -> 64 -> 32 -> encoding_dim -> 32 -> 64 -> input_dim
input_layer = Input(shape=(input_dim,))
x = Dense(64, activation="relu")(input_layer)
x = Dense(32, activation="relu")(x)
bottleneck = Dense(encoding_dim, activation="relu")(x)
x = Dense(32, activation="relu")(bottleneck)
x = Dense(64, activation="relu")(x)
# Linear output so the reconstruction is not clipped to a fixed range.
output = Dense(input_dim, activation="linear")(x)

autoencoder = Model(input_layer, output)
autoencoder.compile(optimizer=Adam(0.001), loss='mse')

# Input and target are the same array: the network learns to reproduce its input.
autoencoder.fit(
    X_legit_train, X_legit_train,
    validation_data=(X_legit_val, X_legit_val),
    epochs=20,
    batch_size=64,
    shuffle=True
)

# Anomaly score: per-row mean squared reconstruction error on the test set.
reconstructions = autoencoder.predict(X_test)
mse = np.mean(np.square(X_test - reconstructions), axis=1)

# Anomaly threshold: the 99th percentile of the reconstruction error on the
# held-out label-0 rows, i.e. about 1% of those rows score above it.
threshold_percentile = 99
val_reconstructions = autoencoder.predict(X_legit_val)
val_mse = np.mean(np.square(X_legit_val - val_reconstructions), axis=1)
threshold = np.percentile(val_mse, threshold_percentile)

# Flag anomalies: 1 where the test-row error exceeds the threshold, else 0.
auto_preds = (mse > threshold).astype(int)

Epoch 1/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.7888 - val_loss: 0.4629
Epoch 2/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.2471 - val_loss: 0.2675
Epoch 3/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.1338 - val_loss: 0.1432
Epoch 4/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0516 - val_loss: 0.1239
Epoch 5/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0412 - val_loss: 0.1292
Epoch 6/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0450 - val_loss: 0.1065
Epoch 7/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0303 - val_loss: 0.0993
Epoch 8/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0234 - val_loss: 0.0931
Epoch 9/20
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [19]:
from sklearn.metrics import classification_report, roc_auc_score

# Per-class precision / recall / F1 for the thresholded 0/1 predictions.
autoencoder_report = classification_report(y_test, auto_preds)
print(autoencoder_report)

# ROC-AUC is computed from the continuous reconstruction error rather than
# the thresholded predictions, so it does not depend on the chosen threshold.
autoencoder_roc_auc = roc_auc_score(y_test, mse)
print("ROC-AUC:", autoencoder_roc_auc)

              precision    recall  f1-score   support

           0       0.58      0.99      0.73      1700
           1       0.97      0.25      0.39      1643

    accuracy                           0.63      3343
   macro avg       0.77      0.62      0.56      3343
weighted avg       0.77      0.63      0.56      3343

ROC-AUC: 0.8707815688661343


In [20]:
# Create the output directory if needed. exist_ok=True makes this a no-op when
# the directory already exists and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs('models', exist_ok=True)

# Persist the trained autoencoder in HDF5 format.
# NOTE(review): recent Keras releases appear to treat .h5 as a legacy format
# and recommend the native .keras extension; the path is left unchanged here
# so that any existing code loading this file keeps working.
autoencoder.save("models/autoencoder_model.h5")


