In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix
)

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping


2026-01-08 09:32:34.311575: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1767864754.657819      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1767864754.775120      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1767864755.707852      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767864755.707922      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1767864755.707926      55 computation_placer.cc:177] computation placer alr

In [2]:
X = pd.read_csv("/kaggle/input/anamoly/X_features.csv")
y = pd.read_csv("/kaggle/input/anamoly/y_labels.csv").values.ravel()

X.shape, y.shape


((594643, 17), (594643,))

In [3]:
X_normal = X[y == 0]
X_fraud = X[y == 1]

X_normal.shape, X_fraud.shape


((587443, 17), (7200, 17))

In [4]:
X_train, X_val = train_test_split(
    X_normal,
    test_size=0.2,
    random_state=42
)


In [5]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

X_all_scaled = scaler.transform(X)


In [6]:
input_dim = X_train_scaled.shape[1]

input_layer = Input(shape=(input_dim,))
encoded = Dense(32, activation="relu")(input_layer)
encoded = Dense(16, activation="relu")(encoded)

decoded = Dense(32, activation="relu")(encoded)
output_layer = Dense(input_dim, activation="linear")(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)


2026-01-08 09:33:09.002382: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [7]:
autoencoder.compile(
    optimizer="adam",
    loss="mse"
)

autoencoder.summary()


In [8]:
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

history = autoencoder.fit(
    X_train_scaled,
    X_train_scaled,
    epochs=50,
    batch_size=256,
    validation_data=(X_val_scaled, X_val_scaled),
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 0.2491 - val_loss: 0.0031
Epoch 2/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 0.0016 - val_loss: 6.8477e-05
Epoch 3/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 7.7319e-05 - val_loss: 7.2805e-05
Epoch 4/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 7.3333e-05 - val_loss: 1.0238e-05
Epoch 5/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 1.1339e-05 - val_loss: 3.0350e-06
Epoch 6/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 5.2902e-05 - val_loss: 3.6901e-06
Epoch 7/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 2.0414e-05 - val_loss: 0.0020
Epoch 8/50
[1m1836/1836[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 7.6255e-05

In [9]:
reconstructions = autoencoder.predict(X_all_scaled)
reconstruction_error = np.mean(
    np.square(X_all_scaled - reconstructions),
    axis=1
)


[1m18583/18583[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 1ms/step


In [10]:
threshold = np.percentile(
    reconstruction_error[y == 0],
    95
)

threshold


np.float64(5.568913799115599e-07)

In [11]:
y_pred = (reconstruction_error > threshold).astype(int)


In [12]:
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)

precision, recall, f1


(0.1753558494062158, 0.8675, 0.2917396482869754)

In [13]:
roc_auc = roc_auc_score(y, reconstruction_error)
roc_auc


np.float64(0.9684007576054187)

In [14]:
confusion_matrix(y, y_pred)


array([[558070,  29373],
       [   954,   6246]])

In [15]:
print(classification_report(y, y_pred, digits=4))


              precision    recall  f1-score   support

           0     0.9983    0.9500    0.9735    587443
           1     0.1754    0.8675    0.2917      7200

    accuracy                         0.9490    594643
   macro avg     0.5868    0.9087    0.6326    594643
weighted avg     0.9883    0.9490    0.9653    594643

