In [1]:
import pandas as pd


train = pd.read_csv("train_res.csv")
X_train = train.drop("Class", axis=1)
y_train = train["Class"]


X_train_normal = X_train[y_train == 0]


In [2]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

input_dim = X_train_normal.shape[1]


input_layer = Input(shape=(input_dim,))
encoded = Dense(16, activation='relu')(input_layer)
encoded = Dense(8, activation='relu')(encoded)


decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')


In [3]:
autoencoder.fit(
    X_train_normal, X_train_normal,
    epochs=50,
    batch_size=256,
    validation_split=0.1,
    shuffle=True
)


Epoch 1/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.6693 - val_loss: 0.4388
Epoch 2/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.4029 - val_loss: 0.3630
Epoch 3/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3602 - val_loss: 0.3439
Epoch 4/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.3459 - val_loss: 0.3360
Epoch 5/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3388 - val_loss: 0.3301
Epoch 6/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.3342 - val_loss: 0.3267
Epoch 7/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.3309 - val_loss: 0.3236
Epoch 8/50
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 0.3283 - val_loss: 0.3217
Epoch 9/50
[1m800/800[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x24a2607efe0>

In [4]:
import numpy as np

X_test = pd.read_csv("test_set.csv").drop("Class", axis=1)
y_test = pd.read_csv("test_set.csv")["Class"]

X_test_pred = autoencoder.predict(X_test)
reconstruction_error = np.mean(np.square(X_test - X_test_pred), axis=1)


[1m1781/1781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 855us/step


In [5]:
threshold = np.percentile(reconstruction_error[y_test==0], 95)
y_pred_auto = (reconstruction_error > threshold).astype(int)


In [6]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
print("Autoencoder Metrics:")
print("Precision:", precision_score(y_test, y_pred_auto))
print("Recall:", recall_score(y_test, y_pred_auto))
print("F1-score:", f1_score(y_test, y_pred_auto))
print("AUC-ROC:", roc_auc_score(y_test, reconstruction_error))

Autoencoder Metrics:
Precision: 0.02968270214943705
Recall: 0.8877551020408163
F1-score: 0.05744470122152526
AUC-ROC: 0.9664717392303009


In [7]:
autoencoder.save("autoencoder_model.h5")

