# In [None]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
import pandas as pd
import numpy as np
import math
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, roc_curve, auc

RANDOM_SEED = 2021

# Fraction of the normal (label 0) rows used to fit the autoencoder; the
# remaining normal rows are held out for validation / early stopping.
TRAIN_FRACTION = 0.7
# Rows whose reconstruction error is at or above this percentile of all
# scored rows are reported as anomalies.
ANOMALY_PERCENTILE = 95


def split_normal_rows(features, labels, train_fraction=0.7):
    """Split the normal (label 0) rows into a training and a hold-out part.

    The split keeps the original row order: the first
    ``floor(train_fraction * n_normal)`` normal rows are used for training
    and the rest are held out.

    Args:
        features: 2-D array-like, one row per sample.
        labels: 1-D array-like aligned with ``features``; 0 marks a normal row.
        train_fraction: Share of the normal rows used for training, strictly
            between 0 and 1.

    Returns:
        A ``(train_rows, holdout_rows)`` tuple of arrays.

    Raises:
        ValueError: If ``train_fraction`` is outside (0, 1) or there are too
            few normal rows to leave at least one row on each side.
    """
    if not 0.0 < train_fraction < 1.0:
        raise ValueError("train_fraction must be strictly between 0 and 1")
    features = np.asarray(features)
    labels = np.asarray(labels)
    normal_rows = features[labels == 0]
    # Size the split from the normal rows themselves, not from the whole
    # data set, so the slice can never run past the end of `normal_rows`.
    n_train = math.floor(train_fraction * len(normal_rows))
    if n_train == 0:
        raise ValueError("not enough normal rows to build a training set")
    return normal_rows[:n_train], normal_rows[n_train:]


def fit_min_max(reference):
    """Return per-column ``(minimum, span)`` of ``reference`` for scaling.

    Columns that are constant in ``reference`` get a span of 1 so that
    scaling them does not divide by zero.
    """
    reference = np.asarray(reference, dtype=np.float64)
    col_min = reference.min(axis=0)
    col_span = reference.max(axis=0) - col_min
    col_span[col_span == 0] = 1.0
    return col_min, col_span


def apply_min_max(values, col_min, col_span):
    """Scale ``values`` column-wise with statistics from ``fit_min_max``.

    Rows of the reference data map into [0, 1]; other rows may fall outside
    that interval. The result is returned as ``float32``.
    """
    scaled = (np.asarray(values, dtype=np.float64) - col_min) / col_span
    return scaled.astype(np.float32)


def reconstruction_error(inputs, reconstructions):
    """Return the per-row mean squared error between two 2-D arrays."""
    residual = np.asarray(inputs) - np.asarray(reconstructions)
    return np.mean(np.square(residual), axis=1)


def flag_high_error(scores, percentile=95):
    """Return 1 for scores at or above the given percentile, else 0.

    Only the upper tail is flagged: a low reconstruction error means the
    model reproduced the row well, which is the opposite of an anomaly.
    """
    scores = np.asarray(scores)
    cutoff = np.percentile(scores, percentile)
    return (scores >= cutoff).astype(int)


if __name__ == '__main__':
    # Seed both libraries so repeated runs are as reproducible as the
    # backend allows.
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

    data = pd.read_csv('2_annthyroid.csv')
    X_data = data.iloc[:, :-1].values  # every column but the last: features
    y = data.iloc[:, -1].values        # last column: label, 0 = normal

    # The autoencoder is fitted on normal rows only.
    normal_train, normal_holdout = split_normal_rows(X_data, y, TRAIN_FRACTION)

    # The decoder ends in tanh (range [-1, 1]), so every array the model
    # sees must be scaled. Statistics come from the training rows only and
    # are then applied unchanged to the hold-out and evaluation data.
    col_min, col_span = fit_min_max(normal_train)
    X_train = apply_min_max(normal_train, col_min, col_span)
    X_val = apply_min_max(normal_holdout, col_min, col_span)
    X = apply_min_max(X_data, col_min, col_span)
    X_test = X  # all rows are scored, normal and anomalous alike

    nb_epoch = 50
    batch_size = 64
    input_dim = X_train.shape[1]  # num of columns
    encoding_dim = 14
    hidden_dim_1 = int(encoding_dim / 2)
    hidden_dim_2 = 4
    activity_l2 = 1e-7  # L2 factor of the first layer's activity regularizer

    # Encoder
    input_layer = tf.keras.layers.Input(shape=(input_dim,))
    encoder = tf.keras.layers.Dense(
        encoding_dim, activation="tanh",
        activity_regularizer=tf.keras.regularizers.l2(activity_l2))(input_layer)
    encoder = tf.keras.layers.Dropout(0.2)(encoder)
    encoder = tf.keras.layers.Dense(hidden_dim_1, activation='relu')(encoder)
    encoder = tf.keras.layers.Dense(hidden_dim_2, activation=tf.nn.leaky_relu)(encoder)

    # Decoder
    decoder = tf.keras.layers.Dense(hidden_dim_1, activation='relu')(encoder)
    decoder = tf.keras.layers.Dropout(0.2)(decoder)
    decoder = tf.keras.layers.Dense(encoding_dim, activation='relu')(decoder)
    decoder = tf.keras.layers.Dense(input_dim, activation='tanh')(decoder)

    autoencoder = tf.keras.Model(inputs=input_layer, outputs=decoder)
    autoencoder.summary()

    # Keep the weights with the lowest validation loss on disk.
    cp = tf.keras.callbacks.ModelCheckpoint(filepath="autoencoder_fraud.h5",
                                            mode='min', monitor='val_loss', verbose=2, save_best_only=True)

    # Define early stopping
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.0001,
        patience=10,
        verbose=1,
        mode='min',
        restore_best_weights=True)

    # NOTE: 'accuracy' is not a meaningful metric for a reconstruction loss;
    # it is kept only so the keys of `history` stay the same.
    autoencoder.compile(metrics=['accuracy'],
                        loss='mean_squared_error',
                        optimizer='adam')

    # Validate on held-out normal rows: val_loss drives early stopping and
    # checkpointing, so it must not reward reconstructing anomalies.
    history = autoencoder.fit(X_train, X_train,
                              epochs=nb_epoch,
                              batch_size=batch_size,
                              shuffle=True,
                              validation_data=(X_val, X_val),
                              verbose=1,
                              callbacks=[cp, early_stop]
                              ).history

    test_x_predictions = autoencoder.predict(X_test)
    mse = reconstruction_error(X_test, test_x_predictions)

    y_probs = mse  # the reconstruction error is the anomaly score

    # Classify data points as anomalies (1) or normal (0)
    y_pred = flag_high_error(mse, ANOMALY_PERCENTILE)

    # Precision, recall and F1 use the thresholded labels; the ROC curve
    # needs the continuous score so that every threshold is evaluated.
    precision_auto = precision_score(y, y_pred)
    recall_auto = recall_score(y, y_pred)
    f1_score_auto = f1_score(y, y_pred)
    fpr8, tpr8, thresholds = roc_curve(y, y_probs)
    auc_roc_auto = auc(fpr8, tpr8)

    print("Precision: {:.4f}".format(precision_auto))
    print("Recall: {:.4f}".format(recall_auto))
    print("F1-score: {:.4f}".format(f1_score_auto))
    print("AUC-ROC: {:.4f}".format(auc_roc_auto))