# Anomaly detection ANN
That model based on an Artificial Neural Network (ANN) and using k-fold cross-validation for model fitting

## Imports

In [5]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

## Data preparation

In [26]:
dataframe = pd.read_csv('../dataset/ecg_prod_25k.csv', header=None)
raw_data = dataframe.values

labels = raw_data[:, -1]
data = raw_data[:, 0:-1]

train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=50)

train_data[0]

array([0.37384388, 0.33639914, 0.29220684, 0.2369105 , 0.18396553,
       0.23330904, 0.32890871, 0.34812805, 0.35549459, 0.37457943,
       0.39906553, 0.38121472, 0.39477823, 0.40856196, 0.41803048,
       0.42485283, 0.42846819, 0.43021706, 0.42782421, 0.43061995,
       0.44178457, 0.44811007, 0.44674998, 0.44670517, 0.44145568,
       0.44660167, 0.4498304 , 0.44443793, 0.44170477, 0.43951079,
       0.43901022, 0.44621926, 0.43983829, 0.43034966, 0.42979373,
       0.42337279, 0.42805703, 0.4304129 , 0.43278902, 0.44333507,
       0.44340251, 0.44593788, 0.45297184, 0.46326663, 0.46850411,
       0.46642271, 0.45947805, 0.46982658, 0.47641252, 0.48017024,
       0.47851906, 0.47849433, 0.48098072, 0.47342438, 0.47343824,
       0.46571627, 0.46505243, 0.46624539, 0.45994229, 0.46660134,
       0.46842879, 0.47157793, 0.46928962, 0.47351208, 0.47037597,
       0.47381511, 0.46626443, 0.47107282, 0.48755017, 0.49231223,
       0.49615439, 0.50338477, 0.50035226, 0.50103022, 0.50082

## ANN model

In [27]:
def build_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=[tf.keras.metrics.binary_accuracy]
    )
    return model


## Model training
Below there is a process of fitting model using k-fold cross-validation

In [23]:
k = int(4)
num_val_samples = len(train_data) // k
epochs = 10
all_scores = []

for i in range(k):
    print(f'processing fold #{i}')
    val_data = train_data[i * num_val_samples: (i+1) * num_val_samples]
    val_labels = train_labels[i*num_val_samples: (i+1) * num_val_samples]

    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i+1) * num_val_samples:]],
        axis=0
    )

    partial_train_labels = np.concatenate(
        [train_labels[:i * num_val_samples],
        train_labels[(i+1) * num_val_samples:]],
        axis=0
    )

    model = build_model()

    model.fit(
        partial_train_data,
        partial_train_labels,
        epochs=epochs,
        batch_size=128
    )

    results = model.evaluate(val_data, val_labels, verbose=0)

processing fold #0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
processing fold #1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
processing fold #2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
processing fold #3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Model saving

In [28]:
model = build_model()

model.fit(
    train_data,
    train_labels,
    epochs=epochs,
    batch_size=128
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1f52bcfcf10>

In [30]:
dataframe = pd.read_csv('../dataset/ecg_prod_10k.csv', header=None)
random_dataframe = dataframe
raw_data = random_dataframe.values

labels = raw_data[:, -1]
data = raw_data[:, 0:-1]
result = model.predict(data)


def mean_squared_error(y, y_pred):
    # Calculate the squared errors
    squared_errors = (y - y_pred) ** 2

    # Mean of the sum of squared errors
    mse = np.mean(squared_errors)

    return mse


mse = []

for i in range(len(result)):
    mse.append(mean_squared_error(data[i], result[i]))
correctly_recognized_samples_normal = 0
correctly_recognized_samples_anomalous = 0
temp = []
corr_max = [0, float('inf')]
for i in np.arange(0.01, 1.0, 0.01):
    for j in range(0, len(mse)):
        if mse[j] < i and labels[j] == 0:
            correctly_recognized_samples_anomalous += 1
        if mse[j] > i and labels[j] == 1:
            correctly_recognized_samples_normal += 1
    if abs(correctly_recognized_samples_normal / 18750 - correctly_recognized_samples_anomalous / 6750) < corr_max[1]:
        corr_max = [i,
                    (correctly_recognized_samples_normal / 18750 + correctly_recognized_samples_anomalous / 6750) / 2]
        temp = [correctly_recognized_samples_normal, correctly_recognized_samples_anomalous]
    correctly_recognized_samples_normal = 0
    correctly_recognized_samples_anomalous = 0
print(corr_max)

[0.29000000000000004, 0.635682962962963]
[0.29000000000000004, 0.25414518518518514]


In [25]:
model.save('../models/detectors/ann_10k')

INFO:tensorflow:Assets written to: ../models/detectors/ann_10k\assets


INFO:tensorflow:Assets written to: ../models/detectors/ann_10k\assets


## Model evaluating and testing