# Anomaly detection ANN
That model based on an Artificial Neural Network (ANN) and using k-fold cross-validation for model fitting

## Imports

In [41]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

## Data preparation

In [42]:
dataframe = pd.read_csv('../dataset/ecg_prod_10k.csv', header=None)
raw_data = dataframe.values

labels = raw_data[:, -1]
data = raw_data[:, 0:-1]

train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=50)

train_data[0]

array([0.43856217, 0.31998723, 0.28357966, 0.25503551, 0.31974018,
       0.27382751, 0.30601811, 0.37091615, 0.35952522, 0.36875805,
       0.39390833, 0.38083501, 0.41141457, 0.42161104, 0.44592139,
       0.42734173, 0.44325172, 0.45226464, 0.45588315, 0.45615021,
       0.46126892, 0.4594142 , 0.44860983, 0.45566601, 0.46128901,
       0.4511602 , 0.44098335, 0.44556196, 0.45394015, 0.45378613,
       0.46169566, 0.45548839, 0.46373706, 0.46524348, 0.46590374,
       0.46840626, 0.4626451 , 0.46541407, 0.46733126, 0.44610356,
       0.4463972 , 0.44495674, 0.43810058, 0.43905837, 0.43500268,
       0.43586923, 0.43759447, 0.42949338, 0.42843   , 0.42417574,
       0.42315943, 0.43451327, 0.44184026, 0.43267711, 0.45083406,
       0.45667065, 0.46859403, 0.466902  , 0.46468218, 0.45760407,
       0.45066606, 0.44826285, 0.45120689, 0.44884684, 0.45278948,
       0.45380569, 0.45625984, 0.44249859, 0.45375518, 0.43860282,
       0.43860004, 0.43027888, 0.44647445, 0.44963023, 0.46001

## ANN model

In [43]:
def build_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(32, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dropout(0.1))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=[tf.keras.metrics.binary_accuracy]
    )
    return model


## Model training
Below there is a process of fitting model using k-fold cross-validation

In [22]:
k = int(4)
num_val_samples = len(train_data) // k
epochs = 10
all_scores = []

for i in range(k):
    print(f'processing fold #{i}')
    val_data = train_data[i * num_val_samples: (i+1) * num_val_samples]
    val_labels = train_labels[i*num_val_samples: (i+1) * num_val_samples]

    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i+1) * num_val_samples:]],
        axis=0
    )

    partial_train_labels = np.concatenate(
        [train_labels[:i * num_val_samples],
        train_labels[(i+1) * num_val_samples:]],
        axis=0
    )

    model = build_model()

    model.fit(
        partial_train_data,
        partial_train_labels,
        epochs=epochs,
        batch_size=128
    )

    results = model.evaluate(val_data, val_labels, verbose=0)

processing fold #0
Epoch 1/10
 20/118 [====>.........................] - ETA: 5s - loss: 0.5673 - binary_accuracy: 0.7414

KeyboardInterrupt: 

## Model saving

In [46]:
model = build_model()

model.fit(
    train_data,
    train_labels,
    epochs=15,
    batch_size=128
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x17a3dc4b9d0>

In [47]:
dataframe = pd.read_csv('../dataset/ecg_prod_50k.csv', header=None)
random_dataframe = dataframe
raw_data = random_dataframe.values

labels = raw_data[:, -1]
data = raw_data[:, 0:-1]
result = model.predict(data)



correctly_recognized_samples_normal = 0
correctly_recognized_samples_anomalous = 0
temp = []
corr_max = [0, float('inf')]
for i in np.arange(0.01, 1.0, 0.01):
    for j in range(0, len(result)):
        if result[j] < i and labels[j] == 0:
            correctly_recognized_samples_anomalous += 1
        if result[j] > i and labels[j] == 1:
            correctly_recognized_samples_normal += 1
    if abs(correctly_recognized_samples_normal / 7500 - correctly_recognized_samples_anomalous / 2500) < corr_max[1]:
        corr_max = [i,
                    (correctly_recognized_samples_normal / 7500 + correctly_recognized_samples_anomalous / 2500) / 2]
        temp = [correctly_recognized_samples_normal, correctly_recognized_samples_anomalous]
    correctly_recognized_samples_normal = 0
    correctly_recognized_samples_anomalous = 0
print(corr_max)

[0.99, 5.0]


In [48]:
print(temp)

[37500, 12500]


In [49]:
model.save('../models/detectors/ann_10k_4w')

INFO:tensorflow:Assets written to: ../models/detectors/ann_10k_4w\assets


INFO:tensorflow:Assets written to: ../models/detectors/ann_10k_4w\assets


## Model evaluating and testing