In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [2]:
PATH_TO_DATA = 'http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv'
data = pd.read_csv(PATH_TO_DATA, header=None)

In [3]:
x_train, x_test = train_test_split(data, test_size=0.2, random_state=42)

In [4]:
x_train.shape

(3998, 141)

In [5]:
input_dim = x_train.shape[1]

In [10]:
encoder_input = tf.keras.Input(shape=(input_dim,))
x = tf.keras.layers.Dense(64, activation='relu')(encoder_input)
x = tf.keras.layers.Dense(32, activation='relu')(x)
latent = tf.keras.layers.Dense(16, activation='relu')(x)
encoder = tf.keras.Model(encoder_input, latent, name='encoder')

In [12]:
decoder_input = tf.keras.Input(shape=(16, ))
x = tf.keras.layers.Dense(64, activation='relu')(decoder_input)
x = tf.keras.layers.Dense(64, activation='relu')(x)
decoder_output = tf.keras.layers.Dense(input_dim, activation='sigmoid')(x)
decoder = tf.keras.Model(decoder_input, decoder_output, name='decoder')

autoencoder_input = tf.keras.Input(shape=(input_dim, ))
encoded = encoder(autoencoder_input)
decoder = decoder(encoded)
autoencoder = tf.keras.Model(autoencoder_input, decoder, name='autoencoder') 

In [13]:
autoencoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='mae')

autoencoder.fit(x_train, x_train, epochs=10, batch_size=32, shuffle=True, validation_data=(x_test, x_test))

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.6010 - val_loss: 0.4740
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4654 - val_loss: 0.4455
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4410 - val_loss: 0.4362
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4343 - val_loss: 0.4328
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4318 - val_loss: 0.4288
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4284 - val_loss: 0.4262
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4250 - val_loss: 0.4246
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4254 - val_loss: 0.4234
Epoch 9/10
[1m125/125[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1f911bf7230>

In [None]:
reconstructions = autoencoder.predict(x_test)
mse = np.mean(np.power(x_test - reconstructions, 2), axis=1)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [15]:
mse

84      0.797117
2470    0.703502
2803    0.646770
4986    0.657184
4923    0.829984
          ...   
4522    0.811945
1918    0.696891
3862    0.817465
4198    0.835934
3885    0.802122
Length: 1000, dtype: float64

In [20]:
threshold = np.mean(mse) + 3* np.std(mse)
threshold

0.9453522960246519

In [21]:
anomalies = np.where(mse > threshold)
anomalies

(array([360, 421, 727], dtype=int64),)