In [110]:
import numpy as np
import pandas
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense

In [111]:
df = pandas.read_csv('system-1.csv')
time_steps = 1
features = 23
df = df.iloc[:, 1:]


In [112]:
def create_sequences(data, time_steps):
    sequences = []
    for i in range(len(data) - time_steps + 1):
        seq = data[i:i + time_steps]
        sequences.append(seq)
    return np.array(sequences)

In [113]:
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(df)
X = create_sequences(data_normalized, time_steps)

train_size = int(len(X) * 0.8)
X_train = X[:train_size]
X_test = X[train_size:]



In [114]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (68599, 1, 23)
X_test shape: (17150, 1, 23)


In [115]:
inputs = Input(shape=(time_steps, features))
encoded = LSTM(64, activation='relu', return_sequences=False)(inputs)

decoded = RepeatVector(time_steps)(encoded)
decoded = LSTM(64, activation='relu', return_sequences=True)(decoded)
outputs = TimeDistributed(Dense(features))(decoded)

autoencoder = Model(inputs, outputs)
autoencoder.compile(optimizer='adam', loss='mse')

In [116]:
autoencoder.fit(
    X_train, X_train,  # Using the same data for input and output
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

Epoch 1/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0232 - val_loss: 0.0011
Epoch 2/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.2702e-04 - val_loss: 7.0729e-04
Epoch 3/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 6.4856e-05 - val_loss: 4.4027e-04
Epoch 4/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 3.3512e-05 - val_loss: 1.6942e-04
Epoch 5/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.6887e-05 - val_loss: 7.5159e-05
Epoch 6/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 1.3467e-05 - val_loss: 6.4247e-05
Epoch 7/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 8.6311e-06 - val_loss: 5.0605e-05
Epoch 8/10
[1m1715/1715[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 8.

<keras.src.callbacks.history.History at 0x31cf678e0>

In [117]:
reconstructed = autoencoder.predict(X_test)

# Calculate reconstruction error
reconstruction_error = np.mean(np.square(X_test - reconstructed), axis=(1, 2))

# Set a threshold for anomalies (e.g., based on quantiles or domain knowledge)
threshold = np.percentile(reconstruction_error, 95)

# Identify anomalies
anomalies = reconstruction_error > threshold

[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 576us/step


In [118]:
anomaly_count = 0
for anomaly in anomalies:
    if anomaly:
        anomaly_count += 1

In [120]:
print("Anomalies found: ", anomaly_count)
print(len(X))
print(len(df))

Anomalies found:  858
85749
85749


In [121]:
print(df.shape)

(85749, 23)
