In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector

In [12]:
# Load your dataset
# Assuming your dataset is stored in a CSV file
data = pd.read_csv('dataset.csv')

# Select features for training
features = ['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9']

In [13]:
# Normalize the data using Min-Max scaling
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])

# Split the data into training and testing sets
train_size = int(len(data) * 0.8)
train, test = data[0:train_size], data[train_size:]

In [14]:
# Function to create sequences for training the model
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        seq = data.iloc[i:i + sequence_length].values
        sequences.append(seq)
    return np.array(sequences)

In [15]:
# Hyperparameters
sequence_length = 10  # Adjust as needed
latent_dim = 5  # Adjust as needed

# Create sequences for training
train_sequences = create_sequences(train[features], sequence_length)

In [16]:
# Autoencoder Model
input_shape = (sequence_length, len(features))
inputs = Input(shape=input_shape)
encoded = LSTM(latent_dim, activation='relu')(inputs)
decoded = RepeatVector(sequence_length)(encoded)
decoded = LSTM(len(features), activation='sigmoid', return_sequences=True)(decoded)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

In [18]:
# Train the model
autoencoder.fit(train_sequences, train_sequences, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x288121b10>

In [19]:
# Use the trained model to reconstruct the test data
test_sequences = create_sequences(test[features], sequence_length)
decoded_sequences = autoencoder.predict(test_sequences)



In [20]:
# Calculate the reconstruction error
mse = np.mean(np.square(test_sequences - decoded_sequences), axis=(1, 2))
print(mse)

[0.00111285 0.00111301 0.00111316 ... 0.0060132  0.00599964 0.00649541]


In [21]:
print(len(mse), len(test))


33913 33923


In [23]:
mse = mse[:len(test)]


In [25]:
# Set a threshold for anomaly detection
threshold = 0.01  # Adjust as needed

# Identify anomalies
anomalies = test.iloc[:len(mse)][mse > threshold]

# Print the anomalies along with their features
print("Anomalies:")
print(anomalies[features])


Anomalies:
              P1        P2        P3        P4        P5        P6   P7   P8  \
152046  0.460674  0.235294  0.294389  0.282051  0.931034  0.951220  0.0  0.0   
152047  0.460674  0.235294  0.294389  0.282051  0.931034  0.951220  0.0  0.0   
152048  0.460674  0.235294  0.294389  0.282051  0.931034  0.951220  0.0  0.0   
152049  0.426966  0.294118  0.294389  0.282051  0.931034  0.951220  0.0  0.0   
152050  0.426966  0.294118  0.007686  0.576535  0.931034  0.951220  0.0  0.0   
...          ...       ...       ...       ...       ...       ...  ...  ...   
169548  0.471910  0.264706  0.494235  0.490287  0.620690  0.804878  0.0  0.0   
169549  0.471910  0.264706  0.494235  0.490287  0.620690  0.804878  0.0  0.0   
169550  0.471910  0.264706  0.494235  0.490287  0.620690  0.804878  0.0  0.0   
169551  0.471910  0.264706  0.494235  0.490287  0.620690  0.804878  0.0  0.0   
169552  0.471910  0.264706  0.494235  0.490287  0.620690  0.804878  0.0  0.0   

         P9  
152046  0.0  


In [40]:
autoencoder.save('autoencoder_model.h5')


You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.



In [None]:
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model('autoencoder_model.h5')
