In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, ConvLSTM2D, Flatten
from sklearn.svm import OneClassSVM
from statsmodels.tsa.seasonal import seasonal_decompose

ModuleNotFoundError: No module named 'tensorflow.python'

: 

In [2]:
# Load filtered data
# train_df = pd.read_csv('DeeL\Filtered\filtered_training_scaled.csv')
# valid_df = pd.read_csv('DeeL\Filtered\filtered_validation_scaled.csv')
# test_df = pd.read_csv('DeeL\Filtered\filtered_testing_scaled.csv')

# Load unfiltered data
train_df = pd.read_csv('DeeL\unfiltered\unfiltered_training_scaled.csv')
valid_df = pd.read_csv('DeeL\unfiltered\unfiltered_validation_scaled.csv')
test_df = pd.read_csv('DeeL\unfiltered\unfiltered_testing_scaled.csv')

In [3]:
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_df[['value']])
valid_scaled = scaler.transform(valid_df[['value']])
test_scaled = scaler.transform(test_df[['value']])

In [4]:
sequence_length = 30

def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length, 0])
        y.append(data[i+sequence_length, 0])
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_scaled, sequence_length)
X_valid, y_valid = create_sequences(valid_scaled, sequence_length)
X_test, y_test = create_sequences(test_scaled, sequence_length)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_valid = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [5]:
# Model 1: LSTM
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_valid, y_valid), verbose=0)

predictions = model.predict(X_test, verbose=0)
errors = np.abs(predictions - y_test)
mse_lstm = np.mean(errors)
threshold = np.percentile(errors, 95)
anomalies_lstm = errors > threshold
normalized_lstm_score = (np.sum(anomalies_lstm) / len(errors)) * 100

# Print Results for LSTM
print(f"LSTM MSE: {mse_lstm:.6f}")
print(f"LSTM Normalized Anomaly Score: {normalized_lstm_score:.2f}%")

  super().__init__(**kwargs)


LSTM MSE: 0.216435
LSTM Normalized Anomaly Score: 7595.00%


In [6]:
# Model 2: Autoencoder
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], 1), return_sequences=False))
model.add(RepeatVector(X_train.shape[1]))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(1)))

model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, X_train, epochs=10, batch_size=64, validation_data=(X_valid, X_valid), verbose=0)

reconstruction = model.predict(X_test, verbose=0)
reconstruction_error = np.mean(np.abs(reconstruction - X_test), axis=1)
threshold = np.percentile(reconstruction_error, 95)
anomalies_autoencoder = reconstruction_error > threshold

# Print Results for Autoencoder
print(f"Autoencoder Anomalies: {np.sum(anomalies_autoencoder)}")

Autoencoder Anomalies: 76


In [7]:
# Model 3: ConvLSTM
X_train_reshaped = X_train.reshape((X_train.shape[0], sequence_length, 1, 1, 1))
X_valid_reshaped = X_valid.reshape((X_valid.shape[0], sequence_length, 1, 1, 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], sequence_length, 1, 1, 1))

model = Sequential()
model.add(ConvLSTM2D(64, (3, 3), padding='same', input_shape=(sequence_length, 1, 1, 1), return_sequences=True))
model.add(ConvLSTM2D(64, (3, 3), padding='same', return_sequences=False))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.fit(X_train_reshaped, X_train[:, -1], epochs=10, batch_size=64, validation_data=(X_valid_reshaped, X_valid[:, -1]), verbose=0)

predictions = model.predict(X_test_reshaped, verbose=0)
error = np.mean(np.abs(X_test_reshaped[:, :, 0, 0, 0] - predictions), axis=1)
threshold = np.percentile(error, 95)
anomalies_convlstm = error > threshold

# Print Results for ConvLSTM
print(f"ConvLSTM Anomalies: {np.sum(anomalies_convlstm)}")

ConvLSTM Anomalies: 76


In [8]:
# Model 4: One-Class SVM
X_train_flat = X_train.reshape((X_train.shape[0], -1))
X_valid_flat = X_valid.reshape((X_valid.shape[0], -1))
X_test_flat = X_test.reshape((X_test.shape[0], -1))

svm = OneClassSVM(nu=0.05, kernel='rbf', gamma=0.1)
svm.fit(X_train_flat)
predictions = svm.predict(X_test_flat)
anomalies_svm = predictions == -1

# Print Results for One-Class SVM
print(f"One-Class SVM Anomalies: {np.sum(anomalies_svm)}")

One-Class SVM Anomalies: 314


In [9]:
# Model 5: Seasonal Decomposition
series = test_df['value']
decomposition = seasonal_decompose(series, model='additive', period=sequence_length)
residual = decomposition.resid.dropna()
threshold = np.percentile(np.abs(residual), 95)
anomalies_seasonal = np.abs(residual) > threshold

# Print Results for Seasonal Decomposition
print(f"Seasonal Decomposition Anomalies: {np.sum(anomalies_seasonal)}")

Seasonal Decomposition Anomalies: 76


In [10]:
# Model Comparison
results = {
    'LSTM MSE': mse_lstm,
    'LSTM Anomaly Score': normalized_lstm_score,
    'Autoencoder Anomalies': np.sum(anomalies_autoencoder),
    'ConvLSTM Anomalies': np.sum(anomalies_convlstm),
    'One-Class SVM Anomalies': np.sum(anomalies_svm),
    'Seasonal Decomposition Anomalies': np.sum(anomalies_seasonal)
}

for model, score in results.items():
    if 'MSE' in model:
        print(f'{model}: {score:.6f}')
    else:
        percentage = (score / len(errors)) * 100
        print(f'{model}: {score} anomalies ({percentage:.2f}%)')

LSTM MSE: 0.216435
LSTM Anomaly Score: 7594.9967083607635 anomalies (500.00%)
Autoencoder Anomalies: 76 anomalies (5.00%)
ConvLSTM Anomalies: 76 anomalies (5.00%)
One-Class SVM Anomalies: 314 anomalies (20.67%)
Seasonal Decomposition Anomalies: 76 anomalies (5.00%)


In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

# Helper function to calculate performance metrics
def evaluate_anomaly_detection(true_anomalies, predicted_anomalies):
    precision = precision_score(true_anomalies, predicted_anomalies)
    recall = recall_score(true_anomalies, predicted_anomalies)
    f1 = f1_score(true_anomalies, predicted_anomalies)
    auc = roc_auc_score(true_anomalies, predicted_anomalies)
    return precision, recall, f1, auc

# Generate synthetic ground truth anomalies for testing purposes
# This should be replaced with actual ground truth labels if available
true_anomalies = (np.random.rand(len(errors)) < 0.1).astype(int)  # 10% anomalies as an example


In [14]:
# 1. LSTM Performance Test
# ===============================
# Ensure true_anomalies has the same length as predicted_anomalies_lstm
true_anomalies = (np.random.rand(len(predicted_anomalies_lstm)) < 0.1).astype(int)  # Generate 10% anomalies

# Ensure both arrays are binary and have the same shape
true_anomalies = true_anomalies.ravel()  # Flatten ground truth to 1D
predicted_anomalies_lstm = anomalies_lstm.astype(int).ravel()  # Convert predictions to binary and flatten

# Evaluate anomaly detection performance
precision_lstm, recall_lstm, f1_lstm, auc_lstm = evaluate_anomaly_detection(true_anomalies, predicted_anomalies_lstm)

print("LSTM Performance:")
print(f"Precision: {precision_lstm:.2f}, Recall: {recall_lstm:.2f}, F1-Score: {f1_lstm:.2f}, AUC: {auc_lstm:.2f}")


LSTM Performance:
Precision: 0.10, Recall: 0.05, F1-Score: 0.07, AUC: 0.50


In [16]:
# Model 2: Autoencoder Performance Test
# ===============================
# Ensure true_anomalies has the same length as predicted_anomalies_autoencoder
true_anomalies = (np.random.rand(len(anomalies_autoencoder)) < 0.1).astype(int)  # Generate 10% anomalies

# Ensure both arrays are binary and have the same shape
true_anomalies = true_anomalies.ravel()
predicted_anomalies_autoencoder = anomalies_autoencoder.astype(int).ravel()

# Evaluate anomaly detection performance
precision_autoencoder, recall_autoencoder, f1_autoencoder, auc_autoencoder = evaluate_anomaly_detection(
    true_anomalies, predicted_anomalies_autoencoder
)

print("\nAutoencoder Performance:")
print(f"Precision: {precision_autoencoder:.2f}, Recall: {recall_autoencoder:.2f}, F1-Score: {f1_autoencoder:.2f}, AUC: {auc_autoencoder:.2f}")



Autoencoder Performance:
Precision: 0.16, Recall: 0.07, F1-Score: 0.09, AUC: 0.51


In [17]:
# Model 3: ConvLSTM Performance Test
# ===============================
# Ensure true_anomalies has the same length as predicted_anomalies_convlstm
true_anomalies = (np.random.rand(len(anomalies_convlstm)) < 0.1).astype(int)  # Generate 10% anomalies

# Ensure both arrays are binary and have the same shape
true_anomalies = true_anomalies.ravel()
predicted_anomalies_convlstm = anomalies_convlstm.astype(int).ravel()

# Evaluate anomaly detection performance
precision_convlstm, recall_convlstm, f1_convlstm, auc_convlstm = evaluate_anomaly_detection(
    true_anomalies, predicted_anomalies_convlstm
)

print("\nConvLSTM Performance:")
print(f"Precision: {precision_convlstm:.2f}, Recall: {recall_convlstm:.2f}, F1-Score: {f1_convlstm:.2f}, AUC: {auc_convlstm:.2f}")



ConvLSTM Performance:
Precision: 0.11, Recall: 0.05, F1-Score: 0.07, AUC: 0.50


In [18]:
# Model 4: One-Class SVM Performance Test
# ===============================
# Ensure true_anomalies has the same length as predicted_anomalies_svm
true_anomalies = (np.random.rand(len(anomalies_svm)) < 0.1).astype(int)  # Generate 10% anomalies

# Ensure both arrays are binary and have the same shape
true_anomalies = true_anomalies.ravel()
predicted_anomalies_svm = anomalies_svm.astype(int).ravel()

# Evaluate anomaly detection performance
precision_svm, recall_svm, f1_svm, auc_svm = evaluate_anomaly_detection(
    true_anomalies, predicted_anomalies_svm
)

print("\nOne-Class SVM Performance:")
print(f"Precision: {precision_svm:.2f}, Recall: {recall_svm:.2f}, F1-Score: {f1_svm:.2f}, AUC: {auc_svm:.2f}")



One-Class SVM Performance:
Precision: 0.10, Recall: 0.21, F1-Score: 0.13, AUC: 0.50


In [19]:
# Model 5: Seasonal Decomposition Performance Test
# ===============================
# Ensure true_anomalies has the same length as predicted_anomalies_seasonal
true_anomalies = (np.random.rand(len(anomalies_seasonal)) < 0.1).astype(int)  # Generate 10% anomalies

# Ensure both arrays are binary and have the same shape
true_anomalies = true_anomalies.ravel()
predicted_anomalies_seasonal = anomalies_seasonal.astype(int).ravel()

# Evaluate anomaly detection performance
precision_seasonal, recall_seasonal, f1_seasonal, auc_seasonal = evaluate_anomaly_detection(
    true_anomalies, predicted_anomalies_seasonal
)

print("\nSeasonal Decomposition Performance:")
print(f"Precision: {precision_seasonal:.2f}, Recall: {recall_seasonal:.2f}, F1-Score: {f1_seasonal:.2f}, AUC: {auc_seasonal:.2f}")



Seasonal Decomposition Performance:
Precision: 0.07, Recall: 0.04, F1-Score: 0.05, AUC: 0.49


  predicted_anomalies_seasonal = anomalies_seasonal.astype(int).ravel()


In [20]:
# Summary of Results
# ===============================
results_summary = {
    "Model": ["LSTM", "Autoencoder", "ConvLSTM", "One-Class SVM", "Seasonal Decomposition"],
    "Precision": [precision_lstm, precision_autoencoder, precision_convlstm, precision_svm, precision_seasonal],
    "Recall": [recall_lstm, recall_autoencoder, recall_convlstm, recall_svm, recall_seasonal],
    "F1-Score": [f1_lstm, f1_autoencoder, f1_convlstm, f1_svm, f1_seasonal],
    "AUC": [auc_lstm, auc_autoencoder, auc_convlstm, auc_svm, auc_seasonal]
}

results_df = pd.DataFrame(results_summary)
print("\nSummary of Performance Results:")
print(results_df)


Summary of Performance Results:
                    Model  Precision    Recall  F1-Score       AUC
0                    LSTM   0.099854  0.049866  0.066515  0.499926
1             Autoencoder   0.157895  0.067416  0.094488  0.509845
2                ConvLSTM   0.105263  0.054054  0.071429  0.502228
3           One-Class SVM   0.095541  0.208333  0.131004  0.500894
4  Seasonal Decomposition   0.065789  0.039683  0.049505  0.494357
