In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, ConvLSTM2D, Flatten
from sklearn.svm import OneClassSVM
from statsmodels.tsa.seasonal import seasonal_decompose

In [3]:
# Load filtered data
# train_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/Filtered/filtered_training_scaled.csv')
# valid_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/Filtered/filtered_validation_scaled.csv')
# test_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/Filtered/filtered_testing_scaled.csv')

# Load unfiltered data
train_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/unfiltered/unfiltered_training_scaled.csv')
valid_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/unfiltered/unfiltered_validation_scaled.csv')
test_df = pd.read_csv('https://raw.githubusercontent.com/IKRAMJAAFAR/DeeL/main/unfiltered/unfiltered_testing_scaled.csv')

In [4]:
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_df[['value']])
valid_scaled = scaler.transform(valid_df[['value']])
test_scaled = scaler.transform(test_df[['value']])

In [5]:
sequence_length = 30

def create_sequences(data, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i+sequence_length, 0])
        y.append(data[i+sequence_length, 0])
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_scaled, sequence_length)
X_valid, y_valid = create_sequences(valid_scaled, sequence_length)
X_test, y_test = create_sequences(test_scaled, sequence_length)

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_valid = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [9]:
# Model 1: LSTM
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_valid, y_valid), verbose=0)

predictions = model.predict(X_test, verbose=0)
errors = np.abs(predictions - y_test)
mse_lstm = np.mean(errors)
threshold = np.percentile(errors, 95)
anomalies_lstm = errors > threshold
normalized_lstm_score = (np.sum(anomalies_lstm) / len(errors)) * 100

# Print Results for LSTM
print(f"LSTM MSE: {mse_lstm:.6f}")
print(f"LSTM Normalized Anomaly Score: {normalized_lstm_score:.2f}%")

LSTM MSE: 0.215553
LSTM Normalized Anomaly Score: 7595.00%


In [6]:
# Model 2: Autoencoder
model = Sequential()
model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], 1), return_sequences=False))
model.add(RepeatVector(X_train.shape[1]))
model.add(LSTM(64, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(1)))

model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, X_train, epochs=10, batch_size=64, validation_data=(X_valid, X_valid), verbose=0)

reconstruction = model.predict(X_test, verbose=0)
reconstruction_error = np.mean(np.abs(reconstruction - X_test), axis=1)
threshold = np.percentile(reconstruction_error, 95)
anomalies_autoencoder = reconstruction_error > threshold

# Print Results for Autoencoder
print(f"Autoencoder Anomalies: {np.sum(anomalies_autoencoder)}")

Autoencoder Anomalies: 76


In [7]:
# Model 3: ConvLSTM
X_train_reshaped = X_train.reshape((X_train.shape[0], sequence_length, 1, 1, 1))
X_valid_reshaped = X_valid.reshape((X_valid.shape[0], sequence_length, 1, 1, 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], sequence_length, 1, 1, 1))

model = Sequential()
model.add(ConvLSTM2D(64, (3, 3), padding='same', input_shape=(sequence_length, 1, 1, 1), return_sequences=True))
model.add(ConvLSTM2D(64, (3, 3), padding='same', return_sequences=False))
model.add(Flatten())
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.fit(X_train_reshaped, X_train[:, -1], epochs=10, batch_size=64, validation_data=(X_valid_reshaped, X_valid[:, -1]), verbose=0)

predictions = model.predict(X_test_reshaped, verbose=0)
error = np.mean(np.abs(X_test_reshaped[:, :, 0, 0, 0] - predictions), axis=1)
threshold = np.percentile(error, 95)
anomalies_convlstm = error > threshold

# Print Results for ConvLSTM
print(f"ConvLSTM Anomalies: {np.sum(anomalies_convlstm)}")

ConvLSTM Anomalies: 76


In [8]:
# Model 4: One-Class SVM
X_train_flat = X_train.reshape((X_train.shape[0], -1))
X_valid_flat = X_valid.reshape((X_valid.shape[0], -1))
X_test_flat = X_test.reshape((X_test.shape[0], -1))

svm = OneClassSVM(nu=0.05, kernel='rbf', gamma=0.1)
svm.fit(X_train_flat)
predictions = svm.predict(X_test_flat)
anomalies_svm = predictions == -1

# Print Results for One-Class SVM
print(f"One-Class SVM Anomalies: {np.sum(anomalies_svm)}")

One-Class SVM Anomalies: 314


In [9]:
# Model 5: Seasonal Decomposition
series = test_df['value']
decomposition = seasonal_decompose(series, model='additive', period=sequence_length)
residual = decomposition.resid.dropna()
threshold = np.percentile(np.abs(residual), 95)
anomalies_seasonal = np.abs(residual) > threshold

# Print Results for Seasonal Decomposition
print(f"Seasonal Decomposition Anomalies: {np.sum(anomalies_seasonal)}")

Seasonal Decomposition Anomalies: 76


In [10]:
# Model Comparison
results = {
    'LSTM MSE': mse_lstm,
    'LSTM Anomaly Score': normalized_lstm_score,
    'Autoencoder Anomalies': np.sum(anomalies_autoencoder),
    'ConvLSTM Anomalies': np.sum(anomalies_convlstm),
    'One-Class SVM Anomalies': np.sum(anomalies_svm),
    'Seasonal Decomposition Anomalies': np.sum(anomalies_seasonal)
}

for model, score in results.items():
    if 'MSE' in model:
        print(f'{model}: {score:.6f}')
    else:
        percentage = (score / len(errors)) * 100
        print(f'{model}: {score} anomalies ({percentage:.2f}%)')

LSTM MSE: 0.218000
LSTM Anomaly Score: 7594.9967083607635 anomalies (500.00%)
Autoencoder Anomalies: 76 anomalies (5.00%)
ConvLSTM Anomalies: 76 anomalies (5.00%)
One-Class SVM Anomalies: 314 anomalies (20.67%)
Seasonal Decomposition Anomalies: 76 anomalies (5.00%)
