<a href="https://colab.research.google.com/github/PETEROA/Anomaly/blob/main/hybrid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install EMD-signal

In [14]:
import numpy as np
import pandas as pd
from PyEMD import EMD
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


In [15]:
df = pd.read_csv('modified_UCI.csv')

In [None]:
df.head

In [22]:
columns_to_extract = ['CO(GT)', 'C6H6(GT)']
data = df[columns_to_extract].values.flatten()

# Apply Hampel Identifier
def hampel_identifier(data, window_size=5, n_sigma=3):
    median = pd.Series(data).rolling(window=window_size, center=True).median()
    deviation = np.abs(data - median)
    median_deviation = deviation.rolling(window=window_size, center=True).median()
    z_score = 0.6745 * deviation / (median_deviation + 1e-10)
    outliers = z_score > n_sigma
    return outliers


outliers_hampel = hampel_identifier(data)

# Apply Empirical Mode Decomposition (EMD)
def apply_emd(data):
    emd = EMD()
    imfs = emd(data)
    return imfs

imfs = apply_emd(data)

# Combine IMFs and original data
hybrid_data = np.vstack([imfs, data]).T

# Normalize hybrid data
scaler = MinMaxScaler()
hybrid_data_normalized = scaler.fit_transform(hybrid_data)

# Split into sequences for RNN
sequence_length = 4
sequences = [hybrid_data_normalized[i:i + sequence_length] for i in range(len(hybrid_data_normalized) - sequence_length)]
targets = outliers_hampel[sequence_length:]

# Convert to NumPy arrays
X = np.array(sequences)
y = np.array(targets)

# Build and train the hybrid RNN model
model = Sequential()
model.add(LSTM(units=50, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=10, batch_size=32, verbose=0)           # Set verbose to 0 to suppress output


<keras.src.callbacks.History at 0x7f1b4c67cbe0>

In [23]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Predict on the training set for evaluation
y_pred = (model.predict(X) > 0.5).astype(int)

# Evaluate the model
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)
conf_matrix = confusion_matrix(y, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')
print('Confusion Matrix:')
print(conf_matrix)


Accuracy: 0.8361
Precision: 0.7812
Recall: 0.0081
F1 Score: 0.0160
Confusion Matrix:
[[15618     7]
 [ 3060    25]]
