In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Load and preprocess
# Reads the labelled PMU spreadsheet, standardises the feature columns, slices
# them into fixed-length windows and makes a chronological 80/20 split.
file_path = "data/Phasor Measurement Unit Data - Labeled/PMU_data.xlsx"
df = pd.read_excel(file_path).drop(columns=['Unnamed: 0'])
X = df.drop(columns=['Class Labels']).values
y = df['Class Labels'].values

# Window length and split point are computed up front so the scaler can be
# fitted on training rows only.
window = 10
n_sequences = len(X) - window
split = int(0.8 * n_sequences)
if split < 1:
    raise ValueError(
        f"Need more rows to build a training set: got {len(X)} rows "
        f"for window={window}"
    )

# Training windows are X_seq[:split]; together they cover raw rows
# 0 .. split + window - 2.  Fitting the scaler on those rows alone keeps
# test-set statistics out of the preprocessing (no data leakage).
train_rows = split + window - 1
scaler = StandardScaler()
scaler.fit(X[:train_rows])
X_scaled = scaler.transform(X)
joblib.dump(scaler, "scaler.pkl")

# Create sequences
# Each sample is `window` consecutive scaled rows.
# NOTE(review): the label is taken from the row immediately AFTER the window
# (index i + window), not from a row inside it — confirm this alignment is
# intended for reconstruction-based detection.
X_seq = np.array(
    [X_scaled[i:i + window] for i in range(n_sequences)],
    dtype=np.float32,
)
y_seq = np.array(
    [y[i + window] for i in range(n_sequences)],
    dtype=np.int64,
)

# Split (chronological: first 80% of windows train, remainder test)
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

print("Autoencoder data shapes:", X_train.shape, X_test.shape)

# Build LSTM autoencoder
# Sequence length and feature count come from the training windows.
timesteps, features = X_train.shape[1], X_train.shape[2]

inputs = Input(shape=(timesteps, features))

# Encoder: collapse the whole window into one vector, then squeeze it
# through a 32-unit dense bottleneck.
encoder_state = LSTM(64, activation='tanh')(inputs)
encoded = Dense(32, activation='relu')(encoder_state)

# Decoder: repeat the bottleneck vector once per timestep, unroll it with an
# LSTM and project every timestep back to the original feature count.
repeated_code = RepeatVector(timesteps)(encoded)
decoder_sequence = LSTM(64, activation='tanh', return_sequences=True)(repeated_code)
decoded = TimeDistributed(Dense(features))(decoder_sequence)

autoencoder = Model(inputs=inputs, outputs=decoded)
autoencoder.compile(loss='mse', optimizer='adam')
autoencoder.summary()

# Train
# The autoencoder reconstructs its own input, so each array is passed as both
# features and target; the held-out windows serve as validation data.
fit_options = dict(
    validation_data=(X_test, X_test),
    epochs=10,
    batch_size=128,
    verbose=1,
)
autoencoder.fit(X_train, X_train, **fit_options)

# Threshold
# Per-window reconstruction error on the training set: squared error averaged
# over features (axis 2) and then over timesteps (axis 1).
recon_train = autoencoder.predict(X_train)
train_mse = np.square(recon_train - X_train).mean(axis=2).mean(axis=1)

# Cut-off sits three standard deviations above the mean training error.
threshold = np.mean(train_mse) + 3 * np.std(train_mse)
print(f"Threshold: {threshold}")

# Clean test evaluation
# Score every unperturbed test window by its reconstruction error and flag it
# (prediction 1) when the error exceeds the training-derived threshold.
recon_test = autoencoder.predict(X_test)
test_mse = np.mean(np.mean((recon_test - X_test)**2, axis=2), axis=1)
y_pred_clean = (test_mse > threshold).astype(int)

# NOTE(review): predictions are 0/1, so this presumably expects 'Class Labels'
# to be binary with 1 marking the anomalous class — confirm against the data.
cm_clean = confusion_matrix(y_test, y_pred_clean)
print("Clean CM:\n", cm_clean)

# Save CLEAN confusion matrix
plt.figure(figsize=(6, 5))
sns.heatmap(cm_clean, annot=True, fmt="d", cmap="Blues")
# The dash is written as an escape so the title survives any file re-encoding
# (it had previously been corrupted into mojibake).
plt.title("Confusion Matrix \u2013 Autoencoder (Clean)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.savefig("cm_clean_autoencoder.png", dpi=300, bbox_inches='tight')
plt.close()

print("Saved: cm_clean_autoencoder.png")

# Adversarial attack
# NOTE: this adds a random +/- epsilon perturbation to every input value; it
# is not a gradient-based attack against the model.
epsilon = 0.05
# Dedicated, seeded generator so the perturbed set (and therefore the
# confusion matrix below) is reproducible from run to run.
rng = np.random.default_rng(42)
noise_sign = np.sign(rng.standard_normal(X_test.shape))
# Cast back to the input dtype: the float64 noise would otherwise silently
# upcast the whole test batch.
X_test_adv = (X_test + epsilon * noise_sign).astype(X_test.dtype)

# Score the perturbed windows exactly like the clean ones, against the same
# threshold.
recon_test_adv = autoencoder.predict(X_test_adv)
test_adv_mse = np.mean(np.mean((recon_test_adv - X_test_adv)**2, axis=2), axis=1)
y_pred_adv = (test_adv_mse > threshold).astype(int)

cm_adv = confusion_matrix(y_test, y_pred_adv)
print("Adv CM:\n", cm_adv)

# Save ADVERSARIAL confusion matrix
plt.figure(figsize=(6, 5))
sns.heatmap(cm_adv, annot=True, fmt="d", cmap="Reds")
# Dash restored (written as an escape so it cannot be lost to re-encoding).
plt.title("Confusion Matrix \u2013 Autoencoder (Adversarial)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.savefig("cm_adv_autoencoder.png", dpi=300, bbox_inches='tight')
plt.close()

print("Saved: cm_adv_autoencoder.png")


Autoencoder data shapes: (81309, 10, 15) (20328, 10, 15)
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 10, 15)]          0         
                                                                 
 lstm_2 (LSTM)               (None, 64)                20480     
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 repeat_vector_1 (RepeatVec  (None, 10, 32)            0         
 tor)                                                            
                                                                 
 lstm_3 (LSTM)               (None, 10, 64)            24832     
                                                                 
 time_distributed_1 (TimeDi  (None, 10, 15)            975       
 s