In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns



2025-12-18 13:08:56.498934: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-12-18 13:08:56.578931: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-18 13:08:57.206855: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-18 13:08:57.211979: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:

# Load the labelled PMU measurements and drop the exported index column.
file_path = "data/Phasor Measurement Unit Data - Labeled/PMU_data.xlsx"
df = pd.read_excel(file_path).drop(columns=["Unnamed: 0"])

# Feature matrix (every column except the label) and integer class labels.
X = df.drop(columns=["Class Labels"]).values
y = df["Class Labels"].values.astype(np.int64)

# Window i covers rows [i, i + window) and is labelled with row i + window,
# so there are len(X) - window usable windows.
window = 10
n_windows = len(X) - window

# Chronological 80/20 split over windows (no shuffling).
split = int(0.8 * n_windows)
if split < 1:
    raise ValueError(
        f"Need more rows to build a training set: got {len(X)} rows for window={window}."
    )

# Fit the scaler ONLY on rows that training windows can see. The last
# training window (index split - 1) ends at row split + window - 2, so the
# training rows are X[:split + window - 1]. Fitting on the full dataset would
# leak test-period statistics into the training features.
n_train_rows = split + window - 1
scaler = StandardScaler()
scaler.fit(X[:n_train_rows])
X_scaled = scaler.transform(X)

# Build the sliding windows and their next-step labels.
X_seq = np.array(
    [X_scaled[i:i + window] for i in range(n_windows)],
    dtype=np.float32,
)
y_seq = np.array(y[window:], dtype=np.int64)

X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]

print(f" Data reshaped for LSTM: {X_seq.shape} (train={X_train.shape}, test={X_test.shape})")


# Build baseline LSTM

# Seed Python, NumPy and TensorFlow in one call so that weight initialisation,
# dropout masks and batch shuffling are repeatable under Restart & Run All.
# (Bit-exact results can still vary with hardware / op-level determinism.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# One LSTM layer over (window, n_features) inputs, followed by a small dense
# head with dropout and a single sigmoid unit for binary classification.
model = Sequential([
    LSTM(64, input_shape=(window, X_seq.shape[2])),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dropout(0.2),
    Dense(1, activation="sigmoid")
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.summary()


# Train on clean data
# NOTE(review): the held-out test split is also passed as validation data, so
# the per-epoch validation metrics are computed on the same windows that are
# used for the final evaluation below.

history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=80,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test),
)

# Evaluate on clean test data: threshold the sigmoid outputs at 0.5 and
# flatten them to a 1-D vector of 0/1 predictions.
y_pred_clean_prob = model.predict(X_test)
y_pred_clean = np.ravel(y_pred_clean_prob > 0.5).astype(int)
clean_acc = accuracy_score(y_true=y_test, y_pred=y_pred_clean)
print(f"\n Clean test accuracy (baseline): {clean_acc:.4f}")




# Loss used to compute the attack gradient; the model outputs probabilities
# (sigmoid), hence from_logits=False.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)

def fgsm_attack(model, x, y, eps=0.1, clip_min=None, clip_max=None):
    """Craft FGSM adversarial examples for a binary classifier.

    Computes ``x_adv = clip(x + eps * sign(d loss / d x), clip_min, clip_max)``
    where the loss is the binary cross-entropy between ``y`` and ``model(x)``.
    The whole of ``x`` is processed in a single forward/backward pass.

    Parameters
    ----------
    model : callable
        Model invoked as ``model(x, training=False)``; its output is compared
        against ``y`` reshaped to a column vector.
    x : array-like
        Model inputs, converted to float32. In this notebook the test windows
        have shape (N, 10, 15).
    y : array-like
        One label per sample, shape (N,); reshaped to (N, 1) and cast to float32.
    eps : float, default 0.1
        Attack strength (step size along the gradient sign).
    clip_min, clip_max : scalar or array-like, optional
        Bounds applied to the perturbed inputs; may be anything broadcastable
        to the shape of ``x`` (e.g. per-feature bounds). When left as ``None``
        the bound falls back to the global minimum / maximum of the
        notebook-level ``X_train``, which is the original behaviour.

    Returns
    -------
    numpy.ndarray
        Adversarial inputs with the same shape as ``x`` (float32).

    Raises
    ------
    ValueError
        If no gradient of the loss with respect to ``x`` is available.
    """
    # Normalise inputs: float32 features, labels as a float32 column vector.
    x_var = tf.convert_to_tensor(np.asarray(x, dtype=np.float32))
    y_var = tf.convert_to_tensor(np.asarray(y, dtype=np.float32).reshape(-1, 1))

    with tf.GradientTape() as tape:
        # x_var is a constant tensor, so it must be watched explicitly.
        tape.watch(x_var)
        preds = model(x_var, training=False)
        loss = loss_fn(y_var, preds)

    grad = tape.gradient(loss, x_var)
    if grad is None:
        raise ValueError("Loss has no gradient with respect to the inputs.")

    # Single step in the direction that increases the loss.
    x_adv = x_var + eps * tf.sign(grad)

    # Keep perturbed values inside the allowed range. Defaults reproduce the
    # original global-min/max clipping against the training windows.
    if clip_min is None:
        clip_min = np.min(X_train)
    if clip_max is None:
        clip_max = np.max(X_train)
    x_adv = tf.clip_by_value(
        x_adv,
        np.asarray(clip_min, dtype=np.float32),
        np.asarray(clip_max, dtype=np.float32),
    )

    return x_adv.numpy()

# Perturb the test windows with FGSM at the chosen attack strength.
eps = 0.1
print(f"\n Generating FGSM adversarial examples with eps={eps} ...")
X_test_adv = fgsm_attack(model, X_test, y_test, eps=eps)

# Score the same model on the perturbed windows (sigmoid output thresholded
# at 0.5) and compare with the clean accuracy computed earlier.
y_pred_adv_prob = model.predict(X_test_adv)
y_pred_adv = np.ravel(y_pred_adv_prob > 0.5).astype(int)
adv_acc = accuracy_score(y_true=y_test, y_pred=y_pred_adv)

# Difference between clean and adversarial accuracy, scaled by 100.
drop = 100.0 * (clean_acc - adv_acc)

print(f" Adversarial test accuracy (baseline under FGSM): {adv_acc:.4f}")
print(f" Accuracy drop under attack: {drop:.2f}%")


# Confusion matrices and plots

# Pin the label order to [0, 1] so both matrices are always 2x2, even when the
# test labels and predictions happen to contain a single class (otherwise
# confusion_matrix returns a 1x1 array, which no longer matches the two tick
# labels used by plot_cm).
# NOTE(review): assumes "Class Labels" is binary 0/1, consistent with the
# sigmoid output and 0.5 threshold used above - confirm against the dataset.
cm_clean = confusion_matrix(y_test, y_pred_clean, labels=[0, 1])
cm_adv = confusion_matrix(y_test, y_pred_adv, labels=[0, 1])

print("\nConfusion matrix (clean):\n", cm_clean)
print("\nConfusion matrix (FGSM attack):\n", cm_adv)

def plot_cm(cm, title, filename):
    """Draw a confusion matrix as an annotated heatmap and save it as an image.

    Parameters
    ----------
    cm : array-like
        Confusion matrix of integer counts; the tick labels below assume two
        rows (true 0/1) and two columns (predicted 0/1).
    title : str
        Title placed above the heatmap.
    filename : str
        Output path handed to ``plt.savefig`` (written at 200 dpi).

    The figure is closed after saving, so nothing is displayed inline.
    """
    plt.figure(figsize=(4, 4))

    heatmap_options = {
        "annot": True,   # write the count inside every cell
        "fmt": "d",      # counts are integers
        "cbar": False,
        "xticklabels": ["Pred 0", "Pred 1"],
        "yticklabels": ["True 0", "True 1"],
    }
    ax = sns.heatmap(cm, **heatmap_options)

    ax.set_title(title)
    ax.set_ylabel("True label")
    ax.set_xlabel("Predicted label")

    plt.tight_layout()
    plt.savefig(filename, dpi=200)
    plt.close()

# Save one heatmap per evaluation condition (clean vs. FGSM-perturbed).
for matrix, heading, out_png in (
    (cm_clean, "Baseline LSTM - Clean", "cm_clean_baseline.png"),
    (cm_adv, "Baseline LSTM - FGSM Attack", "cm_adv_baseline.png"),
):
    plot_cm(matrix, heading, out_png)

print('\n Saved confusion matrices as "cm_clean_baseline.png" and "cm_adv_baseline.png"')


 Data reshaped for LSTM: (101637, 10, 15) (train=(81309, 10, 15), test=(20328, 10, 15))
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                20480     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 32)                2080      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 33        
                                                                 
Total params: 22593 (88.25 KB)
Trainable params: 22593 (88.25 KB)
Non-trainable params: 0 (0.00 Byte