In [1]:
import os, glob
import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# 1️⃣ Load XPQRS CSVs into a long‑form DataFrame
DATA_DIR = "/kaggle/input/seed-power-quality-disturbance-dataset/XPQRS"
csv_paths = glob.glob(os.path.join(DATA_DIR, "*.csv"))
dfs = []
for fp in csv_paths:
    label = os.path.splitext(os.path.basename(fp))[0]
    df0 = pd.read_csv(fp)
    df_long = df0.melt(var_name="instance", value_name="amplitude")
    df_long["time_idx"] = df_long.groupby("instance").cumcount()
    df_long["label"]    = label
    dfs.append(df_long)
full_df = pd.concat(dfs, ignore_index=True)

# 2️⃣ Pivot to get X raw and y
pivot = full_df.pivot_table(
    index=["label","instance"],
    columns="time_idx",
    values="amplitude"
)
X_raw = pivot.values.astype("float32")  # shape (1700, 999)
labels = pivot.index.get_level_values("label")
le = LabelEncoder()
y = le.fit_transform(labels)            # 0..16

# 3️⃣ Train/test split
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y,
    test_size=0.2, stratify=y, random_state=42
)


In [3]:
# ── Cell X: Denoising Autoencoder ──
import numpy as np
from tensorflow.keras import layers, Model

# 1) reshape your raw arrays into (N, 999, 1)
X_train_1d = X_train_raw[..., np.newaxis]
X_test_1d  = X_test_raw[...,  np.newaxis]

# 2) simulate noise
noise_factor    = 0.05
X_train_noisy   = X_train_1d + noise_factor * np.random.normal(size=X_train_1d.shape)
X_test_noisy    = X_test_1d  + noise_factor * np.random.normal(size=X_test_1d.shape)

# 3) build the encoder–decoder
inp = layers.Input((999,1))
# Encoder
x = layers.Conv1D(16,3,padding='same',activation='relu')(inp)
x = layers.MaxPool1D(2,padding='same')(x)   # → (500,16)
x = layers.Conv1D(8,3,padding='same',activation='relu')(x)
encoded = layers.MaxPool1D(2,padding='same')(x) # → (250, 8)
# Decoder
x = layers.Conv1D(8,3,padding='same',activation='relu')(encoded)
x = layers.UpSampling1D(2)(x)                # → (500,8)
x = layers.Conv1D(16,3,padding='same',activation='relu')(x)
x = layers.UpSampling1D(2)(x)                # → (1000,16)
x = layers.Conv1D(1,3,padding='same',activation='linear')(x)
decoded = layers.Cropping1D((0,1))(x)        # back to (999,1)

autoencoder = Model(inp, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

# 4) train
history_ae = autoencoder.fit(
    X_train_noisy, X_train_1d,
    epochs=20, batch_size=32, shuffle=True,
    validation_data=(X_test_noisy, X_test_1d),
    verbose=2
)

# 5) denoise
X_train_raw = autoencoder.predict(X_train_1d)  # overwrite raw with cleaned
X_test_raw  = autoencoder.predict(X_test_1d)
# reshape back to (N,999) for your downstream cells
X_train_raw = X_train_raw.squeeze(-1)
X_test_raw  = X_test_raw.squeeze(-1)


2025-07-29 20:02:40.245812: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753819360.620368      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753819360.731338      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
I0000 00:00:1753819379.344900      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1753819379.345761      36 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability:

Epoch 1/20


I0000 00:00:1753819383.747671      98 service.cc:148] XLA service 0x7dc0f800c9c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1753819383.748935      98 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1753819383.748953      98 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1753819384.141394      98 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1753819387.119335      98 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


43/43 - 8s - 195ms/step - loss: 0.4892 - val_loss: 0.4010
Epoch 2/20
43/43 - 0s - 6ms/step - loss: 0.3592 - val_loss: 0.3309
Epoch 3/20
43/43 - 0s - 6ms/step - loss: 0.3171 - val_loss: 0.3040
Epoch 4/20
43/43 - 0s - 6ms/step - loss: 0.2959 - val_loss: 0.2885
Epoch 5/20
43/43 - 0s - 7ms/step - loss: 0.2817 - val_loss: 0.2749
Epoch 6/20
43/43 - 0s - 6ms/step - loss: 0.2687 - val_loss: 0.2626
Epoch 7/20
43/43 - 0s - 7ms/step - loss: 0.2576 - val_loss: 0.2525
Epoch 8/20
43/43 - 0s - 7ms/step - loss: 0.2486 - val_loss: 0.2436
Epoch 9/20
43/43 - 0s - 7ms/step - loss: 0.2382 - val_loss: 0.2305
Epoch 10/20
43/43 - 0s - 7ms/step - loss: 0.2219 - val_loss: 0.2121
Epoch 11/20
43/43 - 0s - 7ms/step - loss: 0.2049 - val_loss: 0.1968
Epoch 12/20
43/43 - 0s - 7ms/step - loss: 0.1888 - val_loss: 0.1794
Epoch 13/20
43/43 - 0s - 7ms/step - loss: 0.1702 - val_loss: 0.1609
Epoch 14/20
43/43 - 0s - 7ms/step - loss: 0.1541 - val_loss: 0.1476
Epoch 15/20
43/43 - 0s - 7ms/step - loss: 0.1426 - val_loss: 0.137

In [4]:
from sklearn.svm import SVC
import numpy as np

# Corrected derivative‑feature builder: pads diffs so every order has length T
def build_deriv_features(X):
    # X: (N, T)
    derivs = [X]
    N, T = X.shape
    for order in range(1, 5):
        D = X.copy()
        # apply diff 'order' times
        for _ in range(order):
            D = np.diff(D, axis=1)
        # D now shape (N, T - order)
        # pad 'order' zeros at front to restore length T
        pad = np.zeros((N, order), dtype=X.dtype)
        D_padded = np.concatenate([pad, D], axis=1)
        derivs.append(D_padded)
    # stack into shape (N, T, 5)
    return np.stack(derivs, axis=2)

# Build features
X_train_feats = build_deriv_features(X_train_raw)
X_test_feats  = build_deriv_features(X_test_raw)

print("Feature shapes:", X_train_feats.shape, X_test_feats.shape)
# should both be (N, 999, 5)

# Flatten and train Quadratic SVM with probability estimates
svm = SVC(kernel="poly", degree=2, probability=True, random_state=42)
svm.fit(
    X_train_feats.reshape(len(X_train_feats), -1),  # (N_train, 999*5)
    y_train
)

# Baseline accuracy
train_acc = svm.score(
    X_train_feats.reshape(len(X_train_feats), -1), y_train
)
test_acc  = svm.score(
    X_test_feats.reshape(len(X_test_feats), -1),   y_test
)
print(f"SVM train acc: {train_acc:.3f}, test acc: {test_acc:.3f}")


Feature shapes: (1360, 999, 5) (340, 999, 5)
SVM train acc: 0.889, test acc: 0.888


In [5]:
# 1️⃣ Define a helper to run SVM and decide if we need to fallback
def svm_decision(x_feat_flat, thresh=0.8):
    """
    x_feat_flat: 1D array of shape (999*5,) — flattened derivative features
    Returns: (predicted_class, confidence, use_svm_bool)
    """
    probs = svm.predict_proba(x_feat_flat.reshape(1, -1))[0]
    top_idx = np.argmax(probs)
    top_conf = probs[top_idx]
    if top_conf >= thresh:
        return top_idx, top_conf, True
    else:
        return top_idx, top_conf, False

# 2️⃣ Evaluate coverage and accuracy on the test set
thresh = 0.8
# flatten the test features
X_test_flat = X_test_feats.reshape(len(X_test_feats), -1)

# run SVM on all test samples
svm_preds, svm_confs, use_svm = [], [], []
for x_flat in X_test_flat:
    p, c, use = svm_decision(x_flat, thresh=thresh)
    svm_preds.append(p)
    svm_confs.append(c)
    use_svm.append(use)

svm_preds = np.array(svm_preds)
svm_confs = np.array(svm_confs)
use_svm   = np.array(use_svm)

# coverage = fraction of examples handled by SVM
coverage = use_svm.mean()
# accuracy on covered examples
acc_covered = (svm_preds[use_svm] == y_test[use_svm]).mean()

print(f"SVM coverage at thresh={thresh:.2f}: {coverage*100:.1f}% of test set")
print(f"SVM accuracy on covered ({use_svm.sum()} samples): {acc_covered:.3f}")
print(f"Overall SVM-only test accuracy: {(svm_preds == y_test).mean():.3f}")


SVM coverage at thresh=0.80: 77.6% of test set
SVM accuracy on covered (264 samples): 1.000
Overall SVM-only test accuracy: 0.906


In [6]:
import pywt
import tensorflow as tf

# 4.1 CWT helper (one‑cycle → time‑frequency “image”)
scales = np.arange(1, 64)
wavelet = "morl"

def compute_cwt(x):
    coeffs, _ = pywt.cwt(x, scales, wavelet, sampling_period=1/5000)
    # normalize
    img = (coeffs - coeffs.mean()) / (coeffs.std() + 1e-6)
    return img[..., np.newaxis]  # shape (63, 999, 1)

# 4.2 Build CWT datasets for all train and test
X_train_cwt = np.stack([compute_cwt(x) for x in X_train_raw], axis=0)
X_test_cwt  = np.stack([compute_cwt(x) for x in X_test_raw],  axis=0)

print("X_train_cwt:", X_train_cwt.shape, "X_test_cwt:", X_test_cwt.shape)
# Expect (1360, 63, 999, 1) and (340, 63, 999, 1)


X_train_cwt: (1360, 63, 999, 1) X_test_cwt: (340, 63, 999, 1)


In [10]:
from tensorflow.keras import layers, Model

def build_stage2(input_shape, n_classes):
    inp = layers.Input(input_shape)            # (63, 999, 1)
    x   = layers.Conv2D(16, (3,3), padding="same", activation="relu")(inp)
    x   = layers.MaxPool2D((2,2))(x)           # (31, 499, 16)
    x   = layers.Conv2D(32, (3,3), padding="same", activation="relu")(x)
    x   = layers.MaxPool2D((2,2))(x)           # (15, 249, 32)
    # reshape to (time, features)
    t, h, c = x.shape[1], x.shape[2], x.shape[3]
    x   = layers.Reshape((t, h*c))(x)
    x   = layers.LSTM(64)(x)
    x   = layers.Dropout(0.3)(x)
    out = layers.Dense(n_classes, activation="softmax")(x)
    return Model(inp, out)

stage2_model = build_stage2(X_train_cwt.shape[1:], len(le.classes_))
stage2_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
stage2_model.summary()


In [11]:
# 6.1 tf.data pipelines
ds2_train = tf.data.Dataset.from_tensor_slices((X_train_cwt, y_train))\
    .shuffle(2000)\
    .batch(32)\
    .prefetch(tf.data.AUTOTUNE)

ds2_val   = tf.data.Dataset.from_tensor_slices((X_test_cwt, y_test))\
    .batch(32)\
    .prefetch(tf.data.AUTOTUNE)

# 6.2 Train with LR scheduling + early stop
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5,
                                         patience=3, min_lr=1e-5),
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=6,
                                     restore_best_weights=True)
]

history2 = stage2_model.fit(
    ds2_train,
    epochs=30,
    validation_data=ds2_val,
    callbacks=callbacks,
    verbose=2
)


Epoch 1/30
43/43 - 7s - 162ms/step - accuracy: 0.4772 - loss: 1.9848 - val_accuracy: 0.9265 - val_loss: 1.0400 - learning_rate: 0.0010
Epoch 2/30
43/43 - 1s - 35ms/step - accuracy: 0.9331 - loss: 0.7889 - val_accuracy: 0.9765 - val_loss: 0.4684 - learning_rate: 0.0010
Epoch 3/30
43/43 - 1s - 30ms/step - accuracy: 0.9853 - loss: 0.3769 - val_accuracy: 0.9853 - val_loss: 0.2081 - learning_rate: 0.0010
Epoch 4/30
43/43 - 1s - 30ms/step - accuracy: 0.9934 - loss: 0.2077 - val_accuracy: 0.9882 - val_loss: 0.1333 - learning_rate: 0.0010
Epoch 5/30
43/43 - 1s - 30ms/step - accuracy: 0.9941 - loss: 0.1404 - val_accuracy: 0.9882 - val_loss: 0.0969 - learning_rate: 0.0010
Epoch 6/30
43/43 - 2s - 35ms/step - accuracy: 0.9926 - loss: 0.1083 - val_accuracy: 0.9912 - val_loss: 0.0825 - learning_rate: 0.0010
Epoch 7/30
43/43 - 1s - 30ms/step - accuracy: 0.9934 - loss: 0.0913 - val_accuracy: 0.9882 - val_loss: 0.0637 - learning_rate: 0.0010
Epoch 8/30
43/43 - 1s - 31ms/step - accuracy: 0.9919 - loss: 

In [12]:
# 7.1 Stage2 predictions on ALL test samples
stage2_proba = stage2_model.predict(X_test_cwt, verbose=0)
stage2_preds = np.argmax(stage2_proba, axis=1)

# 7.2 Hybrid logic: SVM if confident, else Stage2
hybrid_preds = np.where(
    use_svm,            # boolean mask from Cell 3
    svm_preds,          # SVM’s top‑class idx
    stage2_preds        # fallback deep predictions
)

# 7.3 Compute overall hybrid accuracy
hybrid_acc = (hybrid_preds == y_test).mean()
print(f"Hybrid accuracy: {hybrid_acc:.3f}")
print(f"SVM covered {coverage*100:.1f}% with perfect accuracy, remaining {100-coverage*100:.1f}% handled by Stage2.")


Hybrid accuracy: 0.994
SVM covered 77.6% with perfect accuracy, remaining 22.4% handled by Stage2.


In [13]:
# Cell A (final) — Export as TensorFlow SavedModel
saved_path = "stage2_saved_model"
stage2_model.export(saved_path)
print("SavedModel exported to:", saved_path)

# Verify contents
get_ipython().system('ls -R stage2_saved_model')


Saved artifact at 'stage2_saved_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 63, 999, 1), dtype=tf.float32, name='keras_tensor_29')
Output Type:
  TensorSpec(shape=(None, 17), dtype=tf.float32, name=None)
Captures:
  138269630738896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630740048: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630739472: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630740816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630739856: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630742160: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630741008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630741584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138269630741392: TensorSpec(shape=(), dtype=tf.resource, name=None)
SavedModel exported to: stage2_saved_model
stage2_saved_model:
assets	fingerprint

In [14]:
!pip install tf2onnx


Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
INFO: pip is looking at multiple versions of onnx to determine which version is compatible with other requirements. This could take a while.
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m76.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: onnx, tf2onnx
  Attempting uninstall: onnx
    Found existing installation: onnx 1.18.0
    Uninstalling onnx-1.18.0:
      Successfully uninstalled onnx-1.18.0
Successful

In [15]:
import tf2onnx
import tensorflow as tf

# Assume stage2_model is your trained tf.keras Model
spec = (tf.TensorSpec((None, 999, 1), tf.float32, name="input"),)
model_proto, external_tensor_storage = tf2onnx.convert.from_keras(
    stage2_model,
    input_signature=spec,
    opset=13,
    output_path="stage2.onnx"
)
print("ONNX model saved to stage2.onnx")


ValueError: in user code:

    File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/input_spec.py", line 245, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "functional_3" is incompatible with the layer: expected shape=(None, 63, 999, 1), found shape=(None, 999)
