In [15]:
import os, glob
import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# 1️⃣ Load XPQRS CSVs into a long‑form DataFrame
DATA_DIR = "/kaggle/input/seed-power-quality-disturbance-dataset/XPQRS"
csv_paths = glob.glob(os.path.join(DATA_DIR, "*.csv"))
dfs = []
for fp in csv_paths:
    label = os.path.splitext(os.path.basename(fp))[0]
    df0 = pd.read_csv(fp)
    df_long = df0.melt(var_name="instance", value_name="amplitude")
    df_long["time_idx"] = df_long.groupby("instance").cumcount()
    df_long["label"]    = label
    dfs.append(df_long)
full_df = pd.concat(dfs, ignore_index=True)

# 2️⃣ Pivot to get X raw and y
pivot = full_df.pivot_table(
    index=["label","instance"],
    columns="time_idx",
    values="amplitude"
)
X_raw = pivot.values.astype("float32")  # shape (1700, 999)
labels = pivot.index.get_level_values("label")
le = LabelEncoder()
y = le.fit_transform(labels)            # 0..16

# 3️⃣ Train/test split
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y,
    test_size=0.2, stratify=y, random_state=42
)


In [17]:
from sklearn.svm import SVC
import numpy as np

# Corrected derivative‑feature builder: pads diffs so every order has length T
def build_deriv_features(X):
    # X: (N, T)
    derivs = [X]
    N, T = X.shape
    for order in range(1, 5):
        D = X.copy()
        # apply diff 'order' times
        for _ in range(order):
            D = np.diff(D, axis=1)
        # D now shape (N, T - order)
        # pad 'order' zeros at front to restore length T
        pad = np.zeros((N, order), dtype=X.dtype)
        D_padded = np.concatenate([pad, D], axis=1)
        derivs.append(D_padded)
    # stack into shape (N, T, 5)
    return np.stack(derivs, axis=2)

# Build features
X_train_feats = build_deriv_features(X_train_raw)
X_test_feats  = build_deriv_features(X_test_raw)

print("Feature shapes:", X_train_feats.shape, X_test_feats.shape)
# should both be (N, 999, 5)

# Flatten and train Quadratic SVM with probability estimates
svm = SVC(kernel="poly", degree=2, probability=True, random_state=42)
svm.fit(
    X_train_feats.reshape(len(X_train_feats), -1),  # (N_train, 999*5)
    y_train
)

# Baseline accuracy
train_acc = svm.score(
    X_train_feats.reshape(len(X_train_feats), -1), y_train
)
test_acc  = svm.score(
    X_test_feats.reshape(len(X_test_feats), -1),   y_test
)
print(f"SVM train acc: {train_acc:.3f}, test acc: {test_acc:.3f}")


Feature shapes: (1360, 999, 5) (340, 999, 5)
SVM train acc: 0.875, test acc: 0.885


In [18]:
# 1️⃣ Define a helper to run SVM and decide if we need to fallback
def svm_decision(x_feat_flat, thresh=0.8):
    """
    x_feat_flat: 1D array of shape (999*5,) — flattened derivative features
    Returns: (predicted_class, confidence, use_svm_bool)
    """
    probs = svm.predict_proba(x_feat_flat.reshape(1, -1))[0]
    top_idx = np.argmax(probs)
    top_conf = probs[top_idx]
    if top_conf >= thresh:
        return top_idx, top_conf, True
    else:
        return top_idx, top_conf, False

# 2️⃣ Evaluate coverage and accuracy on the test set
thresh = 0.8
# flatten the test features
X_test_flat = X_test_feats.reshape(len(X_test_feats), -1)

# run SVM on all test samples
svm_preds, svm_confs, use_svm = [], [], []
for x_flat in X_test_flat:
    p, c, use = svm_decision(x_flat, thresh=thresh)
    svm_preds.append(p)
    svm_confs.append(c)
    use_svm.append(use)

svm_preds = np.array(svm_preds)
svm_confs = np.array(svm_confs)
use_svm   = np.array(use_svm)

# coverage = fraction of examples handled by SVM
coverage = use_svm.mean()
# accuracy on covered examples
acc_covered = (svm_preds[use_svm] == y_test[use_svm]).mean()

print(f"SVM coverage at thresh={thresh:.2f}: {coverage*100:.1f}% of test set")
print(f"SVM accuracy on covered ({use_svm.sum()} samples): {acc_covered:.3f}")
print(f"Overall SVM-only test accuracy: {(svm_preds == y_test).mean():.3f}")


SVM coverage at thresh=0.80: 71.8% of test set
SVM accuracy on covered (244 samples): 1.000
Overall SVM-only test accuracy: 0.912


In [19]:
import pywt
import tensorflow as tf

# 4.1 CWT helper (one‑cycle → time‑frequency “image”)
scales = np.arange(1, 64)
wavelet = "morl"

def compute_cwt(x):
    coeffs, _ = pywt.cwt(x, scales, wavelet, sampling_period=1/5000)
    # normalize
    img = (coeffs - coeffs.mean()) / (coeffs.std() + 1e-6)
    return img[..., np.newaxis]  # shape (63, 999, 1)

# 4.2 Build CWT datasets for all train and test
X_train_cwt = np.stack([compute_cwt(x) for x in X_train_raw], axis=0)
X_test_cwt  = np.stack([compute_cwt(x) for x in X_test_raw],  axis=0)

print("X_train_cwt:", X_train_cwt.shape, "X_test_cwt:", X_test_cwt.shape)
# Expect (1360, 63, 999, 1) and (340, 63, 999, 1)


X_train_cwt: (1360, 63, 999, 1) X_test_cwt: (340, 63, 999, 1)


In [34]:
from tensorflow.keras import layers, Model

def build_stage2(input_shape, n_classes):
    inp = layers.Input(input_shape)            # (63, 999, 1)
    x   = layers.Conv2D(16, (3,3), padding="same", activation="relu")(inp)
    x   = layers.MaxPool2D((2,2))(x)           # (31, 499, 16)
    x   = layers.Conv2D(32, (3,3), padding="same", activation="relu")(x)
    x   = layers.MaxPool2D((2,2))(x)           # (15, 249, 32)
    # reshape to (time, features)
    t, h, c = x.shape[1], x.shape[2], x.shape[3]
    x   = layers.Reshape((t, h*c))(x)
    x   = layers.LSTM(64)(x)
    x   = layers.Dropout(0.3)(x)
    out = layers.Dense(n_classes, activation="softmax")(x)
    return Model(inp, out)

stage2_model = build_stage2(X_train_cwt.shape[1:], len(le.classes_))
stage2_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
stage2_model.summary()


In [35]:
# 6.1 tf.data pipelines
ds2_train = tf.data.Dataset.from_tensor_slices((X_train_cwt, y_train))\
    .shuffle(2000)\
    .batch(32)\
    .prefetch(tf.data.AUTOTUNE)

ds2_val   = tf.data.Dataset.from_tensor_slices((X_test_cwt, y_test))\
    .batch(32)\
    .prefetch(tf.data.AUTOTUNE)

# 6.2 Train with LR scheduling + early stop
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5,
                                         patience=3, min_lr=1e-5),
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=6,
                                     restore_best_weights=True)
]

history2 = stage2_model.fit(
    ds2_train,
    epochs=30,
    validation_data=ds2_val,
    callbacks=callbacks,
    verbose=2
)


Epoch 1/30
43/43 - 5s - 105ms/step - accuracy: 0.4213 - loss: 2.1397 - val_accuracy: 0.8529 - val_loss: 1.3531 - learning_rate: 0.0010
Epoch 2/30
43/43 - 1s - 34ms/step - accuracy: 0.8809 - loss: 0.9885 - val_accuracy: 0.9706 - val_loss: 0.6333 - learning_rate: 0.0010
Epoch 3/30
43/43 - 2s - 36ms/step - accuracy: 0.9750 - loss: 0.5114 - val_accuracy: 0.9882 - val_loss: 0.2980 - learning_rate: 0.0010
Epoch 4/30
43/43 - 2s - 37ms/step - accuracy: 0.9941 - loss: 0.2731 - val_accuracy: 0.9912 - val_loss: 0.1614 - learning_rate: 0.0010
Epoch 5/30
43/43 - 2s - 37ms/step - accuracy: 0.9949 - loss: 0.1597 - val_accuracy: 0.9912 - val_loss: 0.0944 - learning_rate: 0.0010
Epoch 6/30
43/43 - 2s - 36ms/step - accuracy: 0.9963 - loss: 0.1026 - val_accuracy: 0.9912 - val_loss: 0.0652 - learning_rate: 0.0010
Epoch 7/30
43/43 - 2s - 37ms/step - accuracy: 0.9963 - loss: 0.0688 - val_accuracy: 0.9912 - val_loss: 0.0495 - learning_rate: 0.0010
Epoch 8/30
43/43 - 2s - 37ms/step - accuracy: 0.9956 - loss: 

In [36]:
# 7.1 Stage2 predictions on ALL test samples
stage2_proba = stage2_model.predict(X_test_cwt, verbose=0)
stage2_preds = np.argmax(stage2_proba, axis=1)

# 7.2 Hybrid logic: SVM if confident, else Stage2
hybrid_preds = np.where(
    use_svm,            # boolean mask from Cell 3
    svm_preds,          # SVM’s top‑class idx
    stage2_preds        # fallback deep predictions
)

# 7.3 Compute overall hybrid accuracy
hybrid_acc = (hybrid_preds == y_test).mean()
print(f"Hybrid accuracy: {hybrid_acc:.3f}")
print(f"SVM covered {coverage*100:.1f}% with perfect accuracy, remaining {100-coverage*100:.1f}% handled by Stage2.")


Hybrid accuracy: 0.994
SVM covered 71.8% with perfect accuracy, remaining 28.2% handled by Stage2.


In [44]:
# Cell A (final) — Export as TensorFlow SavedModel
saved_path = "stage2_saved_model"
stage2_model.export(saved_path)
print("SavedModel exported to:", saved_path)

# Verify contents
get_ipython().system('ls -R stage2_saved_model')


Saved artifact at 'stage2_saved_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 63, 999, 1), dtype=tf.float32, name='keras_tensor_283')
Output Type:
  TensorSpec(shape=(None, 17), dtype=tf.float32, name=None)
Captures:
  139130930862928: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818540816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818539088: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818541392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818537744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818543120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818541968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818543696: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139126818544272: TensorSpec(shape=(), dtype=tf.resource, name=None)
SavedModel exported to: stage2_saved_model
stage2_saved_model:
assets	fingerprin

In [45]:
!pip install tf2onnx




In [46]:
import tf2onnx
import tensorflow as tf

# Assume stage2_model is your trained tf.keras Model
spec = (tf.TensorSpec((None, 999, 1), tf.float32, name="input"),)
model_proto, external_tensor_storage = tf2onnx.convert.from_keras(
    stage2_model,
    input_signature=spec,
    opset=13,
    output_path="stage2.onnx"
)
print("ONNX model saved to stage2.onnx")


ValueError: in user code:

    File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler  **
        raise e.with_traceback(filtered_tb) from None
    File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/input_spec.py", line 245, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "functional_10" is incompatible with the layer: expected shape=(None, 63, 999, 1), found shape=(None, 999)
