# 05_noise_robustness.ipynb — sweep shots & noise for QSVM + VQC

# Cell 0 — perf env

In [1]:
# ==== Perf environment (set before heavy imports) ====
import os
# Default each thread-count variable to "8" unless it is already set in the environment,
# then echo the effective values.
_THREAD_ENV_VARS = (
    "OMP_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "MKL_NUM_THREADS",
    "NUMEXPR_NUM_THREADS",
)
for _env_name in _THREAD_ENV_VARS:
    os.environ.setdefault(_env_name, "8")
print("BLAS threads:", *[os.environ.get(_env_name) for _env_name in _THREAD_ENV_VARS])

# Timing helpers
import time, json
from contextlib import contextmanager
from collections import defaultdict
class PhaseTimer:
    """Accumulate wall-clock seconds per named phase.

    Using the same key more than once adds to that key's running total.
    """

    def __init__(self):
        # key -> accumulated seconds; unseen keys start at 0.0
        self.t = defaultdict(float)

    @contextmanager
    def timed(self, key):
        """Context manager that adds the elapsed time of its body to ``key``.

        The elapsed time is recorded even when the body raises, so a failed
        phase still shows up in the report; the exception propagates unchanged.
        """
        t0 = time.perf_counter()
        try:
            yield
        finally:
            self.t[key] += time.perf_counter() - t0

    def add(self, key, seconds):
        """Add an externally measured duration (in seconds) to ``key``."""
        self.t[key] += seconds

    def to_dict(self):
        """Return a plain ``dict`` copy of the accumulated totals."""
        return dict(self.t)

def pretty_seconds(sec):
    """Format a duration in seconds: minutes (one decimal) from 60 s upward, otherwise seconds."""
    if sec >= 60:
        minutes = sec / 60
        return f"{minutes:.1f} min"
    return f"{sec:.1f} s"

BLAS threads: 8 8 8 8


# Cell 1 — imports & dirs

In [2]:
from pathlib import Path
import itertools, warnings, numpy as np, pandas as pd
from IPython.display import display
import pennylane as qml
from pennylane import numpy as pnp
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix)

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# BasicEntanglerLayers: use the top-level attribute when it exists,
# otherwise import it from the templates submodule.
try:
    BasicEntanglerLayers = qml.BasicEntanglerLayers
except AttributeError:
    from pennylane.templates.layers import BasicEntanglerLayers

# Project layout, relative to the working directory.
ROOT = Path(".")
PROCESSED = ROOT / "data/processed"
RESULTS = ROOT / "results"

# Create every output folder up front so later cells can write without checking.
for _out_name in ("metrics", "kernels", "cache"):
    (RESULTS / _out_name).mkdir(parents=True, exist_ok=True)

# Seed both random namespaces imported above.
np.random.seed(17)
pnp.random.seed(17)

# Cell 2 — knobs (speed vs accuracy)

In [3]:
# Presets: "turbo" (fastest), "fast" (quick), "full" (thorough)
PRESET = "turbo"  # ← change to "fast" or "full" when you want

# One entry per preset; each entry carries every knob the rest of the notebook reads.
PRESETS = {
    "turbo": dict(
        D=6, MAX_TR=160, M_ANCHORS=96,
        shots_grid=[256],
        pflip_grid=[0.0, 0.01],
        pdepol_grid=[0.0],      # no pdepol for speed
    ),
    "fast": dict(
        D=6, MAX_TR=220, M_ANCHORS=128,
        shots_grid=[256, 512],
        pflip_grid=[0.0, 0.01],
        pdepol_grid=[0.0, 0.01],
    ),
    "full": dict(
        D=6, MAX_TR=300, M_ANCHORS=192,
        shots_grid=[512, 2000],
        pflip_grid=[0.0, 0.01],
        pdepol_grid=[0.0, 0.01],
    ),
}


def resolve_preset(name):
    """Return the knob dict for preset ``name``.

    Raises
    ------
    ValueError
        If ``name`` is not a key of ``PRESETS``. A mistyped preset therefore
        stops the notebook instead of silently running the "full" sweep.
    """
    try:
        return PRESETS[name]
    except KeyError:
        raise ValueError(
            f"Unknown PRESET {name!r}; expected one of {sorted(PRESETS)}"
        ) from None


_knobs = resolve_preset(PRESET)
D = _knobs["D"]
MAX_TR = _knobs["MAX_TR"]
M_ANCHORS = _knobs["M_ANCHORS"]
# Copy the grids so later edits to them cannot alter the PRESETS table.
shots_grid = list(_knobs["shots_grid"])
pflip_grid = list(_knobs["pflip_grid"])
pdepol_grid = list(_knobs["pdepol_grid"])

# Full Cartesian product: one (shots, pflip, pdepol) tuple per sweep configuration.
SWEEP = list(itertools.product(shots_grid, pflip_grid, pdepol_grid))
print(f"Preset={PRESET} | Sweep size:", len(SWEEP), "configs")

Preset=turbo | Sweep size: 2 configs


# Cell 3 — load & PCA

In [4]:
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays — only use with trusted files.
data = np.load(PROCESSED / "encodings.npz", allow_pickle=True)
import json
with open(PROCESSED / "splits.json") as f:
    SPL = json.load(f)

# Labels and k-mer features, plus the row indices of each split.
y = data["y"].astype(int)
X_kmer = data["kmer"].astype(np.float32)
tr_idx, va_idx, te_idx = (np.array(SPL[_split]) for _split in ("train", "val", "test"))

# PCA and the scaler are fitted on the training rows only, then applied to val/test.
pca = PCA(n_components=D, random_state=17)
scaler = StandardScaler(with_mean=True, with_std=True)

X_tr_pca = pca.fit_transform(X_kmer[tr_idx])
X_va_pca = pca.transform(X_kmer[va_idx])
X_te_pca = pca.transform(X_kmer[te_idx])

Xtr = scaler.fit_transform(X_tr_pca).astype(np.float32)
Xva = scaler.transform(X_va_pca).astype(np.float32)
Xte = scaler.transform(X_te_pca).astype(np.float32)

ytr, yva, yte = y[tr_idx], y[va_idx], y[te_idx]

# Training subset: the first min(MAX_TR, len(Xtr)) rows, in split order.
sel = np.arange(min(MAX_TR, len(Xtr)))
Xtr_s, ytr_s = Xtr[sel], ytr[sel]
print("Shapes:", Xtr.shape, Xva.shape, Xte.shape, "| train subset:", Xtr_s.shape)

Shapes: (894, 6) (298, 6) (298, 6) | train subset: (160, 6)


# Cell 4 — VQC helpers

In [5]:
def _make_sv_device():
    """Create a D-wire device with shots=None, preferring lightning.qubit.

    Falls back to default.qubit when creating lightning.qubit raises.
    """
    device_kwargs = dict(wires=D, shots=None)
    try:
        sv_device = qml.device("lightning.qubit", **device_kwargs)
    except Exception:
        sv_device = qml.device("default.qubit", **device_kwargs)
    return sv_device

def _vqc_layer_clean(x, w):
    """Queue one noiseless block: Y-rotation angle embedding of x, then one entangler layer with weights w."""
    wires = range(D)
    qml.AngleEmbedding(x, wires=wires, rotation="Y")
    single_layer = w[np.newaxis, :]  # add a leading axis so w is passed as a one-layer weight array
    BasicEntanglerLayers(single_layer, wires=wires)

# Device shared by the clean and the noisy VQC QNodes defined in this cell.
_dev_vqc = _make_sv_device()

@qml.qnode(_dev_vqc, interface=None)
def _vqc_clean(x, weights):
    """Noiseless VQC: apply one embed+entangle block per row of weights, return <Z> on wire 0."""
    for layer_w in weights:
        _vqc_layer_clean(x, layer_w)
    return qml.expval(qml.PauliZ(0))

# --- MC Pauli noise (unitary) ---
def _sample_noise_masks(rng, pflip, pdepol):
    """Draw one per-wire Pauli-error pattern.

    Returns (flips, pa), both int8 arrays of length D. flips[i] is 1 with
    probability pflip. pa[i] is 0 unless wire i is hit (probability pdepol),
    in which case it is a uniform label from {1, 2, 3} (1:X, 2:Y, 3:Z).
    """
    flip_draws = rng.random(D)
    flips = (flip_draws < pflip).astype(np.int8)

    pauli_hit = rng.random(D) < pdepol
    pa = np.zeros(D, dtype=np.int8)
    n_hit = int(pauli_hit.sum())
    if n_hit > 0:
        pa[pauli_hit] = rng.integers(1, 4, size=n_hit)  # 1:X,2:Y,3:Z
    return flips, pa

def _apply_noise(pa, flips):
    """Queue the sampled errors: first the Pauli labelled in pa on each wire, then an X on every flipped wire."""
    pauli_by_label = {1: qml.PauliX, 2: qml.PauliY, 3: qml.PauliZ}
    for wire in range(D):
        gate = pauli_by_label.get(int(pa[wire]))  # label 0 (or anything else) means no gate
        if gate is not None:
            gate(wire)

    if not flips.any():
        return
    for wire in range(D):
        if flips[wire]:
            qml.PauliX(wire)

@qml.qnode(_dev_vqc, interface=None)
def _vqc_noisy_once(x, weights, flips, pa):
    """VQC forward pass for one fixed error pattern; returns <Z> on wire 0.

    In every layer the circuit embeds x, applies the errors described by
    (pa, flips), then applies that layer's entangler. The same masks are
    reused for every layer.
    """
    wires = range(D)
    for layer_w in weights:
        qml.AngleEmbedding(x, wires=wires, rotation="Y")
        _apply_noise(pa, flips)  # errors sit between the embedding and the entangler
        BasicEntanglerLayers(layer_w[np.newaxis, :], wires=wires)
    return qml.expval(qml.PauliZ(0))

# Cell 5 — load trained VQC weights (from 04)

In [6]:
# Trained weights are expected on disk; stop with a pointer to the producing notebook otherwise.
W_PATH = RESULTS / "vqc_weights.npy"
if W_PATH.exists():
    weights = pnp.array(np.load(W_PATH), requires_grad=False)
else:
    raise FileNotFoundError("Run 04_quantum_vqc.ipynb first to save weights.")
weights.shape  # (L, D)

(2, 6)

# Cell 6 — VQC predict & metrics

In [7]:
def vqc_predict_proba(X, weights, pflip=0.0, pdepol=0.0, shots=0, seed=123):
    """Return one score in (0, 1) per row of X, computed as (1 + <Z_0>) / 2.

    Parameters
    ----------
    X : iterable of feature vectors.
    weights : array-like, converted to a float ndarray; iterated one row per layer.
    pflip, pdepol : per-wire error probabilities passed to the mask sampler.
        When both are exactly 0.0 the noiseless circuit is evaluated once per row.
    shots : only sets the number of Monte-Carlo error patterns averaged per
        row, ``max(16, shots // 32)`` (32 when ``shots`` is 0). It is not
        passed to the circuits, so it has no effect in the noiseless branch.
    seed : seed of the mask RNG. The generator is re-created on every call, so
        identical arguments give identical results. The default 123 is the
        value that used to be hard-coded.

    Returns
    -------
    1-D float ndarray clipped to [1e-6, 1 - 1e-6].
    """
    w = np.asarray(weights, dtype=float)
    rng = np.random.default_rng(seed)

    if pflip == 0.0 and pdepol == 0.0:
        out = [(1.0 + float(_vqc_clean(xi, w))) / 2.0 for xi in X]
    else:
        S = max(16, shots // 32) if shots else 32   # fewer samples → much faster
        out = []
        for xi in X:
            acc = 0.0
            for _ in range(S):
                flips, pa = _sample_noise_masks(rng, pflip, pdepol)
                acc += (1.0 + float(_vqc_noisy_once(xi, w, flips, pa))) / 2.0
            out.append(acc / S)

    return np.clip(np.array(out, dtype=float), 1e-6, 1.0 - 1e-6)

def metrics_from_probs(p, y, split):
    """Threshold scores p at 0.5 and return (metrics_dict, confusion_matrix).

    The dict has keys split, acc, prec, rec, f1, auc. auc is NaN when
    roc_auc_score raises ValueError.
    """
    yhat = (p >= 0.5).astype(int)
    acc = accuracy_score(y, yhat)
    prec, rec, f1, _ = precision_recall_fscore_support(
        y, yhat, average="binary", zero_division=0
    )
    try:
        auc = roc_auc_score(y, p)
    except ValueError:
        auc = float("nan")
    summary = {"split": split, "acc": acc, "prec": prec, "rec": rec, "f1": f1, "auc": auc}
    return summary, confusion_matrix(y, yhat)

# Cell 7 — kernel utils (pure + MC adjoint) & Nyström

In [8]:
def _entangle_ring(ws):
    """Queue a CZ between each wire in ws and its successor, wrapping from the last wire to the first."""
    count = len(ws)
    for pos, wire in enumerate(ws):
        successor = ws[(pos + 1) % count]
        qml.CZ(wires=[wire, successor])

def save_npz(path, **arrays):
    """Write the keyword arrays to ``path`` as a compressed .npz archive."""
    np.savez_compressed(path, **arrays)
def load_npz(path):
    """Load every array of an .npz archive into a plain dict, or return None if ``path`` does not exist.

    The archive is read inside a ``with`` block so its file handle is closed
    explicitly once the arrays are in memory, instead of relying on garbage
    collection of the NpzFile object.
    """
    if not Path(path).exists():
        return None
    # NOTE(review): allow_pickle=True permits unpickling object arrays, which can run
    # arbitrary code from an untrusted file. Kept for compatibility — confirm it is needed.
    with np.load(path, allow_pickle=True) as archive:
        return {name: archive[name] for name in archive.files}

def normalize_block(K, da, db):
    """Clip K to [0, 1] and divide entry (i, j) by sqrt(da[i] * db[j]) + 1e-12.

    Values of da and db at or below 1e-12 are raised to 1e-12 first.
    """
    eps = 1e-12

    def floored(diag):
        return np.where(diag <= eps, eps, diag)

    scale = np.sqrt(np.outer(floored(da), floored(db))) + eps
    return np.clip(K, 0.0, 1.0) / scale

# ---- Pure (noiseless) QSVM kernel via states → matmul ----
def make_pure_state_getter():
    """Build and return a QNode phi(x) that returns the state of the noiseless feature map.

    The feature map is a Y-rotation angle embedding followed by a CZ ring.
    The device is lightning.qubit when it can be created, else default.qubit.
    """
    device_kwargs = dict(wires=D, shots=None)
    try:
        dev = qml.device("lightning.qubit", **device_kwargs)
    except Exception:
        dev = qml.device("default.qubit", **device_kwargs)

    @qml.qnode(dev)
    def phi(x):
        wire_list = list(range(D))
        qml.AngleEmbedding(x, wires=range(D), rotation="Y")
        _entangle_ring(wire_list)
        return qml.state()

    return phi

def states_batch(X, get_state, dtype=np.complex64):
    """Call get_state on every element of X and stack the results into one array cast to dtype."""
    stacked = np.stack(list(map(get_state, X)))
    return stacked.astype(dtype, copy=False)

def kernel_from_states(SA, SB):
    """Return |SA @ SB^H|**2: the squared overlap between every row of SA and every row of SB."""
    overlaps = np.matmul(SA, SB.conj().T)
    return np.abs(overlaps) ** 2

# ---- Noisy QSVM kernel: batched MC via states (+independent masks) ----
def _noise_masks(rng, pflip, pdepol):
    """Sample (flips, pa) for one Monte-Carlo draw; both are int8 arrays of length D.

    flips marks wires to receive an X (probability pflip each). pa holds 0 for
    "no Pauli" or a uniform label 1..3 on wires hit with probability pdepol.
    """
    flips = np.asarray(rng.random(D) < pflip).astype(np.int8)

    hit = rng.random(D) < pdepol
    pa = np.zeros(D, dtype=np.int8)
    hit_count = int(hit.sum())
    if hit_count:
        pa[hit] = rng.integers(1, 4, size=hit_count)
    return flips, pa

def _build_noisy_state_qnode():
    """Return a QNode psi(x, flips, pa) giving the feature-map state with Pauli errors injected.

    The same (pa, flips) pattern is applied twice: once after the angle
    embedding and once more after the CZ ring.
    """
    device_kwargs = dict(wires=D, shots=None)
    try:
        dev = qml.device("lightning.qubit", **device_kwargs)
    except Exception:
        dev = qml.device("default.qubit", **device_kwargs)

    pauli_by_label = {1: qml.PauliX, 2: qml.PauliY, 3: qml.PauliZ}

    def _apply(pa, flips):
        # Labelled Paulis first, then an X on every flipped wire.
        for wire in range(D):
            gate = pauli_by_label.get(int(pa[wire]))
            if gate is not None:
                gate(wire)
        if not flips.any():
            return
        for wire in range(D):
            if flips[wire]:
                qml.PauliX(wire)

    @qml.qnode(dev)
    def psi(x, flips, pa):
        qml.AngleEmbedding(x, wires=range(D), rotation="Y")
        _apply(pa, flips)                 # pre-entangle
        _entangle_ring(list(range(D)))
        _apply(pa, flips)                 # post-entangle
        return qml.state()

    return psi

def _states_noise_batch(X, flips, pa, psi):
    """Evaluate psi on every row of X under one fixed (flips, pa) pattern; return the stacked complex64 states."""
    per_row = [psi(row, flips, pa) for row in X]
    stacked = np.stack(per_row)
    return stacked.astype(np.complex64, copy=False)

def mc_blocks_via_states(Xtr_s, Xva, Xte, A, pflip, pdepol, S=24, seed=123):
    """Return (K_MM, K_trM, K_vaM, K_teM, d_M, d_tr, d_va, d_te) averaged over S MC pairs.

    Each Monte-Carlo draw samples two independent error patterns, "left" and
    "right". Gram blocks pair the left states of a dataset with the right
    states of the anchors A; the d_* vectors pair each row's left state with
    its own right state.
    """
    psi = _build_noisy_state_qnode()
    rng = np.random.default_rng(seed)

    # This order fixes the order of the returned tuple: anchors, train, val, test.
    datasets = (A, Xtr_s, Xva, Xte)
    n_anchor = len(A)
    gram_sums = [np.zeros((len(X), n_anchor), dtype=float) for X in datasets]
    diag_sums = [np.zeros(len(X), dtype=float) for X in datasets]

    for _ in range(S):
        # Left masks are drawn before right masks; keep this order for reproducibility.
        left_masks = _noise_masks(rng, pflip, pdepol)
        right_masks = _noise_masks(rng, pflip, pdepol)

        left_states = [_states_noise_batch(X, *left_masks, psi) for X in datasets]
        right_states = [_states_noise_batch(X, *right_masks, psi) for X in datasets]
        anchors_right_h = right_states[0].conj().T

        for gram, diag, left, right in zip(gram_sums, diag_sums, left_states, right_states):
            gram += np.abs(left @ anchors_right_h) ** 2                  # block against the anchors
            diag += np.abs(np.sum(left * right.conj(), axis=1)) ** 2     # k(x, x), left vs right

    inv_s = 1.0 / float(S)
    return tuple(total * inv_s for total in gram_sums + diag_sums)

# Cell 8 — VQC sweep (timed; CSV)

In [9]:
# Evaluate the trained VQC on train/val/test for every sweep configuration,
# timing the forward passes and the metric computation separately.
timer_v = PhaseTimer()
rows = []
with timer_v.timed(f"VQC_sweep_total_{len(SWEEP)}"):
    for shots, pflip, pdepol in SWEEP:
        cfg_tag = f"{shots}_{pflip}_{pdepol}"
        noise_kw = dict(pflip=pflip, pdepol=pdepol, shots=shots)

        with timer_v.timed(f"VQC_forward_{cfg_tag}"):
            p_tr, p_va, p_te = (
                vqc_predict_proba(X_split, weights, **noise_kw)
                for X_split in (Xtr, Xva, Xte)
            )

        with timer_v.timed(f"VQC_metrics_{cfg_tag}"):
            m_tr, _ = metrics_from_probs(p_tr, ytr, "train")
            m_va, _ = metrics_from_probs(p_va, yva, "val")
            m_te, _ = metrics_from_probs(p_te, yte, "test")
            row_tags = dict(model="VQC", shots=shots, pflip=pflip, pdepol=pdepol)
            for m in (m_tr, m_va, m_te):
                m.update(row_tags)
                rows.append(m)

df_vqc = pd.DataFrame(rows)
df_vqc.to_csv(RESULTS / "metrics/noise_sweep_vqc.csv", index=False)
print("VQC sweep rows:", len(df_vqc))
display(df_vqc.head())

VQC sweep rows: 6


Unnamed: 0,split,acc,prec,rec,f1,auc,model,shots,pflip,pdepol
0,train,0.781879,0.932692,0.82303,0.874437,0.589565,VQC,256,0.0,0.0
1,val,0.785235,0.95102,0.817544,0.879245,0.535493,VQC,256,0.0,0.0
2,test,0.738255,0.919149,0.785455,0.847059,0.510988,VQC,256,0.0,0.0
3,train,0.782998,0.931601,0.825455,0.875321,0.58881,VQC,256,0.01,0.0
4,val,0.778523,0.950617,0.810526,0.875,0.530364,VQC,256,0.01,0.0


# Cell 9 — QSVM sweep (pure cached + MC noisy Nyström; timed; CSV)

In [10]:
from numpy.linalg import eigh

import hashlib

rows = []
timer_q = PhaseTimer()

# ---------- Pure-kernel cache fingerprint ----------
# The cached Gram matrices are computed from the actual feature rows of the train
# subset, val and test sets. D and N alone do not identify them, so a short
# content hash is part of the file name: changed features with the same row count
# give a cache miss instead of silently reusing stale kernels.
# Cache files written under the older, hash-less name are simply not picked up.
_data_digest = hashlib.sha1()
for _arr in (Xtr_s, Xva, Xte):
    _arr = np.ascontiguousarray(_arr)
    _data_digest.update(str((_arr.shape, _arr.dtype.str)).encode())
    _data_digest.update(_arr.tobytes())

fp = f"D{D}_N{len(Xtr_s)}_pca17_scaler_{_data_digest.hexdigest()[:12]}"
PURE_KERNELS_FILE = RESULTS / f"cache/pure_kernels_{fp}.npz"

# load_npz returns None when the file does not exist; None marks "compute in the sweep".
pure_cached = load_npz(PURE_KERNELS_FILE)
if pure_cached:
    K_trtr_n, K_vatr_n, K_tetr_n = (
        pure_cached[_key] for _key in ("K_trtr_n", "K_vatr_n", "K_tetr_n")
    )
    print("Loaded pure kernels from cache.")
else:
    K_trtr_n = K_vatr_n = K_tetr_n = None

# ---------- Kernel helpers must come from Cell 7 ----------
# This cell used to carry a second copy of the Cell 7 noisy-kernel helpers behind an
# `if "mc_blocks_via_states" not in globals()` guard. That copy could not be reached in
# a working run: `load_npz`, defined in the same Cell 7, is already called earlier in
# this cell. The sweep below also needs the other Cell 7 helpers. A duplicate can
# silently drift from the original, so check for the helpers and fail with a clear message.
_required_helpers = (
    "load_npz",
    "save_npz",
    "normalize_block",
    "make_pure_state_getter",
    "states_batch",
    "kernel_from_states",
    "mc_blocks_via_states",
)
_missing_helpers = [_name for _name in _required_helpers if _name not in globals()]
if _missing_helpers:
    raise NameError(
        "Run Cell 7 (kernel utils) before the QSVM sweep; missing: "
        + ", ".join(_missing_helpers)
    )

# ---------- Run sweep ----------
def _qsvm_eval_rows(clf, split_inputs, timer, phase, extra):
    """Score a fitted SVC on each split and return one metrics row per split.

    Parameters
    ----------
    clf : fitted classifier exposing predict_proba (and decision_function as fallback).
    split_inputs : sequence of (split_name, kernel_or_feature_matrix, y_true).
    timer, phase : the time spent on each split is added to
        ``f"{phase}_eval_{split}"`` on ``timer``.
    extra : dict merged into every row after the metric columns.
    """
    out = []
    for split, F, y_true in split_inputs:
        t0 = time.perf_counter()
        try:
            prob = clf.predict_proba(F)[:, 1]
        except Exception:
            # Deliberate best-effort fallback: squash the decision function with a logistic.
            scores = clf.decision_function(F)
            prob = 1.0 / (1.0 + np.exp(-scores))
        prob = np.clip(prob, 1e-6, 1.0 - 1e-6)
        pred = (prob >= 0.5).astype(int)
        acc = accuracy_score(y_true, pred)
        prec, rec, f1, _ = precision_recall_fscore_support(
            y_true, pred, average="binary", zero_division=0
        )
        try:
            auc = roc_auc_score(y_true, prob)
        except ValueError:
            auc = float("nan")
        timer.add(f"{phase}_eval_{split}", time.perf_counter() - t0)
        out.append({**dict(split=split, acc=acc, prec=prec, rec=rec, f1=f1, auc=auc), **extra})
    return out


with timer_q.timed(f"QSVM_sweep_total_{len(SWEEP)}"):
    for shots, pflip, pdepol in SWEEP:
        print(f"\n[QSVM] shots={shots}, pflip={pflip}, pdepol={pdepol}")
        noiseless = (pflip == 0.0 and pdepol == 0.0)
        row_tags = {"model": "QSVM", "shots": shots, "pflip": pflip, "pdepol": pdepol}

        if noiseless:
            # --- PURE / NOISELESS: reuse cached Gram matrices if present ---
            if K_trtr_n is None:
                with timer_q.timed("pure_states_and_gram"):
                    get_state = make_pure_state_getter()
                    S_tr = states_batch(Xtr_s, get_state)
                    S_va = states_batch(Xva,   get_state)
                    S_te = states_batch(Xte,   get_state)
                    K_trtr = kernel_from_states(S_tr, S_tr)
                    K_vatr = kernel_from_states(S_va, S_tr)
                    K_tetr = kernel_from_states(S_te, S_tr)

                # Self-kernels k(x, x) used as normalisers.
                d_tr = np.clip(np.diag(K_trtr), 1e-12, 1.0)
                d_va = np.clip(np.diag(kernel_from_states(S_va, S_va)), 1e-12, 1.0)
                d_te = np.clip(np.diag(kernel_from_states(S_te, S_te)), 1e-12, 1.0)

                with timer_q.timed("pure_normalize"):
                    # Symmetrise the train block and add a small ridge on its diagonal.
                    K_trtr_n = normalize_block(0.5*(K_trtr + K_trtr.T), d_tr, d_tr) + 1e-8*np.eye(len(d_tr))
                    K_vatr_n = normalize_block(K_vatr, d_va, d_tr)
                    K_tetr_n = normalize_block(K_tetr, d_te, d_tr)

                save_npz(PURE_KERNELS_FILE, K_trtr_n=K_trtr_n, K_vatr_n=K_vatr_n, K_tetr_n=K_tetr_n)
                print("Saved pure kernels to cache.")

            if np.unique(ytr_s).size < 2:
                print("  ! Skip pure branch — single-class training slice.")
            else:
                with timer_q.timed("pure_svc_fit"):
                    clf = SVC(C=5.0, kernel="precomputed", probability=True, class_weight="balanced", random_state=0)
                    clf.fit(K_trtr_n, ytr_s)

                rows += _qsvm_eval_rows(
                    clf,
                    [("train", K_trtr_n, ytr_s), ("val", K_vatr_n, yva), ("test", K_tetr_n, yte)],
                    timer_q, "pure", row_tags,
                )

        else:
            # --- NOISY: batched statevector MC + Nyström (fast) ---
            S_NOISE = max(16, shots // 32)  # fewer MC samples → faster
            rng = np.random.default_rng(123)  # deterministic anchors
            idx_anchor = rng.choice(len(Xtr_s), size=min(M_ANCHORS, len(Xtr_s)), replace=False)
            A = Xtr_s[idx_anchor]

            with timer_q.timed("noisy_blocks_states"):
                K_MM, K_trM, K_vaM, K_teM, d_M, d_tr, d_va, d_te = mc_blocks_via_states(
                    Xtr_s, Xva, Xte, A, pflip, pdepol, S=S_NOISE, seed=123
                )

            with timer_q.timed("noisy_normalize"):
                # Same normalisation as the pure branch, via the shared helper.
                K_MM_n  = normalize_block(0.5*(K_MM + K_MM.T), d_M, d_M) + 1e-8*np.eye(len(d_M))
                K_trM_n = normalize_block(K_trM, d_tr, d_M)
                K_vaM_n = normalize_block(K_vaM, d_va, d_M)
                K_teM_n = normalize_block(K_teM, d_te, d_M)

            with timer_q.timed("noisy_features"):
                # Nyström features: Phi = K_xM @ K_MM^{-1/2}, eigenvalues shifted and floored before the inverse square root.
                w, V = eigh(K_MM_n)
                Winv_sqrt = V @ np.diag(1.0/np.sqrt(np.clip(w + 1e-6, 1e-12, None))) @ V.T
                Phi_tr = K_trM_n @ Winv_sqrt
                Phi_va = K_vaM_n @ Winv_sqrt
                Phi_te = K_teM_n @ Winv_sqrt

            if np.unique(ytr_s).size < 2:
                print("  ! Skip — single-class training slice.")
            else:
                with timer_q.timed("noisy_linear_svc_fit"):
                    clf = SVC(C=5.0, kernel="linear", probability=True, class_weight="balanced", random_state=0)
                    clf.fit(Phi_tr, ytr_s)

                rows += _qsvm_eval_rows(
                    clf,
                    [("train", Phi_tr, ytr_s), ("val", Phi_va, yva), ("test", Phi_te, yte)],
                    timer_q, "noisy",
                    {**row_tags, "anchors": int(len(A)), "S_NOISE": int(S_NOISE)},
                )

df_qsvm = pd.DataFrame(rows)
df_qsvm.to_csv(RESULTS/"metrics/noise_sweep_qsvm.csv", index=False)
print("QSVM sweep rows:", len(df_qsvm))
display(df_qsvm.head())



[QSVM] shots=256, pflip=0.0, pdepol=0.0
Saved pure kernels to cache.

[QSVM] shots=256, pflip=0.01, pdepol=0.0
QSVM sweep rows: 6


Unnamed: 0,split,acc,prec,rec,f1,auc,model,shots,pflip,pdepol,anchors,S_NOISE
0,train,0.93125,0.930818,1.0,0.964169,1.0,QSVM,256,0.0,0.0,,
1,val,0.946309,0.955932,0.989474,0.972414,0.453171,QSVM,256,0.0,0.0,,
2,test,0.922819,0.925676,0.996364,0.95972,0.733755,QSVM,256,0.0,0.0,,
3,train,0.925,0.93038,0.993243,0.960784,0.987613,QSVM,256,0.01,0.0,96.0,16.0
4,val,0.949664,0.956081,0.992982,0.974182,0.461269,QSVM,256,0.01,0.0,96.0,16.0


# Cell 10 — pivots & JSON run report (with real timers)

In [11]:
METRICS_DIR = RESULTS / "metrics"


def _test_pivot(df):
    """Mean acc/auc/f1 of the test rows per (shots, pflip, pdepol), best f1 first."""
    test_rows = df[df["split"] == "test"]
    table = test_rows.pivot_table(
        index=["shots", "pflip", "pdepol"],
        values=["f1", "auc", "acc"],
        aggfunc="mean",
    )
    return table.sort_values("f1", ascending=False)


pv_vqc = _test_pivot(df_vqc)
pv_qsvm = _test_pivot(df_qsvm)

pv_vqc.to_csv(METRICS_DIR / "pivot_vqc_test.csv")
pv_qsvm.to_csv(METRICS_DIR / "pivot_qsvm_test.csv")

# Everything needed to reproduce or compare this run: knobs plus measured timings.
run_report = {
    "config": {
        "D": int(D),
        "MAX_TR": int(len(Xtr_s)),
        "M_ANCHORS": int(M_ANCHORS),
        "sweep_size": int(len(SWEEP)),
        "shots_grid": shots_grid,
        "pflip_grid": pflip_grid,
        "pdepol_grid": pdepol_grid,
    },
    "timing": {
        "VQC": timer_v.to_dict(),
        "QSVM": timer_q.to_dict(),
    },
}
with open(METRICS_DIR / "noise_sweep_run_report.json", "w") as f:
    json.dump(run_report, f, indent=2)

print("\n=== Run timing summary ===")
# The field width differs by one so both prefixes line up in the printed table.
for _label, _timer, _width in (("VQC:", timer_v, 30), ("QSVM:", timer_q, 29)):
    for k, v in _timer.to_dict().items():
        print(f"{_label}{k:>{_width}}  {pretty_seconds(v)}")

print("\nSaved:")
for _saved_line in (
    " - results/metrics/noise_sweep_vqc.csv",
    " - results/metrics/noise_sweep_qsvm.csv",
    " - results/metrics/pivot_vqc_test.csv",
    " - results/metrics/pivot_qsvm_test.csv",
    " - results/metrics/noise_sweep_run_report.json",
):
    print(_saved_line)


=== Run timing summary ===
VQC:       VQC_forward_256_0.0_0.0  3.0 s
VQC:       VQC_metrics_256_0.0_0.0  0.0 s
VQC:      VQC_forward_256_0.01_0.0  41.2 s
VQC:      VQC_metrics_256_0.01_0.0  0.0 s
VQC:             VQC_sweep_total_2  44.2 s
QSVM:         pure_states_and_gram  0.9 s
QSVM:               pure_normalize  0.0 s
QSVM:                 pure_svc_fit  0.0 s
QSVM:              pure_eval_train  0.0 s
QSVM:                pure_eval_val  0.0 s
QSVM:               pure_eval_test  0.0 s
QSVM:          noisy_blocks_states  30.3 s
QSVM:              noisy_normalize  0.0 s
QSVM:               noisy_features  0.0 s
QSVM:         noisy_linear_svc_fit  0.0 s
QSVM:             noisy_eval_train  0.0 s
QSVM:               noisy_eval_val  0.0 s
QSVM:              noisy_eval_test  0.0 s
QSVM:           QSVM_sweep_total_2  31.3 s

Saved:
 - results/metrics/noise_sweep_vqc.csv
 - results/metrics/noise_sweep_qsvm.csv
 - results/metrics/pivot_vqc_test.csv
 - results/metrics/pivot_qsvm_test.csv
 - res