# 03_quantum_kernel.ipynb — QSVM (precomputed kernel)

# Cell 0 — perf env

In [1]:
# Normalize thread usage for reproducible classical linear algebra performance.
# This cell runs before the numerical imports in the next cell; thread pools
# typically read these variables when the library is first loaded.
# setdefault() leaves any value already present in the environment untouched.
import os

NUM_THREADS = "8"  # environment values must be strings
for _thread_var in (
    "OMP_NUM_THREADS",
    "OPENBLAS_NUM_THREADS",
    "MKL_NUM_THREADS",
    "NUMEXPR_NUM_THREADS",
):
    # The loop is a statement, so the cell no longer echoes a stray '8' as its output
    os.environ.setdefault(_thread_var, NUM_THREADS)

'8'

# Cell 1 — imports & paths

In [2]:
# Imports: classical preprocessing + PennyLane for quantum kernel construction
from pathlib import Path
import json, warnings, numpy as np, pandas as pd

import pennylane as qml
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import (accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix)
from sklearn.svm import SVC

# Suppress every warning category for the remainder of the session
warnings.filterwarnings("ignore")

# Project layout, expressed relative to the current working directory
ROOT = Path(".")
PROCESSED = ROOT.joinpath("data", "processed")
RESULTS = ROOT.joinpath("results")

# Create both output folders up front so later save calls cannot fail on a missing directory
for _subdir in ("kernels", "metrics"):
    RESULTS.joinpath(_subdir).mkdir(parents=True, exist_ok=True)

# Seed NumPy's global random generator
np.random.seed(7)

# Cell 2 — load & PCA→angles

In [3]:
# Load k-mer features, apply PCA for dimensionality reduction, then scale & map to rotation angles

# SECURITY NOTE: allow_pickle=True permits unpickling object arrays, which can run
# arbitrary code -- only load archives produced by this project's own pipeline.
# The `with` block closes the .npz file handle once both arrays have been read out;
# `data` must not be indexed after this point.
with np.load(PROCESSED/"encodings.npz", allow_pickle=True) as data:
    y = data["y"]
    X_kmer = data["kmer"].astype(np.float32)
with open(PROCESSED/"splits.json") as f:
    SPL = json.load(f)
tr_idx = np.array(SPL["train"])
va_idx = np.array(SPL["val"])
te_idx = np.array(SPL["test"])

D = 8  # Number of principal components / qubits
# PCA is fitted on the training rows only and then applied unchanged to val/test
pca = PCA(n_components=D, random_state=7)
X_tr_p = pca.fit_transform(X_kmer[tr_idx])
X_va_p = pca.transform(X_kmer[va_idx])
X_te_p = pca.transform(X_kmer[te_idx])

# Standardization statistics likewise come from the training rows only
scaler = StandardScaler(with_mean=True, with_std=True)
X_tr_z = scaler.fit_transform(X_tr_p)
X_va_z = scaler.transform(X_va_p)
X_te_z = scaler.transform(X_te_p)


def to_angles(X):
    """Clip standardized features to [-3, 3] and rescale them linearly to [-pi, pi].

    NOTE(review): the two endpoints -pi and +pi produce the same state (up to a
    global phase) under the Y-rotation embedding used in the kernel cell, so
    features clipped at opposite extremes become indistinguishable to a fidelity
    kernel. Consider a narrower target range such as [-pi/2, pi/2].
    """
    return np.pi * np.clip(X, -3.0, 3.0)/3.0


Xtr = to_angles(X_tr_z).astype(np.float32)
Xva = to_angles(X_va_z).astype(np.float32)
Xte = to_angles(X_te_z).astype(np.float32)

# Cell 3 — device + kernel circuit

In [4]:
# Define embedding circuit and kernel evaluation (overlap) using an adjoint construction

def make_device(n_wires, shots=None):
    """Create a PennyLane device, preferring the "lightning.qubit" backend.

    Args:
        n_wires: Passed to ``qml.device`` as ``wires``.
        shots: Passed to ``qml.device`` as ``shots``.

    Returns:
        The "lightning.qubit" device when it can be created; otherwise a
        "default.qubit" device built with the same arguments.
    """
    device_kwargs = {"wires": n_wires, "shots": shots}
    try:
        device = qml.device("lightning.qubit", **device_kwargs)
    except Exception:
        # Deliberate best-effort: any failure to create the preferred backend
        # falls back to the reference simulator.
        device = qml.device("default.qubit", **device_kwargs)
    return device

# One wire per retained principal component
n_wires = D
wires = [w for w in range(n_wires)]
# shots=None is forwarded to qml.device; presumably this selects exact (analytic)
# expectation values -- confirm against the installed PennyLane version
dev = make_device(n_wires, shots=None)

def cz_ring(ws):
    """Apply CZ gates between neighbouring wires, closing the chain into a ring.

    Args:
        ws: Sequence of wire labels in ring order.

    Behaviour by number of wires:
        * 0 or 1 wire: no gate is applied (a CZ needs two distinct wires; the
          previous loop requested a CZ on a repeated wire for a single wire).
        * 2 wires: a single CZ is applied. The previous loop applied CZ(a, b)
          followed by CZ(b, a); CZ is symmetric and self-inverse, so that pair
          cancelled to the identity.
        * 3 or more wires: CZ on each pair (ws[i], ws[i+1]) plus the
          wrap-around pair (ws[-1], ws[0]).
    """
    n = len(ws)
    if n < 2:
        return
    if n == 2:
        qml.CZ(wires=[ws[0], ws[1]])
        return
    for i in range(n):
        qml.CZ(wires=[ws[i], ws[(i+1) % n]])

def U(x):
    """Feature-map circuit: angle-embed ``x`` with Y rotations, then apply the CZ ring.

    Args:
        x: Feature vector handed to ``qml.AngleEmbedding`` on the module-level
           ``wires`` (presumably one angle per wire -- confirm against caller).
    """
    qml.AngleEmbedding(features=x, wires=wires, rotation="Y")
    cz_ring(wires)

@qml.qnode(dev)
def kernel_circuit(x1, x2):
    """Quantum kernel entry k(x1, x2) as an overlap (fidelity) measurement.

    Applies U(x1) followed by the adjoint of U(x2), then returns the expectation
    value of the projector onto the all-zeros basis state on ``wires``; this
    equals the squared inner product of the two embedded states.

    NOTE(review): cz_ring consists only of CZ gates, which are diagonal and
    self-inverse, so the ring applied at the end of U(x1) is undone by the first
    gates of adjoint(U)(x2). The value therefore appears to reduce to the product
    over wires of cos^2((x1_i - x2_i) / 2), i.e. the entangling layer does not
    influence the kernel -- worth verifying numerically.
    """
    U(x1)
    qml.adjoint(U)(x2)
    all_zeros = [0]*n_wires
    zero_projector = qml.Projector(all_zeros, wires=wires)
    return qml.expval(zero_projector)

# Cell 4 — Gram matrices

In [5]:
# Build Gram matrices (train-train, val-train, test-train) via pairwise quantum kernel evaluations
def gram_matrix(XA, XB, kernel_fn=None):
    """Build the kernel (Gram) matrix K with K[i, j] = kernel_fn(XA[i], XB[j]).

    Args:
        XA: Indexable collection of samples; one row of K per sample.
        XB: Indexable collection of samples; one column of K per sample.
        kernel_fn: Callable taking two samples and returning a scalar. Defaults
            to the module-level ``kernel_circuit`` (looked up at call time).

    Returns:
        float64 array of shape (len(XA), len(XB)).

    When ``XA is XB`` (the same object, as in the train-train call) only the
    upper triangle is evaluated and mirrored, which roughly halves the number of
    kernel evaluations and makes the result exactly symmetric. This assumes
    kernel_fn(a, b) == kernel_fn(b, a), which holds for an overlap kernel.
    """
    if kernel_fn is None:
        kernel_fn = kernel_circuit
    n_a, n_b = len(XA), len(XB)
    K = np.zeros((n_a, n_b), dtype=np.float64)
    symmetric = XA is XB
    for i in range(n_a):
        # In the symmetric case columns j < i were already filled by mirroring
        j_start = i if symmetric else 0
        for j in range(j_start, n_b):
            K[i, j] = kernel_fn(XA[i], XB[j])
            if symmetric:
                K[j, i] = K[i, j]
        if (i+1) % 50 == 0 or i == n_a-1:
            print(f" row {i+1}/{n_a} ready")
    return K

MAX_TRAIN = 2000  # Truncate training set for kernel cost; set None for full usage
# A falsy MAX_TRAIN (None) means "keep every training index"
_n_keep = MAX_TRAIN if MAX_TRAIN else len(tr_idx)
sel_tr = tr_idx[:_n_keep]
# Xtr rows were produced from X_kmer[tr_idx], so its first len(sel_tr) rows line up with sel_tr
ytr_sel = y[sel_tr]
Xtr_sel = Xtr[:len(sel_tr)]

# Pairwise kernel evaluations: train-train for fitting, val-train / test-train for scoring
K_trtr = gram_matrix(Xtr_sel, Xtr_sel)
K_vatr = gram_matrix(Xva, Xtr_sel)
K_tetr = gram_matrix(Xte, Xtr_sel)

# Persist all three matrices only after every one of them has been computed
for _target, _gram in (
    ("kernels/K_trtr.npy", K_trtr),
    ("kernels/K_vatr.npy", K_vatr),
    ("kernels/K_tetr.npy", K_tetr),
):
    np.save(RESULTS/_target, _gram)
"saved gram matrices"

 row 50/894 ready
 row 100/894 ready
 row 100/894 ready
 row 150/894 ready
 row 150/894 ready
 row 200/894 ready
 row 200/894 ready
 row 250/894 ready
 row 250/894 ready
 row 300/894 ready
 row 300/894 ready
 row 350/894 ready
 row 350/894 ready
 row 400/894 ready
 row 400/894 ready
 row 450/894 ready
 row 450/894 ready
 row 500/894 ready
 row 500/894 ready
 row 550/894 ready
 row 550/894 ready
 row 600/894 ready
 row 600/894 ready
 row 650/894 ready
 row 650/894 ready
 row 700/894 ready
 row 700/894 ready
 row 850/894 ready
 row 894/894 ready
 row 50/298 ready
 row 100/298 ready
 row 150/298 ready
 row 200/298 ready
 row 250/298 ready
 row 298/298 ready
 row 50/298 ready
 row 100/298 ready
 row 150/298 ready
 row 200/298 ready
 row 250/298 ready
 row 298/298 ready


'saved gram matrices'

# Cell 5 — SVM on quantum kernel

In [6]:
# Train SVM with precomputed quantum kernel; report metrics per split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score, confusion_matrix
import pandas as pd

def evaluate_precomputed(K_train, y_train, K_split, y_split, split_name, C=5.0, thr=None):
    """Fit an SVC on a precomputed kernel and score it on one split.

    Args:
        K_train: Precomputed kernel passed to ``SVC.fit`` (presumably
            train-by-train -- confirm against caller).
        y_train: Training labels passed to ``SVC.fit``.
        K_split: Precomputed kernel passed to ``predict_proba`` (presumably
            split-by-train -- confirm against caller).
        y_split: True labels of the evaluated split.
        split_name: Label stored under the ``split`` key of the metrics dict.
        C: SVC regularization parameter.
        thr: Probability threshold for predicting class 1; ``None`` means 0.5.

    Returns:
        Tuple ``(clf, metrics, cm, p)``: the fitted classifier, a dict with keys
        split/acc/prec/rec/f1/auc/thr, the confusion matrix, and the
        second-column predicted probabilities for the split.
    """
    clf = SVC(C=C, kernel="precomputed", probability=True, class_weight="balanced", random_state=0)
    clf.fit(K_train, y_train)
    # Second probability column is thresholded into 0/1 predictions below
    p = clf.predict_proba(K_split)[:,1]
    if thr is None:
        thr = 0.5
    yhat = (p >= thr).astype(int)
    acc = accuracy_score(y_split, yhat)
    prec, rec, f1, _ = precision_recall_fscore_support(y_split, yhat, average="binary", zero_division=0)
    try:
        auc = roc_auc_score(y_split, p)
    except ValueError:
        # AUC is reported as NaN when the score cannot be computed for this split
        # (e.g. a split containing a single class); any other exception type is
        # a genuine error and is no longer hidden.
        auc = float("nan")
    # Predictions are 0/1 ints, so fix the label set to keep the matrix 2x2 even
    # when one class is absent from both y_split and yhat
    cm = confusion_matrix(y_split, yhat, labels=[0, 1])
    return clf, dict(split=split_name, acc=acc, prec=prec, rec=rec, f1=f1, auc=auc, thr=thr), cm, p

# Each call refits the SVC on the same training kernel and scores one split.
# NOTE(review): the recorded test confusion matrix is [[0, 23], [0, 275]], i.e.
# every test sample is predicted positive at thr=0.5; consider selecting `thr`
# on the validation split and passing it to the test evaluation.
train_eval = evaluate_precomputed(K_trtr, ytr_sel, K_trtr, ytr_sel, "train")
clf, m_tr = train_eval[0], train_eval[1]
m_va = evaluate_precomputed(K_trtr, ytr_sel, K_vatr, y[va_idx], "val")[1]
_, m_te, cm_te, p_te = evaluate_precomputed(K_trtr, ytr_sel, K_tetr, y[te_idx], "test")

# One row per split, written to CSV and displayed together with the test confusion matrix
df = pd.DataFrame([m_tr, m_va, m_te])
df.to_csv(RESULTS/"metrics/qsvm_kernel.csv", index=False)
df, cm_te

(   split       acc      prec  rec        f1       auc  thr
 0  train  0.922819  0.922819  1.0  0.959860  0.983996  0.5
 1    val  0.956376  0.956376  1.0  0.977702  0.795951  0.5
 2   test  0.922819  0.922819  1.0  0.959860  0.787510  0.5,
 array([[  0,  23],
        [  0, 275]]))