In [8]:
# ============================================
# Cell 0 — Experiment plan & seeds (GLOBAL)
# ============================================
# Experiment grid: every instance ID is trained once per entry of LAYER_OPTIONS
# (the driver loop iterates layers in the outer loop, instances in the inner loop).
INSTANCE_IDS   = [1, 2, 3, 4, 5]   # used in filenames as ..._ls_01.json, ..._ls_02.json, ...
LAYER_OPTIONS  = [1, 3]            # train 1-layer first, then 3-layers
EVAL_SIGMA     = 0.10              # noise level passed to training and validation; scaled by the data range when noise is drawn

# Root folder for saved artifacts (JSON bundles and the CSV summary).
# A dedicated folder keeps these runs separate from the 2-latent/2-trash ones.
OUT_BASE = "./runs_halfqae_3L1T"

In [9]:
# =====================================================
# Cell 1 — Imports, utils, reproducibility (fixed seed)
# =====================================================
import os, sys, json, math, random, time, hashlib
from pathlib import Path
import numpy as np
import pennylane as qml
from pennylane import numpy as pnp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# ----- repo utils (project-local file readers) -----
# os.path.abspath('') resolves to the current working directory, so despite its
# name `current_dir` holds the PARENT of the working directory.
current_dir = os.path.dirname(os.path.abspath(''))
# If that parent folder is called 'Jacob', climb one more level; otherwise keep it.
parent_dir = os.path.dirname(current_dir) if os.path.basename(current_dir) == 'Jacob' else current_dir
# Both candidates are pushed to the front of sys.path; '../' is inserted last and
# therefore ends up being searched first.
sys.path.insert(0, parent_dir); sys.path.insert(0, '../')
try:
    from qae_utils.Files import read_ts_file, read_json_file
    print("Utils import OK")
except Exception as e:
    # NOTE(review): this catches every Exception, not only ImportError, so an
    # unrelated failure inside qae_utils also triggers the fallback — confirm intended.
    print("Import error:", e)
    # Fallback: look for a `qae_utils` folder two levels above the working
    # directory and put the folder that contains it on sys.path.
    qae_utils_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(''))), 'qae_utils')
    sys.path.insert(0, os.path.dirname(qae_utils_path))
    from qae_utils.Files import read_ts_file, read_json_file
    print("Absolute path fallback OK")

# Sanity check: both readers resolved to callables.
assert callable(read_ts_file) and callable(read_json_file)

# ----- printing / plotting defaults -----
np.set_printoptions(suppress=True, precision=6)   # no scientific notation, 6 decimals
plt.rcParams["figure.figsize"] = (6.5, 4)         # default figure size for all plots

# ----- reproducibility -----
def set_global_seed(instance_id: int):
    """
    Seed the notebook's random number generators from a small instance ID.

    A root value of ``10_000 + instance_id`` is computed and fixed offsets
    (+11, +22, +33) are used for Python's ``random``, NumPy's global RNG and
    PennyLane's NumPy wrapper respectively.

    Returns a dict with keys ``global_seed``, ``numpy_seed`` and ``pnp_seed``
    so the values can be logged next to the results.

    NOTE(review): ``pnp.random`` presumably wraps NumPy's global RNG, in which
    case the third call re-seeds the same generator as the second — confirm.
    """
    seed_root = 10_000 + int(instance_id)
    numpy_seed = seed_root + 22
    pnp_seed = seed_root + 33

    random.seed(seed_root + 11)
    np.random.seed(numpy_seed)
    try:
        pnp.random.seed(pnp_seed)
    except Exception:
        # Best effort only: a failure to seed the wrapper is deliberately ignored.
        pass
    # No device seeding is done here (the original note states default.qubit is
    # deterministic given its parameters).
    return {"global_seed": seed_root, "numpy_seed": numpy_seed, "pnp_seed": pnp_seed}

def std_instance_name(nq, n_latent, n_trash, n_layers, instance_id):
    """
    Build the project-wide bundle filename for one trained instance.

    All arguments are converted with ``int``; the instance ID is zero-padded to
    two digits.
    Example: std_instance_name(4, 2, 2, 3, 1) -> "4q_2l_2t_3ls_01.json"
    """
    fields = tuple(int(v) for v in (nq, n_latent, n_trash, n_layers, instance_id))
    return "{}q_{}l_{}t_{}ls_{:02d}.json".format(*fields)

def ensure_dir(p):
    """Create directory `p` (including missing parents) if needed and return `p` unchanged."""
    directory = Path(p)
    directory.mkdir(parents=True, exist_ok=True)
    return p

print("Seed/filename utils ready.")

Utils import OK
Seed/filename utils ready.


In [10]:
# =========================================
# Cell 2 — Data loading (deterministic)
# =========================================
DATA_PATH = '../jacobs_examples/aintern/data'
DATA_NAME = 'mackey_glass'  # folder-name prefix of the dataset to load

# fixed split across all instances/layers (so results are comparable)
SPLIT_RANDOM_STATE = 42
WINDOW_STRIDE = 1

# ---- pick the dataset folder ----
# Candidates are DIRECTORIES under DATA_PATH whose name starts with DATA_NAME
# (stray files such as archives with the same prefix are ignored).
# The choice is the lexicographically LAST name, not the most recently
# modified folder: e.g. "..._n50" sorts after "..._n100".
data_folders = sorted(
    f for f in os.listdir(DATA_PATH)
    if f.startswith(DATA_NAME) and os.path.isdir(os.path.join(DATA_PATH, f))
)
if not data_folders:
    raise FileNotFoundError("No Mackey-Glass data found. Generate it first.")
data_folder = data_folders[-1]     # take the last one if multiple
print(f"Using data folder: {data_folder}")

# ---- load series + scaling info ----
X_idx = read_ts_file(f'{DATA_PATH}/{data_folder}/x_org.arr')   # indices for plotting, not used
y_all = read_ts_file(f'{DATA_PATH}/{data_folder}/y_org.arr')   # values
info  = read_json_file(f'{DATA_PATH}/{data_folder}/info.json') # must provide 'scale_low' / 'scale_high'

print(f"Loaded {len(y_all)} samples; scale [{info['scale_low']:.3f},{info['scale_high']:.3f}]")

# ---- helper: uniform embed wrapper (works with/without explicit info param)
def embed_input(x, info_=None):
    """
    Angle-encode a window: one RY rotation per element, on wire = element index.

    Each value is rescaled with ``(v - scale_low) / max(scale_high - scale_low, 1e-12)``
    and the rotation angle is that rescaled value times π.  `info_` supplies
    the scale bounds; when omitted the module-level `info` dict is used.
    """
    scale = info if info_ is None else info_
    lo, hi = scale['scale_low'], scale['scale_high']
    span = max(hi - lo, 1e-12)          # guard against a zero-width range
    unit_values = (pnp.array(x) - lo) / span
    for wire, v01 in enumerate(unit_values):
        qml.RY(v01 * pnp.pi, wires=wire)

Using data folder: mackey_glass_n100
Loaded 100 samples; scale [0.200,0.800]


In [11]:
# ============================================================
# Cell 3 — Architecture (do NOT change your brick/entanglers)
# ============================================================
# This notebook = 4 qubits, 3 latent, 1 trash.
n_qubits = 4
n_latent = 3
n_trash  = n_qubits - n_latent                  # everything that is not latent is trash
latent_wires = list(range(n_latent))            # [0, 1, 2]
trash_wires  = list(range(n_latent, n_qubits))  # [3]
signal_wires = list(range(n_qubits))            # Stage-1 diagnostics use all wires
# Guard against inconsistent edits of the three sizes above.
assert n_latent + n_trash == n_qubits

# --- device factory (simple; default.qubit) ---
def make_device(nq):
    """Return a PennyLane 'default.qubit' device with `nq` wires."""
    backend = 'default.qubit'
    return qml.device(backend, wires=nq)

# --- Stage-1 encoder template (unchanged architecture) ---
def encoder_template(params, n_layers):
    """
    Apply the layered encoder ansatz to wires 0..n_qubits-1.

    `params` is a flat sequence of length ``n_layers * n_qubits * 3`` laid out
    layer-major, then wire, then (RX, RY, RZ).  Each layer applies the three
    rotations on every wire, then CNOTs along the chain q -> q+1 and finally a
    closing CNOT from the last wire back to wire 0.
    """
    assert len(params) == n_layers * n_qubits * 3
    angles_per_layer = n_qubits * 3
    for layer in range(n_layers):
        layer_offset = layer * angles_per_layer
        # single-qubit rotations
        for wire in range(n_qubits):
            first = layer_offset + wire * 3
            qml.RX(params[first], wires=wire)
            qml.RY(params[first + 1], wires=wire)
            qml.RZ(params[first + 2], wires=wire)
        # entanglers: open chain first, then the link that closes the ring
        for control in range(n_qubits - 1):
            qml.CNOT(wires=[control, control + 1])
        qml.CNOT(wires=[n_qubits - 1, 0])

print("Architecture set: 4q (3 latent and 1 trash).")

Architecture set: 4q (3 latent and 1 trash).


In [12]:
# ====================================================
# Cell 4 — Stage-1 QNodes, loss, and training (seeded)
# ====================================================
def stage1_qnodes(n_layers):
    """
    Build the two Stage-1 QNodes for an encoder with `n_layers` layers.

    Both QNodes share one freshly created device with `n_qubits` wires and use
    the autograd interface with backprop differentiation.

    Returns
    -------
    (trash_expectations, recon_EdagE)
        trash_expectations(phi, x_clean): <Z> on every wire in `trash_wires`
        after embedding `x_clean` and applying the encoder with parameters `phi`.
        recon_EdagE(phi, x_clean): <Z> on all wires after embedding, the
        encoder, and then its adjoint with the same parameters.
    """
    dev = make_device(n_qubits)

    @qml.qnode(dev, interface="autograd", diff_method="backprop")
    def trash_expectations(phi, x_clean):
        # embed -> encode -> read out only the trash wires
        embed_input(x_clean)
        encoder_template(phi, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in trash_wires]

    @qml.qnode(dev, interface="autograd", diff_method="backprop")
    def recon_EdagE(phi, x_clean):
        # embed -> encode -> undo the encoder -> read out every wire
        embed_input(x_clean)
        encoder_template(phi, n_layers)
        qml.adjoint(encoder_template)(phi, n_layers)   # E†
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    return trash_expectations, recon_EdagE

def stage1_batch_loss(trash_expectations, phi, clean_batch):
    """
    Stage-1 loss for one batch: mean probability of measuring |1> on the trash wires.

    `trash_expectations(phi, window)` is evaluated for every window in
    `clean_batch`; each <Z> is converted with P(|1>) = (1 - Z) / 2 and the
    result is averaged over the whole batch and all trash wires.
    """
    per_window_z = [pnp.array(trash_expectations(phi, window)) for window in clean_batch]
    z_matrix = pnp.stack(per_window_z, axis=0)      # one row per window
    return pnp.mean((1.0 - z_matrix) * 0.5)

def train_stage1(X_train, X_val, n_layers, instance_id, 
                 n_epochs=120, batch_size=32, lr_init=0.010,
                 patience=10, lr_patience=8, min_delta=1e-6):
    """
    Train the Stage-1 encoder parameters φ by minimising `stage1_batch_loss`.

    The RNGs are seeded from `instance_id`, φ is drawn from N(0, 0.5) and
    optimised with Adam on mini-batches of clean windows.  After every epoch
    the loss is evaluated window-by-window on `X_val`.  The learning rate is
    halved (floor 1e-4) every `lr_patience` consecutive non-improving epochs
    and training stops after `patience` non-improving epochs.

    Parameters
    ----------
    X_train, X_val : arrays of clean windows; `X_train` is indexed with an
        integer index array, so it must support NumPy fancy indexing.
    n_layers : number of encoder layers.
    instance_id : int used to derive the parameter-init and shuffling seeds.
    n_epochs, batch_size, lr_init : usual optimisation settings.
    patience, lr_patience : early-stopping / LR-halving thresholds (epochs).
    min_delta : minimum decrease of the validation loss that counts as an
        improvement.

    Returns
    -------
    dict with keys
        phi          best parameters (those of `best_epoch`; the last ones if
                     no epoch ever improved)
        best_val     best validation loss (inf if no epoch ever improved)
        best_epoch   0-based epoch at which `phi` was recorded, -1 if none
        epochs       number of epochs actually run
        hist_train, hist_val, hist_lr   per-epoch curves
        recon_EdagE  the E†E reconstruction QNode for diagnostics
    """
    set_global_seed(instance_id)
    # init
    enc_shape = n_layers * n_qubits * 3
    phi = pnp.array(np.random.normal(0, 0.5, enc_shape), requires_grad=True)
    opt = qml.AdamOptimizer(stepsize=lr_init)
    lr = lr_init

    trash_expectations, recon_EdagE = stage1_qnodes(n_layers)

    def minibatches(N, B, rng_seed=123456):
        """Yield index arrays of size <= B covering a seeded permutation of range(N)."""
        rng = np.random.default_rng(rng_seed)
        idx = rng.permutation(N)
        for i in range(0, N, B):
            yield idx[i:i+B]

    train_hist, val_hist, lr_hist = [], [], []
    best_phi, best_val, best_epoch = None, float("inf"), -1
    no_improve = 0
    for ep in range(n_epochs):
        # batch order deterministic per-epoch per-instance
        seed_ep = 77_000 + 100*instance_id + ep
        acc = 0.0; nb = 0
        for ix in minibatches(len(X_train), batch_size, rng_seed=seed_ep):
            clean_batch = X_train[ix]
            def loss_fn(p): return stage1_batch_loss(trash_expectations, p, clean_batch)
            phi, cost = opt.step_and_cost(loss_fn, phi)
            acc += float(cost); nb += 1
        train_cost = acc / max(nb, 1)

        # validation: per-window loss, then the mean over all validation windows
        v_costs = []
        for c in X_val:
            v_costs.append(stage1_batch_loss(trash_expectations, phi, pnp.array([c])))
        val_cost = float(pnp.mean(pnp.stack(v_costs)))

        # hist_lr records the rate that was used DURING this epoch
        train_hist.append(train_cost); val_hist.append(val_cost); lr_hist.append(lr)

        stop_now = False
        if val_cost + min_delta < best_val:
            # Track the epoch together with the snapshot so the reported
            # best_epoch always matches the returned parameters (an argmin over
            # hist_val would ignore min_delta).
            best_val, best_phi, best_epoch = val_cost, pnp.array(phi, requires_grad=False), ep
            no_improve = 0
        else:
            no_improve += 1
            if (no_improve % lr_patience) == 0:
                lr = max(lr * 0.5, 1e-4)
                # NOTE(review): a brand-new optimizer is created here, which
                # presumably also discards Adam's accumulated moments — confirm intended.
                opt = qml.AdamOptimizer(stepsize=lr)
                print(f"[Stage1] ↓ LR → {lr:.5f}")
            stop_now = no_improve >= patience

        # Log before stopping so the final epoch is not missing from the output
        # (same ordering as the Stage-3 loop).
        print(f"[Stage1] L={n_layers} ep {ep:03d} | train {train_cost:.6f} | val {val_cost:.6f} | LR {lr:.5f}")

        if stop_now:
            print("[Stage1] Early stopping."); break

    phi_best = best_phi if best_phi is not None else phi
    return dict(
        phi=phi_best, best_val=float(best_val),
        best_epoch=int(best_epoch),
        epochs=len(val_hist),
        hist_train=list(map(float, train_hist)),
        hist_val=list(map(float, val_hist)),
        hist_lr=list(map(float, lr_hist)),
        recon_EdagE=recon_EdagE
    )

In [14]:
# ======================================================================
# Cell 5 — Stage-3 (ψ) with fixed decoder = adjoint(Stage-1 encoder φ)
#          (records hist_noisy, hist_delta, best_epoch, epochs, seconds)
# ======================================================================
import time

def stage3_qnodes(n_layers, phi_stage1):
    """
    Build the Stage-3 QNodes around a frozen Stage-1 encoder.

    `phi_stage1` is copied as a non-trainable array and reshaped to
    (n_layers, n_qubits, 3); the adjoint of the circuit built from it acts as
    the fixed decoder.  All QNodes share one new device with `n_qubits` wires.

    Returns a dict with:
        theta_fixed            the reshaped, frozen Stage-1 parameters
        encoder_only_expZ_all  (flat_params, x_in) -> <Z> on all wires after the trainable encoder
        teacher_code_latents   (x_in) -> <Z> on the latent wires after the frozen encoder
        student_code_latents   (flat_params, x_in) -> <Z> on the latent wires after the trainable encoder
        denoiser_qnode_all     (flat_params, x_noisy) -> <Z> on all wires after trainable encoder + fixed decoder
    """
    dev3 = make_device(n_qubits)
    theta_fixed = pnp.array(phi_stage1, requires_grad=False).reshape((n_layers, n_qubits, 3))

    def encoder_fixed_body(theta):
        # Same gate sequence as `encoder_template` (RX/RY/RZ per wire, chain
        # CNOTs, closing CNOT), but indexed as theta[layer, wire, gate].
        for l in range(n_layers):
            for q in range(n_qubits):
                qml.RX(theta[l, q, 0], wires=q)
                qml.RY(theta[l, q, 1], wires=q)
                qml.RZ(theta[l, q, 2], wires=q)
            for q in range(n_qubits-1):
                qml.CNOT(wires=[q, q+1])
            qml.CNOT(wires=[n_qubits-1, 0])

    def decoder_fixed():
        # Fixed decoder = adjoint of the frozen Stage-1 encoder.
        qml.adjoint(encoder_fixed_body)(theta_fixed)

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def encoder_only_expZ_all(flat_params, x_in):
        # trainable encoder only; every wire is read out
        embed_input(x_in)
        encoder_template(flat_params, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def teacher_code_latents(x_in):
        # frozen encoder; latent wires are 0..n_latent-1
        embed_input(x_in)
        encoder_fixed_body(theta_fixed)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_latent)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def student_code_latents(flat_params, x_in):
        # trainable encoder; latent wires only
        embed_input(x_in)
        encoder_template(flat_params, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_latent)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def denoiser_qnode_all(flat_params, x_noisy):
        # full denoiser: embed noisy window -> trainable encoder -> fixed decoder
        embed_input(x_noisy)
        encoder_template(flat_params, n_layers)
        decoder_fixed()
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    return dict(
        theta_fixed=theta_fixed,
        encoder_only_expZ_all=encoder_only_expZ_all,
        teacher_code_latents=teacher_code_latents,
        student_code_latents=student_code_latents,
        denoiser_qnode_all=denoiser_qnode_all
    )

# ----- value readout helpers (unchanged)
def Z_to_values_autograd(z_all):
    """
    Convert <Z> expectation values back to the data's value range.

    Inverts the RY(π·v) embedding: z is clipped to ±0.999999, then
    v = arccos(z) / π is rescaled from [0, 1] to
    [info["scale_low"], info["scale_high"]].
    """
    lo, hi = info["scale_low"], info["scale_high"]
    z_safe = pnp.clip(pnp.asarray(z_all), -0.999999, 0.999999)
    unit_values = pnp.arccos(z_safe) / pnp.pi
    return unit_values * (hi - lo) + lo

def first_diff(x):
    """Return the consecutive differences x[i+1] - x[i] of a 1-D sequence."""
    values = pnp.array(x)
    later, earlier = values[1:], values[:-1]
    return later - earlier

def p1_from_expZ(z):
    """Convert Pauli-Z expectation value(s) to P(|1>) via (1 - z) / 2."""
    z_values = pnp.asarray(z)
    return (1 - z_values) * 0.5

# ----- deterministic noisy window (shared with eval)
def ts_add_noise_window_det(x, sigma, seed):
    """
    Add reproducible Gaussian noise to a window and clip it to the data range.

    The noise has standard deviation ``sigma * (scale_high - scale_low)`` and
    comes from a fresh ``np.random.default_rng(int(seed))``, so the same
    (x, sigma, seed) always yields the same noisy window.  The result is
    clipped to [scale_low, scale_high] taken from the module-level `info`.
    """
    window = np.asarray(x)
    low = float(info["scale_low"])
    high = float(info["scale_high"])
    generator = np.random.default_rng(int(seed))
    perturbation = generator.normal(0.0, sigma * (high - low), size=window.shape)
    return np.clip(window + perturbation, low, high)

# ----- validation with fixed per-window seeds
VAL_BASE_SEED = 12345

def stage3_val_values_det(psi, X_clean, sigma=EVAL_SIGMA, denoiser=None):
    """
    Deterministic value-domain validation of the denoiser.

    Window i of `X_clean` is corrupted with noise drawn from seed
    ``VAL_BASE_SEED + i``, passed through the denoiser with parameters `psi`,
    mapped back to values and compared with the clean window.

    Parameters
    ----------
    psi : encoder parameters handed to the denoiser QNode.
    X_clean : iterable of clean windows.
    sigma : noise level forwarded to `ts_add_noise_window_det`.
    denoiser : optional callable ``(psi, noisy_window) -> <Z> on all wires``.
        When omitted, the function falls back to the module-global
        ``stage3_handles["denoiser_qnode_all"]`` (the original behaviour), so
        existing callers keep working.  Passing it explicitly removes the
        dependence on hidden notebook state.

    Returns
    -------
    (mN, mD, d_pct)
        mean MSE clean-vs-noisy, mean MSE clean-vs-denoised, and the relative
        improvement ``100 * (1 - mD / max(mN, 1e-12))`` in percent.

    Raises
    ------
    RuntimeError
        If no `denoiser` is given and `stage3_handles` has not been created yet.
    """
    if denoiser is None:
        handles = globals().get("stage3_handles")
        if handles is None:
            raise RuntimeError(
                "stage3_handles is not set: build it with stage3_qnodes(...) "
                "(train_stage3 does this) or pass denoiser=... explicitly."
            )
        denoiser = handles["denoiser_qnode_all"]

    ms_noisy, ms_deno = [], []
    for i, c in enumerate(X_clean):
        clean = np.asarray(c)
        n = ts_add_noise_window_det(c, sigma, seed=VAL_BASE_SEED + i)
        zD = np.array(denoiser(psi, n))
        v_hat = np.array(Z_to_values_autograd(zD))
        ms_noisy.append(np.mean((clean - np.asarray(n))**2))
        ms_deno.append(np.mean((clean - v_hat)**2))
    mN, mD = float(np.mean(ms_noisy)), float(np.mean(ms_deno))
    # max(...) guards the division when the noisy baseline is (numerically) zero
    d_pct = 100.0 * (1.0 - mD / max(mN, 1e-12))
    return mN, mD, d_pct

# ----- small Huber
def huber(residual, delta):
    """
    Element-wise Huber penalty.

    0.5 * r**2 where |r| <= delta, and delta * (|r| - 0.5 * delta) elsewhere.
    """
    magnitude = pnp.abs(residual)
    quadratic_part = 0.5 * magnitude**2
    linear_part = delta * (magnitude - 0.5 * delta)
    return pnp.where(magnitude <= delta, quadratic_part, linear_part)


def train_stage3(X_train, X_val, phi_stage1, n_layers, instance_id,
                 TARGET_NOISE=EVAL_SIGMA, MAX_EPOCHS=60, BATCH=16, 
                 LR_START=0.003, PATIENCE=10, PLATEAU_STEPS=5, PLATEAU_FACTOR=0.5,
                 CLIP_NORM=2.0, USE_EMA=True, EMA_DECAY=0.99):
    """
    Train the Stage-3 denoising encoder ψ against a fixed decoder (adjoint of
    the Stage-1 encoder built from `phi_stage1`).

    ψ is initialised as φ plus small Gaussian noise and updated with a
    hand-written Adam, ONE STEP PER TRAINING WINDOW: `BATCH` only chunks the
    per-epoch permutation and does not change the update granularity.  Each
    window is corrupted with a seed that depends on (instance, layers, epoch,
    window index).  After each epoch the (EMA of the) parameters is validated
    with `stage3_val_values_det`; the learning rate is multiplied by
    `PLATEAU_FACTOR` every `PLATEAU_STEPS` non-improving epochs and training
    stops after `PATIENCE` non-improving epochs.

    Side effect: rebinds the module-global `stage3_handles`, which
    `stage3_val_values_det` reads.

    Returns a dict with: psi (best validated parameters), best_val (lowest
    validation denoised MSE), best_epoch (0-based, -1 if none), epochs,
    hist_train, hist_val, hist_noisy, hist_delta, train_seconds.
    """

    # ---- seeds: varied but reproducible across (instance, layers, epoch, window)
    def make_train_seed(instance_id, layers, ep, k, view=0):
        return (1_000_003 * (instance_id * 10 + layers) + 97 * ep + 31 * int(k) + view) % 2_147_483_647

    set_global_seed(instance_id)

    # The QNodes are published through a global so the validation helper can find them.
    global stage3_handles
    stage3_handles = stage3_qnodes(n_layers, phi_stage1)
    enc_all = stage3_handles["encoder_only_expZ_all"]
    teacher_lat = stage3_handles["teacher_code_latents"]
    denoise_all = stage3_handles["denoiser_qnode_all"]

    # ---- init ψ near φ
    phi_flat = pnp.array(phi_stage1, requires_grad=False)
    psi = pnp.array(np.array(phi_flat) + 0.05*np.random.randn(len(phi_flat)), requires_grad=True)

    # ---- loss weights
    ALPHA_REC, BETA_TF, GAMMA_TRASH, L_TV, L_ANCH = 1.0, 0.05, 0.5, 0.05, 2e-4
    DELTA_TV, DELTA_Z = 0.02, 0.25   # Huber thresholds for the TV and teacher terms

    # loss on a single window with a specific noise seed
    def loss_on_window_seeded(params, clean_values, seed):
        v_noisy = pnp.array(ts_add_noise_window_det(clean_values, TARGET_NOISE, seed=seed))
        # encoder output on the noisy window, split into latent and trash wires
        z_all = pnp.array(enc_all(params, v_noisy))
        z_sig, z_tr = z_all[:n_latent], z_all[n_latent:]
        # full denoiser output mapped back to the value domain
        zD = pnp.array(denoise_all(params, v_noisy))
        v_hat = Z_to_values_autograd(zD)

        L_rec = pnp.mean((pnp.array(clean_values) - v_hat)**2)                          # reconstruction MSE
        z_t_sig = pnp.array(teacher_lat(clean_values))                                  # frozen-encoder latents on the CLEAN window
        L_tf = pnp.mean(huber(z_t_sig - z_sig, DELTA_Z))                                # match teacher latents
        L_tr = pnp.mean(p1_from_expZ(z_tr))                                             # keep trash wires near |0>
        L_tv = pnp.mean(huber(first_diff(clean_values) - first_diff(v_hat), DELTA_TV))  # match first differences
        L_anchor = pnp.mean((params - phi_flat)**2)                                     # stay close to φ
        return (ALPHA_REC*L_rec + BETA_TF*L_tf + GAMMA_TRASH*L_tr + L_TV*L_tv + L_ANCH*L_anchor)

    # manual Adam
    m = pnp.zeros_like(psi); v = pnp.zeros_like(psi)
    b1, b2, eps = 0.9, 0.999, 1e-8
    t = 0
    def adam_step(params, grad, lr):
        # Bias-corrected Adam update; m, v and the step counter t live in the
        # enclosing scope and persist across calls (and across LR changes).
        nonlocal m, v, t
        t += 1
        m = b1*m + (1-b1)*grad
        v = b2*v + (1-b2)*(grad*grad)
        mhat = m/(1-b1**t); vhat = v/(1-b2**t)
        return params - lr * (mhat/(pnp.sqrt(vhat)+eps))

    # batches deterministic per-epoch
    def batch_indices(N, B, ep_seed):
        rng = np.random.default_rng(ep_seed)
        idx = rng.permutation(N)
        for s in range(0, N, B):
            yield idx[s:s+B]

    best_params, best_val = None, float("inf")
    best_epoch = -1
    no_improve, lr = 0, LR_START
    ema = pnp.array(psi, requires_grad=False) if USE_EMA else None

    # history buffers (for CSV/reporting)
    hist_train, hist_val = [], []
    hist_noisy, hist_delta = [], []

    t0 = time.time()

    for ep in range(MAX_EPOCHS):
        seed_ep = 88_000 + 100*instance_id + ep  # reproducible shuffling
        acc, nb = 0.0, 0
        for ix in batch_indices(len(X_train), BATCH, ep_seed=seed_ep):
            for k in ix:                                   # k = absolute index in X_train
                c = X_train[k]
                seed = make_train_seed(instance_id, n_layers, ep, int(k))
                # The loss is evaluated once here for logging/guarding and again
                # inside qml.grad below (same params, same seed).
                L = loss_on_window_seeded(psi, c, seed)
                if not pnp.isfinite(L): 
                    continue
                g = qml.grad(lambda p: loss_on_window_seeded(p, c, seed))(psi)
                if not pnp.all(pnp.isfinite(g)): 
                    continue
                # clip the gradient to an L2 norm of at most CLIP_NORM
                gnorm = pnp.linalg.norm(g) + 1e-12
                if gnorm > CLIP_NORM:
                    g = g * (CLIP_NORM / gnorm)
                psi = adam_step(psi, g, lr)
                if USE_EMA: 
                    ema = EMA_DECAY*ema + (1-EMA_DECAY)*psi
                acc += float(L); nb += 1

        # mean training loss over the windows that were actually used this epoch
        train_loss = acc / max(nb, 1)
        eval_params = ema if USE_EMA else psi

        # strict value-domain validation at σ=EVAL_SIGMA (deterministic per window)
        mN, mD, dV = stage3_val_values_det(eval_params, X_val, sigma=EVAL_SIGMA)
        hist_train.append(train_loss); hist_val.append(mD)
        hist_noisy.append(mN);        hist_delta.append(dV)

        if mD < best_val - 1e-12:
            best_val, best_params, best_epoch, no_improve = mD, pnp.array(eval_params, requires_grad=False), ep, 0
        else:
            no_improve += 1
            if (no_improve % PLATEAU_STEPS) == 0:
                lr *= PLATEAU_FACTOR   # no lower bound is applied to the rate
                print(f"[Stage3] Plateau → LR {lr:.5f}")

        norm_diff = float(pnp.linalg.norm((eval_params - phi_flat)))
        print(f"[Stage3] L={n_layers} ep {ep:03d} | train {train_loss:.5f} | "
              f"val {mD:.5f} | noisy {mN:.5f} | Δ {dV:+.1f}% | LR {lr:.5f} | ||ψ-φ|| {norm_diff:.3f}")

        if no_improve >= PATIENCE:
            print("[Stage3] Early stopping."); break

    train_seconds = float(time.time() - t0)
    epochs_run = len(hist_val)

    # fall back to the last evaluated parameters if no epoch ever improved
    psi_best = best_params if best_params is not None else (ema if USE_EMA else psi)

    return dict(
        psi=psi_best, 
        best_val=float(best_val),
        best_epoch=int(best_epoch),
        epochs=int(epochs_run),
        hist_train=list(map(float, hist_train)),
        hist_val=list(map(float, hist_val)),
        hist_noisy=list(map(float, hist_noisy)),
        hist_delta=list(map(float, hist_delta)),
        train_seconds=train_seconds
    )

In [15]:
# ===================================================
# Cell 6 — Build windows & deterministic train/val/test
# ===================================================
# One window element per qubit: the window length equals the number of wires.
window_size = n_qubits
stride = WINDOW_STRIDE

# Sliding windows over the full series, start indices 0, stride, 2*stride, ...
X_windows = np.array([y_all[i:i+window_size] for i in range(0, len(y_all)-window_size+1, stride)], dtype=float)
print(f"Total windows built: {len(X_windows)} (W={window_size}, step={stride})")

# 60/20/20 split (deterministic via SPLIT_RANDOM_STATE)
# NOTE(review): when stride < window_size the windows overlap, and the split
# shuffles windows at random, so train/val/test windows can share raw samples —
# confirm this is acceptable for the reported validation numbers.
X_temp, X_test = train_test_split(X_windows, test_size=0.20, random_state=SPLIT_RANDOM_STATE)
X_train, X_val = train_test_split(X_temp,   test_size=0.25, random_state=SPLIT_RANDOM_STATE)  # 0.25 of 0.8 = 0.2
print(f"Split sizes → train={len(X_train)}, val={len(X_val)}, test={len(X_test)}")

Total windows built: 97 (W=4, step=1)
Split sizes → train=57, val=20, test=20


In [16]:
# ============================================
# Cell 7 — Train runs (instances × layers)
# ============================================
# Results of every (layers, instance) combination; rebuilt from scratch each
# time this cell runs and consumed by the saving cell.
RUNS = []  # we’ll save each run in the next cell

# Outer loop over depth, inner loop over instances: all runs of the first
# depth finish before the next depth starts.
for L in LAYER_OPTIONS:
    for inst in INSTANCE_IDS:
        print(f"\n==============================")
        print(f"Instance {inst} | Layers {L}")
        print(f"==============================")

        # Stage 1: train the encoder φ on clean windows.
        t0 = time.time()
        s1 = train_stage1(
            X_train, X_val,
            n_layers=L,
            instance_id=inst,
            n_epochs=120, batch_size=32,
            lr_init=0.010, patience=10, lr_patience=8, min_delta=1e-6
        )
        t1 = time.time()

        # Stage 3: train the denoising encoder ψ against the decoder fixed by φ.
        s3 = train_stage3(
            X_train, X_val,
            phi_stage1=s1["phi"],
            n_layers=L,
            instance_id=inst,
            TARGET_NOISE=EVAL_SIGMA, MAX_EPOCHS=60, BATCH=16,
            LR_START=0.003, PATIENCE=10, PLATEAU_STEPS=5, PLATEAU_FACTOR=0.5,
            CLIP_NORM=2.0, USE_EMA=True, EMA_DECAY=0.99
        )
        t2 = time.time()

        # Wall-clock times are measured here (t1 - t0, t2 - t1) and override
        # whatever the training functions report themselves.
        RUNS.append({
            "instance_id": inst,
            "n_layers": L,
            "stage1": {
                "phi": s1["phi"],
                "best_val": s1["best_val"],
                "hist_train": s1["hist_train"],
                "hist_val": s1["hist_val"],
                "hist_lr": s1["hist_lr"],
                # .get() yields None when the training function does not provide the key
                "best_epoch": s1.get("best_epoch"),
                "epochs": s1.get("epochs"),
                "train_seconds": float(t1 - t0),
            },
            "stage3": {
                "psi": s3["psi"],
                "best_val": s3["best_val"],
                "best_epoch": s3.get("best_epoch"),
                "epochs": s3.get("epochs"),
                "hist_train": s3["hist_train"],
                "hist_val": s3["hist_val"],
                # NEW: capture these so Cell 8 has them
                "hist_noisy": s3.get("hist_noisy", []),
                "hist_delta": s3.get("hist_delta", []),
                "train_seconds": float(t2 - t1),
            }
        })

print(f"\nCompleted {len(RUNS)} runs.")


Instance 1 | Layers 1
[Stage1] L=1 ep 000 | train 0.479147 | val 0.492094 | LR 0.01000
[Stage1] L=1 ep 001 | train 0.473263 | val 0.488782 | LR 0.01000
[Stage1] L=1 ep 002 | train 0.468699 | val 0.485577 | LR 0.01000
[Stage1] L=1 ep 003 | train 0.466494 | val 0.482410 | LR 0.01000
[Stage1] L=1 ep 004 | train 0.464922 | val 0.479362 | LR 0.01000
[Stage1] L=1 ep 005 | train 0.457612 | val 0.476435 | LR 0.01000
[Stage1] L=1 ep 006 | train 0.458353 | val 0.473671 | LR 0.01000
[Stage1] L=1 ep 007 | train 0.454292 | val 0.471091 | LR 0.01000
[Stage1] L=1 ep 008 | train 0.451598 | val 0.468693 | LR 0.01000
[Stage1] L=1 ep 009 | train 0.450211 | val 0.466493 | LR 0.01000
[Stage1] L=1 ep 010 | train 0.447326 | val 0.464429 | LR 0.01000
[Stage1] L=1 ep 011 | train 0.443890 | val 0.462511 | LR 0.01000
[Stage1] L=1 ep 012 | train 0.440614 | val 0.460709 | LR 0.01000
[Stage1] L=1 ep 013 | train 0.439578 | val 0.458964 | LR 0.01000
[Stage1] L=1 ep 014 | train 0.440212 | val 0.457252 | LR 0.01000
[S

In [20]:
# ======================================================================
# Cell 8 — Save artifacts (JSON) and append a paper-ready CSV per run
# ======================================================================
from pathlib import Path
import json, time, os, csv
import numpy as np
import pandas as pd

# --- hyperparams logged (keep in sync with training cells) ---
# These values are only WRITTEN to the bundle/CSV; the training calls pass
# their own literals, so a mismatch here silently mislabels the results.
S1_LR_INIT       = 0.010
S1_MAX_EPOCHS    = 120
S1_PATIENCE      = 10
S1_LR_PATIENCE   = 8

S3_LR_INIT       = 0.003
S3_MAX_EPOCHS    = 60
S3_PATIENCE      = 10
S3_PLATEAU_STEPS = 5
S3_PLATEAU_FACT  = 0.5

CSV_SCHEMA_VERSION = "v3"  # part of the CSV filename; keep same as earlier runs so we reuse the same CSV

# --- ensure dirs ---
ensure_dir(OUT_BASE)
# one folder per architecture under OUT_BASE (here: <OUT_BASE>/q4_l3t1)
subroot = ensure_dir(f"{OUT_BASE}/q{n_qubits}_l{n_latent}t{n_trash}")

# --- CSV path (one file for every run written under OUT_BASE) ---
CSV_PATH = f"{OUT_BASE}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"

# --- header for the full, paper-friendly table ---
# The order must match the `row` list assembled in save_one_run.
CSV_HEADER = [
    # id / naming
    "filename","run_tag","dataset_folder","instance_id","rng_seed",
    # architecture
    "n_qubits","n_latent","n_trash","n_layers",
    # noise & window
    "sigma_train","sigma_eval","window_stride",
    # stage-1 hyperparams + outcomes
    "s1_lr_init","s1_max_epochs","s1_patience","s1_lr_patience",
    "s1_best_val","s1_final_val","s1_best_epoch","s1_epochs","s1_train_seconds",
    # stage-3 hyperparams + outcomes
    "s3_lr_init","s3_max_epochs","s3_patience","s3_plateau_steps","s3_plateau_factor",
    "s3_best_val_mse","s3_final_val_mse","s3_best_epoch","s3_epochs","s3_train_seconds",
    "s3_noisy_baseline_mse","s3_best_delta_pct","s3_final_delta_pct",
    # params (JSON)
    "phi_params","psi_params",
    # totals
    "total_train_seconds",
]

def ensure_csv(path, header):
    """
    Create `path` containing only the `header` row if the file is missing.

    An existing file is left completely untouched, so its header is never
    rewritten.
    """
    if os.path.exists(path):
        return
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(header)

# Make sure the shared CSV exists (header only) before any run is upserted.
ensure_csv(CSV_PATH, CSV_HEADER)

# ------------------------ safe helpers ------------------------
def _safe_argmin(seq):
    """
    Index of the smallest non-NaN entry of `seq`, or -1 when unavailable.

    -1 is returned for an empty sequence and whenever the lookup raises
    (for example an all-NaN sequence or an object without a length).
    """
    try:
        if not len(seq):
            return -1
        return int(np.nanargmin(seq))
    except Exception:
        return -1

def _safe_last(seq):
    """
    Last element of a non-empty list or tuple as a float; NaN otherwise.

    Any other container type (NumPy arrays included) yields NaN.
    """
    if isinstance(seq, (list, tuple)) and len(seq):
        return float(seq[-1])
    return np.nan

def _safe_int(x, default):
    """
    Convert `x` with ``int``; return `default` when that is not possible.

    `default` is returned for None, for a float NaN, and whenever the
    conversion raises.
    """
    if x is None:
        return default
    try:
        # a NaN float cannot be converted meaningfully, so treat it as missing
        is_nan_float = isinstance(x, float) and np.isnan(x)
        return default if is_nan_float else int(x)
    except Exception:
        return default

def _safe_float(x, default=np.nan):
    """Convert `x` with ``float``; return `default` for None or when the conversion raises."""
    if x is not None:
        try:
            return float(x)
        except Exception:
            pass
    return default
# --------------------------------------------------------------

def save_one_run(run):
    """
    Persist one training run: write its JSON bundle and upsert its CSV row.

    `run` is one entry of RUNS with keys "instance_id", "n_layers", "stage1"
    and "stage3" (and optionally "seed").  The bundle is written to
    `subroot/<std_instance_name>`; the CSV at CSV_PATH is re-read, any row with
    the same filename is dropped, the new row is appended and the file is
    rewritten.

    Side effects: writes two files, prints two status lines and, when the
    Stage-3 noisy/delta curves are missing or non-finite, rebuilds the Stage-3
    QNodes and OVERWRITES the module-global `stage3_handles`.
    """
    global stage3_handles  # needed by stage3_val_values_det fallback

    inst = int(run["instance_id"])
    L    = int(run["n_layers"])
    # NOTE(review): without a "seed" key this records the instance id itself,
    # whereas set_global_seed derives its seeds from 10_000 + instance_id —
    # confirm which value the "rng_seed" column is meant to hold.
    seed = int(run.get("seed", inst))

    # standardized filename includes arch + layers + instance, so no collisions
    fname = std_instance_name(n_qubits, n_latent, n_trash, L, inst)

    # Save all instances for this architecture in the same folder (no per-layer subfolders)
    out_dir = subroot
    bundle_path = os.path.join(out_dir, fname)

    # pull stage results (robust to missing keys / None)
    s1 = run["stage1"]
    s3 = run["stage3"]

    # Stage-1 metrics; best_epoch/epochs fall back to values derived from the
    # validation curve when the run does not carry them.
    s1_hist_val = list(map(float, s1.get("hist_val", [])))
    s1_best_val = _safe_float(s1.get("best_val"), np.nan)
    s1_final_val = _safe_last(s1_hist_val)
    s1_best_epoch = _safe_int(s1.get("best_epoch"), _safe_argmin(s1_hist_val))
    s1_epochs     = _safe_int(s1.get("epochs"), len(s1_hist_val))
    s1_seconds    = _safe_float(s1.get("train_seconds"), np.nan)

    # Stage-3 series
    s3_hist_val   = list(map(float, s3.get("hist_val", [])))
    s3_hist_noisy = list(map(float, s3.get("hist_noisy", [])))
    s3_hist_delta = list(map(float, s3.get("hist_delta", [])))

    s3_best_val   = _safe_float(s3.get("best_val"), np.nan)
    s3_final_val  = _safe_last(s3_hist_val)
    s3_best_epoch = _safe_int(s3.get("best_epoch"), _safe_argmin(s3_hist_val))
    s3_epochs     = _safe_int(s3.get("epochs"), len(s3_hist_val))
    s3_seconds    = _safe_float(s3.get("train_seconds"), np.nan)

    # --- compute metrics with FALLBACKS if curves are missing ---
    # baseline = mean of the per-epoch noisy MSE; best delta = largest per-epoch improvement
    noisy_baseline = float(np.nanmean(s3_hist_noisy)) if len(s3_hist_noisy) else np.nan
    best_delta     = (float(np.nanmax(s3_hist_delta)) if (len(s3_hist_delta) and np.isfinite(np.nanmax(s3_hist_delta)))
                      else np.nan)
    final_delta    = _safe_last(s3_hist_delta)

    need_fallback = (not len(s3_hist_noisy)) or (not np.isfinite(noisy_baseline)) or (not np.isfinite(final_delta))

    if need_fallback:
        # Rebuild the QNodes for this (L, phi) so we can evaluate psi on X_val
        phi_for_L = np.array(s1.get("phi", []))
        stage3_handles = stage3_qnodes(L, phi_for_L)  # sets the fixed decoder from φ
        psi_params = np.array(s3.get("psi", []))
        # Deterministic validation at σ = EVAL_SIGMA
        mN, mD, d_pct = stage3_val_values_det(psi_params, X_val, sigma=EVAL_SIGMA)
        noisy_baseline = float(mN)
        final_delta    = float(d_pct)
        if not np.isfinite(best_delta):  # if we don't have a curve, use final as best
            best_delta = final_delta

    # bundle JSON (parameters + training curves)
    bundle = {
        "schema": {"name": "half_qae_bundle", "version": "1.0"},
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "dataset": {
            "id": data_folder,
            "scale_low":  float(info["scale_low"]),
            "scale_high": float(info["scale_high"]),
            "window_size": int(n_qubits),
            "window_stride": int(WINDOW_STRIDE),
        },
        "run": {
            "tag": f"inst{inst}_L{L}",
            "instance_id": inst,
            "seed": seed,
            # both noise fields are filled from the single EVAL_SIGMA constant
            "sigma_train": float(EVAL_SIGMA),
            "sigma_eval":  float(EVAL_SIGMA),
        },
        "architecture": {
            "n_qubits": int(n_qubits),
            "n_layers": int(L),
            "n_latent": int(n_latent),
            "n_trash":  int(n_trash),
            "latent_wires": list(range(n_latent)),
            "trash_wires":  list(range(n_latent, n_qubits)),
        },
        "training": {
            "stage1": {
                "lr_init": S1_LR_INIT, "max_epochs": S1_MAX_EPOCHS,
                "patience": S1_PATIENCE, "lr_patience": S1_LR_PATIENCE,
                "best_val": s1_best_val, "final_val": s1_final_val,
                "best_epoch": s1_best_epoch, "epochs": s1_epochs,
                "train_curve": s1.get("hist_train", []), "val_curve": s1_hist_val, "lr_curve": s1.get("hist_lr", []),
                "train_seconds": s1_seconds,
            },
            "stage3": {
                "lr_init": S3_LR_INIT, "max_epochs": S3_MAX_EPOCHS,
                "patience": S3_PATIENCE, "plateau_steps": S3_PLATEAU_STEPS, "plateau_factor": S3_PLATEAU_FACT,
                "best_val_mse": s3_best_val, "final_val_mse": s3_final_val,
                "best_epoch": s3_best_epoch, "epochs": s3_epochs,
                "train_curve": s3.get("hist_train", []), "val_curve": s3_hist_val,
                "noisy_curve": s3.get("hist_noisy", []), "delta_curve": s3_hist_delta,
                "train_seconds": s3_seconds,
            }
        },
        "parameters": {
            # converted to plain lists so they are JSON-serialisable
            "phi_stage1": np.array(s1.get("phi", [])).tolist(),
            "psi_stage3": np.array(s3.get("psi", [])).tolist(),
        },
    }
    with open(bundle_path, "w", encoding="utf-8") as f:
        json.dump(bundle, f, indent=2)
    print(f"Saved bundle → {bundle_path}")

    # assemble CSV row
    phi_params = json.dumps(bundle["parameters"]["phi_stage1"])
    psi_params = json.dumps(bundle["parameters"]["psi_stage3"])
    # a missing (NaN) stage time counts as 0 in the total
    total_seconds = float((0 if np.isnan(s1_seconds) else s1_seconds) + (0 if np.isnan(s3_seconds) else s3_seconds))

    # Order must match CSV_HEADER.
    row = [
        os.path.basename(bundle_path),
        f"inst{inst}_L{L}",
        data_folder,
        inst, seed,
        int(n_qubits), int(n_latent), int(n_trash), int(L),
        f"{EVAL_SIGMA:.3f}", f"{EVAL_SIGMA:.3f}", int(WINDOW_STRIDE),
        f"{S1_LR_INIT:.6f}", int(S1_MAX_EPOCHS), int(S1_PATIENCE), int(S1_LR_PATIENCE),
        f"{s1_best_val:.8f}", f"{s1_final_val:.8f}", s1_best_epoch, s1_epochs, s1_seconds,
        f"{S3_LR_INIT:.6f}", int(S3_MAX_EPOCHS), int(S3_PATIENCE), int(S3_PLATEAU_STEPS), f"{S3_PLATEAU_FACT:.3f}",
        f"{s3_best_val:.8f}", f"{s3_final_val:.8f}", s3_best_epoch, s3_epochs, s3_seconds,
        noisy_baseline, best_delta, final_delta,
        phi_params, psi_params,
        total_seconds,
    ]

    # upsert row into CSV (by unique filename)
    row_df = pd.DataFrame([row], columns=CSV_HEADER)
    if Path(CSV_PATH).exists():
        df_old = pd.read_csv(CSV_PATH)
        key = os.path.basename(bundle_path)
        if "filename" in df_old.columns:
            # drop any earlier row for the same bundle before appending the new one
            df_old = df_old[df_old["filename"] != key]
        df_new = pd.concat([df_old, row_df], ignore_index=True)
        df_new.to_csv(CSV_PATH, index=False)
    else:
        row_df.to_csv(CSV_PATH, index=False)
    print(f"Upserted CSV row  → {CSV_PATH}")

# ---- save all runs from Cell 7 ----
# Each call writes one JSON bundle and rewrites the CSV with the run upserted.
for run in RUNS:
    save_one_run(run)

print("\nAll runs saved and recorded.")

Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_1ls_01.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_1ls_02.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_1ls_03.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_1ls_04.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_1ls_05.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_3ls_01.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_3ls_02.json
Upserted CSV row  → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae_3L1T/q4_l3t1/4q_3l_1t_3ls_03.jso

In [21]:
# =========================================================
# Cell 9 — Build & preview the training-only results table
# =========================================================
import pandas as pd
import numpy as np
from pathlib import Path

# Cell 8 writes the per-run CSV; stop with a clear message when it is absent.
_csv_file = Path(CSV_PATH)
if not _csv_file.exists():
    raise FileNotFoundError(f"CSV not found: {CSV_PATH}. Run Cell 8 first.")

df = pd.read_csv(CSV_PATH)

# Keep one row per run: when a run occurs more than once, the last row in
# file order wins. Runs are identified by bundle file name when that column
# exists, otherwise by (run_tag, instance_id, n_layers).
_dedup_subset = (
    ["filename"]
    if "filename" in df.columns
    else ["run_tag", "instance_id", "n_layers"]
)
df = df.drop_duplicates(subset=_dedup_subset, keep="last")

# Columns that should be numeric. Values that cannot be parsed become NaN
# (errors="coerce") instead of raising; columns missing from the CSV are skipped.
_numeric_candidates = (
    "s3_noisy_baseline_mse", "s3_best_delta_pct", "s3_final_delta_pct",
    "s3_best_val_mse", "s3_final_val_mse",
    "s1_best_val", "s1_final_val",
    "s1_train_seconds", "s3_train_seconds", "total_train_seconds",
    "s1_best_epoch", "s1_epochs", "s3_best_epoch", "s3_epochs",
    "n_qubits", "n_latent", "n_trash", "n_layers", "instance_id", "rng_seed",
    "window_stride",
)
_numeric_present = [name for name in _numeric_candidates if name in df.columns]
for _name in _numeric_present:
    df[_name] = pd.to_numeric(df[_name], errors="coerce")

# Stable presentation order: by depth first, then by instance.
df = df.sort_values(by=["n_layers", "instance_id"]).reset_index(drop=True)

# NOTE(review): in the recorded output this path is the same file Cell 8
# upserts into, so the de-duplicated, coerced table replaces the raw CSV in
# place — confirm that this is intended.
clean_path = f"{OUT_BASE}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"
Path(OUT_BASE).mkdir(parents=True, exist_ok=True)
df.to_csv(clean_path, index=False)
print(f"Saved training-only table → {clean_path}")

# A compact per-layer summary (mean±std); guards against all-NaN
def mean_std_safe(s: pd.Series) -> str:
    """Format a series as ``"<mean> ± <std>"`` using only its finite values.

    Values are first coerced to float (unparseable entries become NaN); NaN
    and ±inf are then discarded. The standard deviation is the population
    form (``ddof=0``). Both numbers are printed with six decimals.

    Args:
        s: Series of numbers or numeric-looking values.

    Returns:
        The formatted string, or ``"n/a"`` when no finite value remains.
    """
    as_float = pd.to_numeric(s, errors="coerce").astype(float)
    finite_mask = np.isfinite(as_float)
    finite_values = as_float[finite_mask]

    if finite_values.size == 0:
        return "n/a"

    center = finite_values.mean()
    spread = finite_values.std(ddof=0)
    return f"{center:.6f} ± {spread:.6f}"

# (source column in df, column label in the summary table), in output order.
metrics = [
    ("s3_noisy_baseline_mse", "noisy_baseline_mse (mean±std)"),
    ("s3_best_val_mse",       "best_val_mse (mean±std)"),
    ("s3_final_val_mse",      "final_val_mse (mean±std)"),
    ("s3_best_delta_pct",     "best_delta_pct (mean±std)"),
    ("s3_final_delta_pct",    "final_delta_pct (mean±std)"),
    ("s1_best_val",           "s1_best_val (mean±std)"),
]

# One summary row per n_layers value; dropna=False keeps rows whose n_layers
# is missing as their own group instead of silently dropping them.
grp = df.groupby("n_layers", dropna=False)
summary = grp.size().to_frame(name="runs")

for col, label in metrics:
    # Leave out metrics that are absent from the table ...
    if col not in df.columns:
        continue
    # ... or that hold no finite value at all (nothing meaningful to report).
    if not np.isfinite(df[col]).any():
        continue
    summary[label] = grp[col].apply(mean_std_safe)

summary_path = f"{OUT_BASE}/summary_by_layers_{CSV_SCHEMA_VERSION}.csv"
summary.to_csv(summary_path, index=True)  # index=True keeps the n_layers key
print(f"Saved per-layer summary → {summary_path}")

# Preview: first rows of the cleaned table, then the per-layer summary.
display(df.head(10))
display(summary)

Saved training-only table → ./runs_halfqae_3L1T/all_training_instances_v3.csv
Saved per-layer summary → ./runs_halfqae_3L1T/summary_by_layers_v3.csv


Unnamed: 0,filename,run_tag,dataset_folder,instance_id,rng_seed,n_qubits,n_latent,n_trash,n_layers,sigma_train,...,s3_final_val_mse,s3_best_epoch,s3_epochs,s3_train_seconds,s3_noisy_baseline_mse,s3_best_delta_pct,s3_final_delta_pct,phi_params,psi_params,total_train_seconds
0,4q_3l_1t_1ls_01.json,inst1_L1,mackey_glass_n100,1,1,4,3,1,1,0.1,...,0.002982,20,31,47.967,0.003908,24.797495,23.687649,"[1.8694918844253553e-05, 0.9774636820433608, 0...","[-0.0021476862733762012, 0.996444032914851, -0...",93.664973
1,4q_3l_1t_1ls_02.json,inst2_L1,mackey_glass_n100,2,2,4,3,1,1,0.1,...,0.003593,6,17,26.200279,0.003908,10.92737,8.062937,"[2.0115671551483984e-06, -1.9414957211684758, ...","[-0.005241657989718851, -2.042432850799373, -0...",72.431846
2,4q_3l_1t_1ls_03.json,inst3_L1,mackey_glass_n100,3,3,4,3,1,1,0.1,...,0.002994,28,39,59.531065,0.003908,23.789326,23.382216,"[-0.00010595659577317256, 1.0257370823253933, ...","[-0.0002708157444107569, 0.988621714592207, 1....",95.403962
3,4q_3l_1t_1ls_04.json,inst4_L1,mackey_glass_n100,4,4,4,3,1,1,0.1,...,0.004372,3,14,22.178038,0.003908,10.904187,-11.892166,"[-0.0019162555727775263, -1.718724799644914, -...","[0.08051271935437239, -1.752298458289274, -0.0...",68.000417
4,4q_3l_1t_1ls_05.json,inst5_L1,mackey_glass_n100,5,5,4,3,1,1,0.1,...,0.002997,15,26,39.887537,0.003908,23.758507,23.310451,"[-0.002231335380760849, -2.116122838099371, 0....","[0.0013592018201680257, -2.1589208129393076, 0...",86.037052
5,4q_3l_1t_3ls_01.json,inst1_L3,mackey_glass_n100,1,1,4,3,1,3,0.1,...,0.002934,35,46,166.734177,0.003908,25.250994,24.922442,"[2.012897733172297, 0.17102605298469328, 0.249...","[2.387139420937167, 0.004220215992417614, 0.09...",287.39843
6,4q_3l_1t_3ls_02.json,inst2_L3,mackey_glass_n100,2,2,4,3,1,3,0.1,...,0.002684,38,49,213.243605,0.003908,32.321002,31.322105,"[0.17228507891403036, -0.1918817618118622, -0....","[0.012376197078302823, -0.16101771105191287, -...",329.64212
7,4q_3l_1t_3ls_03.json,inst3_L3,mackey_glass_n100,3,3,4,3,1,3,0.1,...,0.002755,39,50,201.356277,0.003908,30.427728,29.510512,"[1.5650355176978477, -0.011291939947879737, 0....","[1.5519425762251506, -0.002817705950746741, 0....",317.262585
8,4q_3l_1t_3ls_04.json,inst4_L3,mackey_glass_n100,4,4,4,3,1,3,0.1,...,0.002986,26,37,159.787756,0.003908,27.563212,23.598995,"[-1.1995772979913892, 0.03568559541760664, -1....","[-1.390070183105758, 0.1460636928406844, -1.27...",280.826698
9,4q_3l_1t_3ls_05.json,inst5_L3,mackey_glass_n100,5,5,4,3,1,3,0.1,...,0.002566,52,60,312.112794,0.003908,34.975312,34.33869,"[-0.48486297678719315, -0.3432019283736144, 0....","[-0.6005963743263021, -0.3520486794710095, 0.7...",432.757829


Unnamed: 0_level_0,runs,noisy_baseline_mse (mean±std),best_val_mse (mean±std),final_val_mse (mean±std),best_delta_pct (mean±std),final_delta_pct (mean±std),s1_best_val (mean±std)
n_layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,5,0.003908 ± 0.000000,0.003172 ± 0.000253,0.003388 ± 0.000545,18.835377 ± 6.477129,13.310217 ± 13.941557,0.260137 ± 0.004198
3,5,0.003908 ± 0.000000,0.002731 ± 0.000134,0.002785 ± 0.000156,30.107649 ± 3.428025,28.738549 ± 3.990237,0.031344 ± 0.008148
