In [40]:
# ============================================
# Cell 0 — Experiment plan & seeds (GLOBAL)
# ============================================
# For THIS notebook: 4q / 2 latent / 2 trash
# We run 2 layer depths (1,3) × 5 instances = 10 runs PER dataset.
# With 2 datasets that’s 20 runs from this notebook.
INSTANCE_IDS   = [1, 2, 3, 4, 5]   # five instances per (dataset, depth)
LAYER_OPTIONS  = [1, 3]

# We use PRE-NOISED datasets for Stage-3 at sigma = 0.20
NOISE_SIGMA    = 0.20
EVAL_SIGMA     = NOISE_SIGMA

# Two Mackey-Glass datasets
DATASETS_TO_RUN = [
    "mackey_glass_tau17_n200",
    "mackey_glass_tau30_n200",
]

# Where to save (folder structure fixed below)
OUT_BASE = "./runs_qae"   # <- per your requested name

In [41]:
# =====================================================
# Cell 1 — Imports, utils, reproducibility (fixed seed)
# =====================================================
import os, sys, json, math, random, time, hashlib, csv
from pathlib import Path
import numpy as np
import pandas as pd
import pennylane as qml
from pennylane import numpy as pnp
import matplotlib.pyplot as plt

# ----- repo utils (your existing readers) -----
current_dir = os.path.dirname(os.path.abspath(''))
parent_dir = os.path.dirname(current_dir) if os.path.basename(current_dir) == 'Jacob' else current_dir
sys.path.insert(0, parent_dir); sys.path.insert(0, '../')
try:
    from qae_utils.Files import read_ts_file, read_json_file, create_folder_if_needed
    from qae_utils.Window import ts_wind_make, ts_wind_split, ts_wind_flatten_avg, ts_add_noise
    print("Utils import OK")
except Exception as e:
    print("Import error:", e)
    qae_utils_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(''))), 'qae_utils')
    sys.path.insert(0, os.path.dirname(qae_utils_path))
    from qae_utils.Files import read_ts_file, read_json_file, create_folder_if_needed
    from qae_utils.Window import ts_wind_make, ts_wind_split, ts_wind_flatten_avg, ts_add_noise
    print("Absolute path fallback OK")

np.set_printoptions(suppress=True, precision=6)
plt.rcParams["figure.figsize"] = (6.5, 4)

def set_global_seed(instance_id: int):
    base = 10_000 + int(instance_id)
    random.seed(base + 11)
    np.random.seed(base + 22)
    try:
        pnp.random.seed(base + 33)
    except Exception:
        pass
    return dict(global_seed=base, numpy_seed=base+22, pnp_seed=base+33)

def ensure_dir(p): Path(p).mkdir(parents=True, exist_ok=True); return p

print("Seed/filename utils ready.")

Utils import OK
Seed/filename utils ready.


In [42]:
# =========================================
# Cell 2 — Data loading (deterministic)
# =========================================
DATA_PATH = '../jacobs_examples/aintern/data'
WINDOW_STRIDE = 1

def load_dataset(folder_name: str):
    """Return (y_all, info, folder_name) for a dataset folder."""
    y = read_ts_file(f'{DATA_PATH}/{folder_name}/y_org.arr')   # values
    info = read_json_file(f'{DATA_PATH}/{folder_name}/info.json')
    return np.asarray(y, dtype=float), info, folder_name

# ---- value→angle embedding (uses global `info` set in Cell 7 loop) ----
def embed_input(x, info_=None):
    """
    Map value-domain window x (in [lo,hi]) to RY(π·v01).
    Accepts optional info to match Stage-3 call signatures.
    """
    if info_ is None:
        info_ = info
    lo, hi = float(info_['scale_low']), float(info_['scale_high'])
    xn = (pnp.array(x) - lo) / max(hi - lo, 1e-12)   # -> [0,1]
    for i, v in enumerate(xn):
        qml.RY(v * pnp.pi, wires=i)

In [43]:
# ============================================================
# Cell 3 — Architecture (do NOT change your brick/entanglers)
# ============================================================
# This notebook = 4 qubits, 2 latent, 2 trash.
n_qubits = 4
n_latent = 2
n_trash  = n_qubits - n_latent
trash_wires = [2, 3]            # your original choice
signal_wires = list(range(4))   # Stage-1 diagnostics use all wires

# --- device factory (simple; default.qubit) ---
def make_device(nq): 
    return qml.device('default.qubit', wires=nq)

# --- Stage-1 encoder template (unchanged architecture) ---
def encoder_template(params, n_layers):
    """RX/RY/RZ per qubit + ring CNOT per layer."""
    assert len(params) == n_layers * n_qubits * 3
    for l in range(n_layers):
        # local rotations
        for q in range(n_qubits):
            idx = l * n_qubits * 3 + q * 3
            qml.RX(params[idx + 0], wires=q)
            qml.RY(params[idx + 1], wires=q)
            qml.RZ(params[idx + 2], wires=q)
        # ring entanglers
        for q in range(n_qubits-1):
            qml.CNOT(wires=[q, q+1])
        qml.CNOT(wires=[n_qubits-1, 0])

print("Architecture set: 4q (2 latent and 2 trash).")

Architecture set: 4q (2 latent and 2 trash).


In [44]:
# ====================================================
# Cell 4 — Stage-1 QNodes, loss, and training (seeded)
# ====================================================
def stage1_qnodes(n_layers):
    dev = make_device(n_qubits)

    @qml.qnode(dev, interface="autograd", diff_method="backprop")
    def trash_expectations(phi, x_clean):
        embed_input(x_clean)
        encoder_template(phi, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in trash_wires]

    @qml.qnode(dev, interface="autograd", diff_method="backprop")
    def recon_EdagE(phi, x_clean):
        embed_input(x_clean)
        encoder_template(phi, n_layers)
        qml.adjoint(encoder_template)(phi, n_layers)   # E†
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    return trash_expectations, recon_EdagE

def stage1_batch_loss(trash_expectations, phi, clean_batch):
    # L = mean_{batch,trash} P(|1>) = (1 - Z)/2
    zs = []
    for c in clean_batch:
        z = pnp.array(trash_expectations(phi, c))  # shape (n_trash,)
        zs.append(z)
    zs = pnp.stack(zs, axis=0)
    prob_one = (1.0 - zs) * 0.5
    return pnp.mean(prob_one)

def train_stage1(X_train, X_val, n_layers, instance_id, 
                 n_epochs=120, batch_size=32, lr_init=0.010,
                 patience=10, lr_patience=8, min_delta=1e-6):
    set_global_seed(instance_id)
    # init
    enc_shape = n_layers * n_qubits * 3
    phi = pnp.array(np.random.normal(0, 0.5, enc_shape), requires_grad=True)
    opt = qml.AdamOptimizer(stepsize=lr_init)
    lr = lr_init

    trash_expectations, recon_EdagE = stage1_qnodes(n_layers)

    # helper
    def minibatches(N, B, rng_seed=123456):
        rng = np.random.default_rng(rng_seed)  # fixed per-epoch seed below
        idx = rng.permutation(N)
        for i in range(0, N, B):
            yield idx[i:i+B]

    train_hist, val_hist, lr_hist = [], [], []
    best_phi, best_val = None, float("inf")
    no_improve = 0
    for ep in range(n_epochs):
        # batch order deterministic per-epoch per-instance
        seed_ep = 77_000 + 100*instance_id + ep
        acc = 0.0; nb = 0
        for ix in minibatches(len(X_train), batch_size, rng_seed=seed_ep):
            clean_batch = X_train[ix]
            def loss_fn(p): return stage1_batch_loss(trash_expectations, p, clean_batch)
            phi, cost = opt.step_and_cost(loss_fn, phi)
            acc += float(cost); nb += 1
        train_cost = acc / max(nb, 1)

        # validation
        v_costs = []
        for c in X_val:
            v_costs.append(stage1_batch_loss(trash_expectations, phi, pnp.array([c])))
        val_cost = float(pnp.mean(pnp.stack(v_costs)))

        train_hist.append(train_cost); val_hist.append(val_cost); lr_hist.append(lr)

        if val_cost + min_delta < best_val:
            best_val, best_phi = val_cost, pnp.array(phi, requires_grad=False); no_improve = 0
        else:
            no_improve += 1
            if (no_improve % lr_patience) == 0:
                lr = max(lr * 0.5, 1e-4)
                opt = qml.AdamOptimizer(stepsize=lr)
                print(f"[Stage1] ↓ LR → {lr:.5f}")
            if no_improve >= patience:
                print("[Stage1] Early stopping."); break

        print(f"[Stage1] L={n_layers} ep {ep:03d} | train {train_cost:.6f} | val {val_cost:.6f} | LR {lr:.5f}")

    phi_best = best_phi if best_phi is not None else phi
    return dict(
        phi=phi_best, best_val=float(best_val),
        hist_train=list(map(float, train_hist)),
        hist_val=list(map(float, val_hist)),
        hist_lr=list(map(float, lr_hist)),
        recon_EdagE=recon_EdagE
    )

In [45]:
# ======================================================================
# Cell 5 — Stage-3 (ψ) with fixed decoder = adjoint(Stage-1 encoder φ)
#          Uses pre-built noisy windows (σ = NOISE_SIGMA) — no injection
# ======================================================================
import time
import numpy as np
import pennylane as qml
from pennylane import numpy as pnp

def stage3_qnodes(n_layers, phi_stage1):
    dev3 = make_device(n_qubits)
    theta_fixed = pnp.array(phi_stage1, requires_grad=False).reshape((n_layers, n_qubits, 3))

    def encoder_fixed_body(theta):
        for l in range(n_layers):
            for q in range(n_qubits):
                qml.RX(theta[l, q, 0], wires=q)
                qml.RY(theta[l, q, 1], wires=q)
                qml.RZ(theta[l, q, 2], wires=q)
            for q in range(n_qubits-1):
                qml.CNOT(wires=[q, q+1])
            qml.CNOT(wires=[n_qubits-1, 0])

    def decoder_fixed():
        qml.adjoint(encoder_fixed_body)(theta_fixed)

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def encoder_only_expZ_all(flat_params, x_in):
        embed_input(x_in)
        encoder_template(flat_params, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def teacher_code_latents(x_in):
        embed_input(x_in)
        encoder_fixed_body(theta_fixed)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_latent)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def student_code_latents(flat_params, x_in):
        embed_input(x_in)
        encoder_template(flat_params, n_layers)
        return [qml.expval(qml.PauliZ(w)) for w in range(n_latent)]

    @qml.qnode(dev3, interface="autograd", diff_method="backprop")
    def denoiser_qnode_all(flat_params, x_noisy):
        embed_input(x_noisy)
        encoder_template(flat_params, n_layers)
        decoder_fixed()
        return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

    return dict(
        theta_fixed=theta_fixed,
        encoder_only_expZ_all=encoder_only_expZ_all,
        teacher_code_latents=teacher_code_latents,
        student_code_latents=student_code_latents,
        denoiser_qnode_all=denoiser_qnode_all
    )

# ----- value readout helpers -----
def Z_to_values_autograd(z_all):
    z_all = pnp.clip(pnp.asarray(z_all), -0.999999, 0.999999)
    v01 = pnp.arccos(z_all) / pnp.pi
    return v01 * (info["scale_high"] - info["scale_low"]) + info["scale_low"]

def first_diff(x): 
    x = pnp.array(x); return x[1:] - x[:-1]

def p1_from_expZ(z): 
    return (1 - pnp.asarray(z)) * 0.5

# ----- validation using pre-noised windows (paired with clean) -----
def stage3_val_from_pre_noised(psi, X_noisy, X_clean):
    ms_noisy, ms_deno = [], []
    for n, c in zip(X_noisy, X_clean):
        zD = np.array(stage3_handles["denoiser_qnode_all"](psi, n))
        v_hat = np.array(Z_to_values_autograd(zD))
        ms_noisy.append(float(np.mean((np.asarray(c) - np.asarray(n))**2)))
        ms_deno.append(float(np.mean((np.asarray(c) - v_hat)**2)))
    mN, mD = float(np.mean(ms_noisy)), float(np.mean(ms_deno))
    d_pct = 100.0 * (1.0 - mD / max(mN, 1e-12))
    return mN, mD, d_pct

def huber(residual, delta):
    r = pnp.abs(residual)
    return pnp.where(r <= delta, 0.5*r**2, delta*(r - 0.5*delta))

def train_stage3(X_train_clean, X_val_clean, X_train_noisy, X_val_noisy,
                 phi_stage1, n_layers, instance_id,
                 MAX_EPOCHS=60, BATCH=16, 
                 LR_START=0.003, PATIENCE=10, PLATEAU_STEPS=5, PLATEAU_FACTOR=0.5,
                 CLIP_NORM=2.0, USE_EMA=True, EMA_DECAY=0.99):

    set_global_seed(instance_id)

    global stage3_handles
    stage3_handles = stage3_qnodes(n_layers, phi_stage1)
    enc_all   = stage3_handles["encoder_only_expZ_all"]
    teacher_l = stage3_handles["teacher_code_latents"]
    denoise   = stage3_handles["denoiser_qnode_all"]

    # init ψ near φ
    phi_flat = pnp.array(phi_stage1, requires_grad=False)
    psi = pnp.array(np.array(phi_flat) + 0.05*np.random.randn(len(phi_flat)), requires_grad=True)

    # loss weights (same as before)
    ALPHA_REC, BETA_TF, GAMMA_TRASH, L_TV, L_ANCH = 1.0, 0.05, 0.5, 0.05, 2e-4
    DELTA_TV, DELTA_Z = 0.02, 0.25

    # single-window loss using pre-noised pair
    def loss_on_window_pre(params, clean_values, noisy_values):
        z_all = pnp.array(enc_all(params, noisy_values))
        z_sig, z_tr = z_all[:n_latent], z_all[n_latent:]
        zD = pnp.array(denoise(params, noisy_values))
        v_hat = Z_to_values_autograd(zD)

        L_rec = pnp.mean((pnp.array(clean_values) - v_hat)**2)
        z_t_sig = pnp.array(teacher_l(clean_values))
        L_tf = pnp.mean(huber(z_t_sig - z_sig, DELTA_Z))
        L_tr = pnp.mean(p1_from_expZ(z_tr))
        L_tv = pnp.mean(huber(first_diff(clean_values) - first_diff(v_hat), DELTA_TV))
        L_anchor = pnp.mean((params - phi_flat)**2)
        return (ALPHA_REC*L_rec + BETA_TF*L_tf + GAMMA_TRASH*L_tr + L_TV*L_tv + L_ANCH*L_anchor)

    # manual Adam
    m = pnp.zeros_like(psi); v = pnp.zeros_like(psi)
    b1, b2, eps = 0.9, 0.999, 1e-8
    t = 0
    def adam_step(params, grad, lr):
        nonlocal m, v, t
        t += 1
        m = b1*m + (1-b1)*grad
        v = b2*v + (1-b2)*(grad*grad)
        mhat = m/(1-b1**t); vhat = v/(1-b2**t)
        return params - lr * (mhat/(pnp.sqrt(vhat)+eps))

    # batches (deterministic per-epoch)
    def batch_indices(N, B, ep_seed):
        rng = np.random.default_rng(ep_seed)
        idx = rng.permutation(N)
        for s in range(0, N, B):
            yield idx[s:s+B]

    best_params, best_val = None, float("inf")
    best_epoch = -1
    no_improve, lr = 0, LR_START
    ema = pnp.array(psi, requires_grad=False) if USE_EMA else None

    hist_train, hist_val, hist_noisy, hist_delta = [], [], [], []
    t0 = time.time()

    for ep in range(MAX_EPOCHS):
        seed_ep = 88_000 + 100*instance_id + ep
        acc, nb = 0.0, 0
        for ix in batch_indices(len(X_train_clean), BATCH, ep_seed=seed_ep):
            for k in ix:
                c = X_train_clean[int(k)]
                n = X_train_noisy[int(k)]
                L = loss_on_window_pre(psi, c, n)
                if not pnp.isfinite(L): 
                    continue
                g = qml.grad(lambda p: loss_on_window_pre(p, c, n))(psi)
                if not pnp.all(pnp.isfinite(g)): 
                    continue
                # clip
                gnorm = pnp.linalg.norm(g) + 1e-12
                if gnorm > CLIP_NORM:
                    g = g * (CLIP_NORM / gnorm)
                psi = adam_step(psi, g, lr)
                if USE_EMA:
                    ema = EMA_DECAY*ema + (1-EMA_DECAY)*psi
                acc += float(L); nb += 1

        train_loss = acc / max(nb, 1)
        eval_params = ema if USE_EMA else psi

        # value-domain validation on pre-noised pairs
        mN, mD, dV = stage3_val_from_pre_noised(eval_params, X_val_noisy, X_val_clean)
        hist_train.append(train_loss); hist_val.append(mD)
        hist_noisy.append(mN);        hist_delta.append(dV)

        if mD < best_val - 1e-12:
            best_val, best_params, best_epoch, no_improve = mD, pnp.array(eval_params, requires_grad=False), ep, 0
        else:
            no_improve += 1
            if (no_improve % PLATEAU_STEPS) == 0:
                lr = max(lr * PLATEAU_FACTOR, 1e-5)
                print(f"[Stage3] Plateau → LR {lr:.5f}")

        norm_diff = float(pnp.linalg.norm((eval_params - phi_flat)))
        print(f"[Stage3] L={n_layers} ep {ep:03d} | train {train_loss:.5f} | "
              f"val {mD:.5f} | noisy {mN:.5f} | Δ {dV:+.1f}% | LR {lr:.5f} | ||ψ-φ|| {norm_diff:.3f}")

        if no_improve >= PATIENCE:
            print("[Stage3] Early stopping."); break

    train_seconds = float(time.time() - t0)
    epochs_run = len(hist_val)
    psi_best = best_params if best_params is not None else (ema if USE_EMA else psi)

    return dict(
        psi=psi_best, 
        best_val=float(best_val),
        best_epoch=int(best_epoch),
        epochs=int(epochs_run),
        hist_train=list(map(float, hist_train)),
        hist_val=list(map(float, hist_val)),
        hist_noisy=list(map(float, hist_noisy)),
        hist_delta=list(map(float, hist_delta)),
        train_seconds=train_seconds
    )

In [46]:
# ===================================================
# Cell 6 — Windowing helpers (sequential split)
# ===================================================
window_size = n_qubits
stride = WINDOW_STRIDE

def build_windows(y_vec, window_size, stride):
    return np.array(
        [y_vec[i:i+window_size] for i in range(0, len(y_vec)-window_size+1, stride)],
        dtype=float
    )

def sequential_train_val_test_split(X_windows, test_frac=0.20, val_frac=0.20):
    """
    Split by order to avoid leakage: first train, then val, last test.
    """
    n_total = len(X_windows)
    n_test  = max(1, int(round(test_frac * n_total)))
    X_trainval = X_windows[:-n_test]
    X_test     = X_windows[-n_test:]
    n_val = max(1, int(round(val_frac * len(X_trainval))))
    X_train = X_trainval[:-n_val]
    X_val   = X_trainval[-n_val:]
    return X_train, X_val, X_test

print("Cell 6 ready: helpers defined (build_windows, sequential split).")

Cell 6 ready: helpers defined (build_windows, sequential split).


In [None]:
# ============================================
# Cell 7 — Train runs (instances × layers × datasets) — sequential split
# ============================================
import time
import numpy as np
from qae_utils.Window import ts_add_noise  # ensure imported in Cell 1

RUNS = []

NOISE_GLOBAL_SEED = 4242  # same corruption for all instances within a dataset

def build_windows(y_vec, window_size, stride):
    return np.array(
        [y_vec[i:i+window_size] for i in range(0, len(y_vec)-window_size+1, stride)],
        dtype=float
    )

def sequential_train_val_test_split(X_windows, test_frac=0.20, val_frac=0.20):
    """
    Split by order to avoid leakage: first train, then val, last test.
    """
    n_total = len(X_windows)
    n_test  = max(1, int(round(test_frac * n_total)))
    X_trainval = X_windows[:-n_test]
    X_test     = X_windows[-n_test:]
    n_val = max(1, int(round(val_frac * len(X_trainval))))
    X_train = X_trainval[:-n_val]
    X_val   = X_trainval[-n_val:]
    return X_train, X_val, X_test

window_size = n_qubits
stride = WINDOW_STRIDE

for dataset_folder in DATASETS_TO_RUN:
    # ---- load clean series + info (used by embed & value scaling)
    y_all, info_dset, _ = load_dataset(dataset_folder)
    info = info_dset  # set global used by QNodes/encoders

    # ---- create pre-noised series (σ = 0.20) once per dataset
    y_noisy_all = ts_add_noise(
        y_all,
        noise=NOISE_SIGMA,            # 0.20 defined in Cell 0
        noise_type='normal',
        clip=False,
        range_low=info['scale_low'],
        range_high=info['scale_high'],
        seed=NOISE_GLOBAL_SEED
    )

    # ---- build windows (clean & noisy) with identical alignment
    X_clean_all = build_windows(y_all,       window_size, stride)
    X_noisy_all = build_windows(y_noisy_all, window_size, stride)
    assert len(X_clean_all) == len(X_noisy_all), "Clean/noisy window counts must match."

    # ---- sequential split (apply same scheme to both)
    X_train_clean, X_val_clean, X_test_clean = sequential_train_val_test_split(
        X_clean_all, test_frac=0.20, val_frac=0.20
    )
    X_train_noisy, X_val_noisy, X_test_noisy = sequential_train_val_test_split(
        X_noisy_all, test_frac=0.20, val_frac=0.20
    )

    print(f"[{dataset_folder}] windows: {len(X_clean_all)} (W={window_size}, step={stride})")
    print(f"[{dataset_folder}] split → train={len(X_train_clean)}, val={len(X_val_clean)}, test={len(X_test_clean)}")

    # ---- run both depths & instances for THIS dataset
    for L in LAYER_OPTIONS:
        for inst in INSTANCE_IDS:
            print(f"\n==============================")
            print(f"[{dataset_folder}] Instance {inst} | Layers {L}")
            print(f"==============================")

            # Stage 1 — train on CLEAN windows
            t0 = time.time()
            s1 = train_stage1(
                X_train_clean, X_val_clean,
                n_layers=L,
                instance_id=inst,
                n_epochs=120, batch_size=32,
                lr_init=0.010, patience=10, lr_patience=8, min_delta=1e-6
            )
            t1 = time.time()

            # Stage 3 — train on PRE-NOISED windows (σ=0.20)
            s3 = train_stage3(
                X_train_clean, X_val_clean,
                X_train_noisy, X_val_noisy,
                phi_stage1=s1["phi"],
                n_layers=L,
                instance_id=inst,
                MAX_EPOCHS=60, BATCH=16,
                LR_START=0.003, PATIENCE=10, PLATEAU_STEPS=5, PLATEAU_FACTOR=0.5,
                CLIP_NORM=2.0, USE_EMA=True, EMA_DECAY=0.99
            )
            t2 = time.time()

            RUNS.append({
                "dataset_folder": dataset_folder,
                "dataset_info": {"scale_low": float(info["scale_low"]), "scale_high": float(info["scale_high"])},
                "instance_id": inst,
                "n_layers": L,
                "stage1": {
                    "phi": s1["phi"],
                    "best_val": s1["best_val"],
                    "hist_train": s1["hist_train"],
                    "hist_val": s1["hist_val"],
                    "hist_lr": s1["hist_lr"],
                    "best_epoch": s1.get("best_epoch"),
                    "epochs": s1.get("epochs"),
                    "train_seconds": float(t1 - t0),
                },
                "stage3": {
                    "psi": s3["psi"],
                    "best_val": s3["best_val"],
                    "best_epoch": s3.get("best_epoch"),
                    "epochs": s3.get("epochs"),
                    "hist_train": s3["hist_train"],
                    "hist_val": s3["hist_val"],
                    "hist_noisy": s3.get("hist_noisy", []),
                    "hist_delta": s3.get("hist_delta", []),
                    "train_seconds": float(t2 - t1),
                }
            })

print(f"\nCompleted {len(RUNS)} runs across {len(DATASETS_TO_RUN)} datasets.")

[mackey_glass_tau17_n200] windows: 197 (W=4, step=1)
[mackey_glass_tau17_n200] split → train=126, val=32, test=39

[mackey_glass_tau17_n200] Instance 1 | Layers 1
[Stage1] L=1 ep 000 | train 0.492434 | val 0.486984 | LR 0.01000
[Stage1] L=1 ep 001 | train 0.490016 | val 0.484140 | LR 0.01000
[Stage1] L=1 ep 002 | train 0.487615 | val 0.481283 | LR 0.01000
[Stage1] L=1 ep 003 | train 0.485058 | val 0.478396 | LR 0.01000
[Stage1] L=1 ep 004 | train 0.482569 | val 0.475409 | LR 0.01000
[Stage1] L=1 ep 005 | train 0.479388 | val 0.472485 | LR 0.01000
[Stage1] L=1 ep 006 | train 0.476619 | val 0.469424 | LR 0.01000
[Stage1] L=1 ep 007 | train 0.473303 | val 0.466353 | LR 0.01000
[Stage1] L=1 ep 008 | train 0.470964 | val 0.463099 | LR 0.01000
[Stage1] L=1 ep 009 | train 0.467580 | val 0.459785 | LR 0.01000
[Stage1] L=1 ep 010 | train 0.464121 | val 0.456428 | LR 0.01000
[Stage1] L=1 ep 011 | train 0.460391 | val 0.452986 | LR 0.01000
[Stage1] L=1 ep 012 | train 0.457310 | val 0.449473 | LR 

In [None]:
# ======================================================================
# Cell 8 — Save artifacts (dataset-specific JSON bundles + CSV)
# ======================================================================
from pathlib import Path
import json, time, os, csv
import numpy as np
import pandas as pd

# --- hyperparams (for logging) ---
S1_LR_INIT       = 0.010
S1_MAX_EPOCHS    = 120
S1_PATIENCE      = 10
S1_LR_PATIENCE   = 8

S3_LR_INIT       = 0.003
S3_MAX_EPOCHS    = 60
S3_PATIENCE      = 10
S3_PLATEAU_STEPS = 5
S3_PLATEAU_FACT  = 0.5

CSV_SCHEMA_VERSION = "v2"

def ensure_dir(p): Path(p).mkdir(parents=True, exist_ok=True); return p

CSV_HEADER = [
    "filename","run_tag","dataset_folder","instance_id","rng_seed",
    "n_qubits","n_latent","n_trash","n_layers",
    "sigma_train","sigma_eval","window_stride",
    "s1_lr_init","s1_max_epochs","s1_patience","s1_lr_patience",
    "s1_best_val","s1_final_val","s1_best_epoch","s1_epochs","s1_train_seconds",
    "s3_lr_init","s3_max_epochs","s3_patience","s3_plateau_steps","s3_plateau_factor",
    "s3_best_val_mse","s3_final_val_mse","s3_best_epoch","s3_epochs","s3_train_seconds",
    "s3_noisy_baseline_mse","s3_best_delta_pct","s3_final_delta_pct",
    "phi_params","psi_params",
    "total_train_seconds",
]

def ensure_csv(path, header):
    needs_header = True
    if os.path.exists(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                first_line = f.readline().rstrip("\n")
            needs_header = (first_line != ",".join(header))
        except Exception:
            needs_header = True
    if needs_header:
        ensure_dir(os.path.dirname(path))
        with open(path, "w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow(header)

def _safe_argmin(seq):
    try:
        return int(np.nanargmin(seq)) if len(seq) else -1
    except Exception:
        return -1

def _safe_last(seq):
    return float(seq[-1]) if (isinstance(seq, (list, tuple)) and len(seq)) else np.nan

def std_instance_name(nq, n_latent, n_trash, n_layers, instance_id):
    # keep your historical naming
    return f"{int(nq)}q_{int(n_latent)}l_{int(n_trash)}t_{int(n_layers)}ls_{int(instance_id):02d}.json"

def save_one_run(run):
    dataset_folder = run["dataset_folder"]
    dataset_info   = run.get("dataset_info", {})
    scale_low      = float(dataset_info.get("scale_low", np.nan))
    scale_high     = float(dataset_info.get("scale_high", np.nan))

    # THIS notebook's fixed architecture
    n_qubits = 4
    n_latent = 2
    n_trash  = n_qubits - n_latent

    # per-dataset, per-model-set subroot:
    subroot = ensure_dir(f"{OUT_BASE}/{dataset_folder}/q{n_qubits}_l{n_latent}t{n_trash}")
    # CSV is stored inside the model-set folder (so each set has its own CSVs)
    csv_path = f"{subroot}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"
    ensure_csv(csv_path, CSV_HEADER)

    inst = int(run["instance_id"])
    L    = int(run["n_layers"])
    seed = int(run.get("seed", inst))

    # JSONs live in L1/ or L3/
    out_dir = ensure_dir(f"{subroot}/L{L}")
    fname = std_instance_name(n_qubits, n_latent, n_trash, L, inst)
    bundle_path = os.path.join(out_dir, fname)

    s1 = run["stage1"]; s3 = run["stage3"]

    # Stage-1
    s1_hist_val   = list(map(float, s1.get("hist_val", [])))
    s1_best_val   = float(s1.get("best_val", np.nan))
    s1_final_val  = _safe_last(s1_hist_val)
    s1_best_epoch = int(s1.get("best_epoch", _safe_argmin(s1_hist_val)))
    s1_epochs     = int(s1.get("epochs", len(s1_hist_val)))
    s1_seconds    = float(s1.get("train_seconds", np.nan))

    # Stage-3
    s3_hist_val   = list(map(float, s3.get("hist_val", [])))
    s3_hist_noisy = list(map(float, s3.get("hist_noisy", [])))
    s3_hist_delta = list(map(float, s3.get("hist_delta", [])))

    s3_best_val   = float(s3.get("best_val", np.nan))
    s3_final_val  = _safe_last(s3_hist_val)
    s3_best_epoch = int(s3.get("best_epoch", _safe_argmin(s3_hist_val)))
    s3_epochs     = int(s3.get("epochs", len(s3_hist_val)))
    s3_seconds    = float(s3.get("train_seconds", np.nan))

    noisy_baseline = float(np.nanmean(s3_hist_noisy)) if len(s3_hist_noisy) else np.nan
    best_delta     = float(np.nanmax(s3_hist_delta))  if len(s3_hist_delta)  else np.nan
    final_delta    = _safe_last(s3_hist_delta)

    bundle = {
        "schema": {"name": "half_qae_bundle", "version": "1.0"},
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "dataset": {
            "id": dataset_folder,
            "scale_low":  scale_low,
            "scale_high": scale_high,
            "window_size": int(n_qubits),
            "window_stride": int(WINDOW_STRIDE),
            "noise_sigma": float(NOISE_SIGMA),
            "noise_mode": "pre_series_normal_clip_false"
        },
        "run": {
            "tag": f"inst{inst}_L{L}",
            "instance_id": inst,
            "seed": seed,
            "sigma_train": float(NOISE_SIGMA),
            "sigma_eval":  float(NOISE_SIGMA),
        },
        "architecture": {
            "n_qubits": int(n_qubits),
            "n_layers": int(L),
            "n_latent": int(n_latent),
            "n_trash":  int(n_trash),
            "latent_wires": list(range(n_latent)),
            "trash_wires":  list(range(n_latent, n_qubits)),
        },
        "training": {
            "stage1": {
                "lr_init": S1_LR_INIT, "max_epochs": S1_MAX_EPOCHS,
                "patience": S1_PATIENCE, "lr_patience": S1_LR_PATIENCE,
                "best_val": s1_best_val, "final_val": s1_final_val,
                "best_epoch": s1_best_epoch, "epochs": s1_epochs,
                "train_curve": s1.get("hist_train", []), "val_curve": s1_hist_val, "lr_curve": s1.get("hist_lr", []),
                "train_seconds": s1_seconds,
            },
            "stage3": {
                "lr_init": S3_LR_INIT, "max_epochs": S3_MAX_EPOCHS,
                "patience": S3_PATIENCE, "plateau_steps": S3_PLATEAU_STEPS, "plateau_factor": S3_PLATEAU_FACT,
                "best_val_mse": s3_best_val, "final_val_mse": s3_final_val,
                "best_epoch": s3_best_epoch, "epochs": s3_epochs,
                "train_curve": s3.get("hist_train", []), "val_curve": s3_hist_val,
                "noisy_curve": s3.get("hist_noisy", []), "delta_curve": s3.get("hist_delta", []),
                "train_seconds": s3_seconds,
            }
        },
        "parameters": {
            "phi_stage1": np.array(s1.get("phi", [])).tolist(),
            "psi_stage3": np.array(s3.get("psi", [])).tolist(),
        },
    }
    ensure_dir(os.path.dirname(bundle_path))
    with open(bundle_path, "w", encoding="utf-8") as f:
        json.dump(bundle, f, indent=2)
    print(f"Saved bundle → {bundle_path}")

    # CSV row (per-model-set CSV inside subroot)
    phi_params = json.dumps(bundle["parameters"]["phi_stage1"])
    psi_params = json.dumps(bundle["parameters"]["psi_stage3"])
    total_seconds = float((0 if np.isnan(s1_seconds) else s1_seconds) + (0 if np.isnan(s3_seconds) else s3_seconds))

    row = [
        os.path.basename(bundle_path),
        f"inst{inst}_L{L}",
        dataset_folder,
        inst, seed,
        int(n_qubits), int(n_latent), int(n_trash), int(L),
        f"{NOISE_SIGMA:.3f}", f"{NOISE_SIGMA:.3f}", int(WINDOW_STRIDE),
        f"{S1_LR_INIT:.6f}", int(S1_MAX_EPOCHS), int(S1_PATIENCE), int(S1_LR_PATIENCE),
        f"{s1_best_val:.8f}", f"{s1_final_val:.8f}", s1_best_epoch, s1_epochs, s1_seconds,
        f"{S3_LR_INIT:.6f}", int(S3_MAX_EPOCHS), int(S3_PATIENCE), int(S3_PLATEAU_STEPS), f"{S3_PLATEAU_FACT:.3f}",
        f"{s3_best_val:.8f}", f"{s3_final_val:.8f}", s3_best_epoch, s3_epochs, s3_seconds,
        noisy_baseline, best_delta, final_delta,
        phi_params, psi_params,
        total_seconds,
    ]

    # upsert into per-model-set CSV
    row_df = pd.DataFrame([row], columns=CSV_HEADER)
    if Path(csv_path).exists():
        df_old = pd.read_csv(csv_path)
        key = os.path.basename(bundle_path)
        if "filename" in df_old.columns:
            df_old = df_old[df_old["filename"] != key]
        df_new = pd.concat([df_old, row_df], ignore_index=True)
        df_new.to_csv(csv_path, index=False)
    else:
        row_df.to_csv(csv_path, index=False)
    print(f"Upserted CSV row  → {csv_path}")

# ---- save all runs ----
for run in RUNS:
    save_one_run(run)

print("\nAll runs saved per dataset & model-set.")

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'

In [None]:
# =========================================================
# Cell 9 — Build & preview the training-only results tables
#            (one CSV per dataset *and* per model-set)
# =========================================================
import pandas as pd
import numpy as np
from pathlib import Path

def mean_std_safe(s: pd.Series) -> str:
    v = pd.to_numeric(s, errors="coerce").astype(float)
    v = v[np.isfinite(v)]
    if v.size == 0: return "n/a"
    return f"{v.mean():.6f} ± {v.std(ddof=0):.6f}"

CSV_SCHEMA_VERSION = "v2"

# THIS notebook = q4_l2t2 only
n_qubits, n_latent, n_trash = 4, 2, 2

for dataset_folder in DATASETS_TO_RUN:
    subroot = f"{OUT_BASE}/{dataset_folder}/q{n_qubits}_l{n_latent}t{n_trash}"
    csv_path = f"{subroot}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"
    if not Path(csv_path).exists():
        print(f"[WARN] CSV not found for {dataset_folder}: {csv_path}")
        continue

    df = pd.read_csv(csv_path)

    # drop dups per file (keep last)
    if "filename" in df.columns:
        df = df.drop_duplicates(subset=["filename"], keep="last")
    else:
        df = df.drop_duplicates(subset=["run_tag","instance_id","n_layers"], keep="last")

    # numeric casts (safe)
    for col in [
        "s3_noisy_baseline_mse","s3_best_delta_pct","s3_final_delta_pct",
        "s3_best_val_mse","s3_final_val_mse",
        "s1_best_val","s1_final_val",
        "s1_train_seconds","s3_train_seconds","total_train_seconds",
        "s1_best_epoch","s1_epochs","s3_best_epoch","s3_epochs"
    ]:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    df = df.sort_values(["n_layers","instance_id"]).reset_index(drop=True)
    clean_path = f"{subroot}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"
    Path(subroot).mkdir(parents=True, exist_ok=True)
    df.to_csv(clean_path, index=False)
    print(f"[{dataset_folder}] Saved table → {clean_path}")

    # per-layer summary
    grp = df.groupby("n_layers", dropna=False)
    summary = pd.DataFrame({"runs": grp.size()})
    for col, label in [
        ("s3_noisy_baseline_mse", "noisy_baseline_mse (mean±std)"),
        ("s3_best_val_mse",       "best_val_mse (mean±std)"),
        ("s3_final_val_mse",      "final_val_mse (mean±std)"),
        ("s3_best_delta_pct",     "best_delta_pct (mean±std)"),
        ("s3_final_delta_pct",    "final_delta_pct (mean±std)"),
        ("s1_best_val",           "s1_best_val (mean±std)"),
    ]:
        if col in df.columns and np.isfinite(df[col]).any():
            summary[label] = grp[col].apply(mean_std_safe)

    summary_path = f"{subroot}/summary_by_layers_{CSV_SCHEMA_VERSION}.csv"
    summary.to_csv(summary_path, index=True)
    print(f"[{dataset_folder}] Saved per-layer summary → {summary_path}")

    display(df.head(8))
    display(summary)

Saved training-only table → ./runs_halfqae/all_training_instances_v2.csv
Saved per-layer summary → ./runs_halfqae/summary_by_layers_v2.csv


Unnamed: 0,filename,run_tag,dataset_folder,instance_id,rng_seed,n_qubits,n_latent,n_trash,n_layers,sigma_train,...,s3_final_val_mse,s3_best_epoch,s3_epochs,s3_train_seconds,s3_noisy_baseline_mse,s3_best_delta_pct,s3_final_delta_pct,phi_params,psi_params,total_train_seconds
0,4q_2l_2t_1ls_01.json,inst1_L1,mackey_glass_n100,1,1,4,2,2,1,0.1,...,0.003574,9,20,32.788465,0.003908,11.878268,11.878268,"[0.00017403888647591283, -1.9710404535850785, ...","[-0.0006123296470692364, -2.0262762252890805, ...",86.059745
1,4q_2l_2t_1ls_02.json,inst2_L1,mackey_glass_n100,2,2,4,2,2,1,0.1,...,0.003603,6,17,28.605802,0.003908,11.336218,11.336218,"[1.8056755714875147e-05, -1.9193147771584536, ...","[-0.005484925176926273, -2.0216083452573383, -...",81.824216
2,4q_2l_2t_1ls_03.json,inst3_L1,mackey_glass_n100,3,3,4,2,2,1,0.1,...,0.003016,28,39,64.523617,0.003908,23.067725,23.067725,"[0.00021396068902014424, 1.0656310158671813, 0...","[0.0022221163621398794, 1.0046762387673538, 1....",102.940782
3,4q_2l_2t_1ls_04.json,inst4_L1,mackey_glass_n100,4,4,4,2,2,1,0.1,...,0.003487,10,21,37.238774,0.003908,15.307742,15.307742,"[3.9331420911905245e-05, -2.0191278619373705, ...","[0.0016684085266360368, -2.0860432857437528, 0...",91.399046
4,4q_2l_2t_1ls_05.json,inst5_L1,mackey_glass_n100,5,5,4,2,2,1,0.1,...,0.003599,6,17,27.81092,0.003908,12.813947,12.813947,"[-3.1875221778984855e-05, -1.909854775842016, ...","[-0.012663044461042546, -2.009391188962638, -0...",75.792944
5,4q_2l_2t_3ls_01.json,inst1_L3,mackey_glass_n100,1,1,4,2,2,3,0.1,...,0.002705,36,47,192.629691,0.003908,32.257238,32.257238,"[1.4721530341460392, 0.03237289093849179, -0.1...","[1.5373755944746403, 0.013355311686751007, -0....",328.944969
6,4q_2l_2t_3ls_02.json,inst2_L3,mackey_glass_n100,2,2,4,2,2,3,0.1,...,0.002606,23,34,140.302912,0.003908,33.813029,33.813029,"[0.019330858162530336, -0.28521034298242903, -...","[-0.008000250494283483, -0.2607754835185739, -...",277.924827
7,4q_2l_2t_3ls_03.json,inst3_L3,mackey_glass_n100,3,3,4,2,2,3,0.1,...,0.003348,9,20,83.783044,0.003908,21.23954,21.23954,"[0.7013220902762077, 1.1604341975538626, 0.576...","[0.5618612928893267, 1.2522178748846688, 0.577...",222.222205
8,4q_2l_2t_3ls_04.json,inst4_L3,mackey_glass_n100,4,4,4,2,2,3,0.1,...,0.003031,11,22,90.352279,0.003908,24.546582,24.546582,"[-0.20819083620731277, 0.033144625958318935, 1...","[-0.14742413582161445, 0.06266292593559304, 1....",229.604356
9,4q_2l_2t_3ls_05.json,inst5_L3,mackey_glass_n100,5,5,4,2,2,3,0.1,...,0.002757,16,27,107.694295,0.003908,32.257212,32.257212,"[1.3362803038788154, 0.03851371435683986, 1.06...","[1.574032281646895, 0.03304526435829487, 1.299...",241.914114


Unnamed: 0_level_0,runs,noisy_baseline_mse (mean±std),best_val_mse (mean±std),final_val_mse (mean±std),best_delta_pct (mean±std),final_delta_pct (mean±std),s1_best_val (mean±std)
n_layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,5,0.003908 ± 0.000000,0.003326 ± 0.000169,0.003456 ± 0.000224,14.880780 ± 4.314143,14.880780 ± 4.314143,0.231989 ± 0.003387
3,5,0.003908 ± 0.000000,0.002781 ± 0.000195,0.002889 ± 0.000269,28.822720 ± 4.985676,28.822720 ± 4.985676,0.055975 ± 0.015050
