In [1]:
# ============================================
# Cell 0 — Experiment plan & seeds (GLOBAL)
# ============================================
# We'll run 5 instances (same across all notebooks) and two depths: 1 and 3 layers.
INSTANCE_IDS   = [1, 2, 3, 4, 5]   # used in filenames as ..._ls_01.json, ..._ls_02.json, ...
LAYER_OPTIONS  = [1, 3]            # train 1-layer first, then 3-layers
EVAL_SIGMA     = 0.10              # fixed noise everywhere (train & eval)

# where to save artifacts (JSON bundles, instance records, CSV summary)
OUT_BASE = "./runs_halfqae"        # change if you like; subfolders will be created automatically
CSV_PATH = f"{OUT_BASE}/results_instances.csv"  # will be appended-to if exists

In [2]:
# ==== IMPORTY ====
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os, sys, json, math, random, time, hashlib
import pennylane as qml
from pennylane import numpy as pnp

In [3]:
# ==== USTAWIENIA OGÓLNE ====
np.random.seed(42)

# symulator (szybki)
BACKEND = "lightning.qubit"

# Half-QAE
n_qubits = 4
n_latent = 3
n_trash  = n_qubits - n_latent  # 1

# dane Mackey-Glass 
beta=0.25
gamma=0.1
n=10
tau=15
dt=1.0
T=300

# skalowanie
margin = 0.2
scale_low, scale_high = 0.0+margin, 1.0-margin

In [4]:
# ==== POMOCNICZE ====
def scale_values(x, new_min=0.0, new_max=1.0):
    x_min, x_max = np.min(x), np.max(x)
    return new_min + (x - x_min) * (new_max - new_min) / max(1e-12, (x_max - x_min))

def mackey_glass(beta=0.2, gamma=0.1, n=10, tau=17, dt=0.1, T=1000):
    N = int(T/dt)
    delay_steps = int(tau/dt)
    x = np.zeros(N+delay_steps)
    x[0:delay_steps] = 1.2
    for t in range(delay_steps, N+delay_steps-1):
        x_tau = x[t-delay_steps]
        dxdt = beta * x_tau / (1 + x_tau**n) - gamma * x[t]
        x[t+1] = x[t] + dxdt * dt
    return x[delay_steps:]

def ts_add_noise(X, noise=0.1, low=0.0, high=1.0):
    Z = X + np.random.normal(0.0, noise, size=X.shape)
    return np.clip(Z, low, high)

In [5]:
# ==== GENEROWANIE DANYCH ====
y_raw = mackey_glass(beta=beta, gamma=gamma, n=n, tau=tau, dt=dt, T=T)
y_raw = scale_values(y_raw, new_min=scale_low, new_max=scale_high)
y = y_raw[2::3]                               # subsampling
X_idx = np.arange(len(y))

# okna (sliding window)
window_size = n_qubits
stride = 1
X_windows = np.stack([y[i:i+window_size] for i in range(0, len(y)-window_size+1, stride)])

# podziały: train/val/test = 0.6 / 0.2 / 0.2
X_temp, X_test = train_test_split(X_windows, test_size=0.2, random_state=42)
X_train, X_val = train_test_split(X_temp,   test_size=0.25, random_state=42)

# czyste zbiory (na Stage 1 i Stage 2 generujemy szum dynamicznie)
X_train_clean = X_train.copy()
X_val_clean   = X_val.copy()
X_test_clean  = X_test.copy()

In [6]:
# ==== ANSATZE  ====
def encoder_ansatz(params, x, wires=None):
    W = list(range(n_qubits)) if wires is None else list(wires)
    # angle encoding
    for w, val in zip(W, x[:len(W)]):
        qml.RY(val*np.pi, wires=w)
    # warstwy rotacji
    n_block = len(W)
    n_layers = len(params) // (n_block * 3)
    for layer in range(n_layers):
        for j, w in enumerate(W):
            idx = layer * n_block * 3 + j * 3
            qml.RX(params[idx],     wires=w)
            qml.RY(params[idx + 1], wires=w)
            qml.RZ(params[idx + 2], wires=w)
        for a, b in zip(W, W[1:]):
            qml.CNOT(wires=[a, b])
        qml.CNOT(wires=[W[-1], W[0]])

def decoder_ansatz(params, wires=None):
    W = list(range(n_qubits)) if wires is None else list(wires)
    n_block = len(W)
    n_layers = len(params) // (n_block * 3)
    for layer in range(n_layers):
        for a, b in zip(W, W[1:]):
            qml.CNOT(wires=[a, b])
        qml.CNOT(wires=[W[-1], W[0]])
        for j, w in enumerate(W):
            idx = layer * n_block * 3 + j * 3
            qml.RZ(params[idx + 2], wires=w)
            qml.RY(params[idx + 1], wires=w)
            qml.RX(params[idx],     wires=w)

def adjoint_decoder_ansatz(params, x, wires=None):
    W = list(range(n_qubits)) if wires is None else list(wires)
    n_block = len(W)
    for w, val in zip(W, x[:n_block]):
        qml.RY(val*np.pi, wires=w)
    n_layers = len(params) // (n_block * 3)
    for layer in reversed(range(n_layers)):
        for j in reversed(range(n_block)):
            w = W[j]
            idx = layer * n_block * 3 + j * 3
            qml.RX(-params[idx],     wires=w)
            qml.RY(-params[idx + 1], wires=w)
            qml.RZ(-params[idx + 2], wires=w)
        for j in reversed(range(n_block - 1)):
            qml.CNOT(wires=[W[j+1], W[j]])
        qml.CNOT(wires=[W[-1], W[0]])


In [7]:
# ==== STAGE 1: swap-test na adjoint_decoder (uczenie „trash->czysto”) ====
# inicjalizacje
#n_layers = 2
#param_shape = n_layers * n_qubits * 3

trash2_start = n_qubits               # 4
ancilla      = n_qubits + n_trash     # 6
num_total_s1 = n_qubits + n_trash + 1 # 7

dev_s1 = qml.device(BACKEND, wires=num_total_s1, shots=None)

@qml.qnode(dev_s1, interface="autograd")
def swap_test_on_adj_decoder(x, params):
    main   = list(range(n_qubits))                                # [0,1,2,3]
    trash1 = list(range(n_latent, n_latent + n_trash))            # [2,3]
    trash2 = list(range(trash2_start, trash2_start + n_trash))    # [4,5]

    adjoint_decoder_ansatz(params, x)

    for i in trash2:
        qml.Hadamard(wires=i)
    qml.Hadamard(wires=ancilla)
    for i in range(n_trash):
        qml.CSWAP(wires=[ancilla, trash1[i], trash2[i]])
    qml.Hadamard(wires=ancilla)

    return qml.probs(wires=ancilla)

def s1_cost(params, X_batch):
    acc = pnp.array(0.0)
    for x in X_batch:
        p0 = swap_test_on_adj_decoder(x, params)[0]
        acc = acc + (1.0 - p0)
    return acc / len(X_batch)

def train_adjoint_decoder(params_init, X_train, n_epochs=80, batch_size=8, lr=0.01, X_val_clean=None, seed=0):  # <-- ZMIANA
    rng = np.random.default_rng(seed)  # <-- ZMIANA
    params = pnp.array(params_init, requires_grad=True)
    opt = qml.AdamOptimizer(stepsize=lr)
    hist = []
    hist_val = []
    lr_hist = []
    best_params, best_val = None, float("inf")
    best_epoch = -1
    no_improve = 0
    t0 = time.time()
    for ep in range(n_epochs):
        idx = rng.permutation(len(X_train))  # <-- ZMIANA
        s=0.0; nb=0
        for i in range(0, len(X_train), batch_size):
            Xb = X_train[idx[i:i+batch_size]]
            params, c = opt.step_and_cost(lambda p: s1_cost(p, Xb), params)
            s += float(c); nb += 1
        train_cost = s/nb
        hist.append(train_cost)
        lr_hist.append(opt.stepsize)
        # --- walidacja ---
        if X_val_clean is not None:
            val_cost = float(s1_cost(params, X_val_clean))
            hist_val.append(val_cost)
        else:
            val_cost = train_cost
            hist_val.append(train_cost)
        print(f"[Stage1] L={len(params_init)//(n_qubits*3)} ep {ep:03d} | train {train_cost:.6f} | val {val_cost:.6f} | LR {opt.stepsize:.5f}")
        if train_cost < best_val - 1e-12:
            best_val, best_params, best_epoch, no_improve = train_cost, pnp.array(params, requires_grad=False), ep, 0
        else:
            no_improve += 1
        if no_improve >= 10:
            break
    train_seconds = float(time.time() - t0)
    return dict(
        phi=best_params if best_params is not None else params,
        best_val=float(best_val),
        best_epoch=int(best_epoch),
        epochs=len(hist),
        hist_train=list(map(float, hist)),
        hist_val=list(map(float, hist_val)),
        hist_lr=list(map(float, lr_hist)),
        train_seconds=train_seconds
    )

In [8]:
# ==== STAGE 2: trenujemy ENCODER latent↔latent (noisy vs clean_ref) ====
dev_s2 = qml.device(BACKEND, wires=2*n_qubits + 1, shots=None)

@qml.qnode(dev_s2, interface="autograd")
def swap_test_encoder_latent(x_noisy, x_clean, enc_params, dec_dagger_params):
    encoder = list(range(n_qubits))                  # blok encodera
    ref     = list(range(n_qubits, 2*n_qubits))      # blok referencyjny
    anc     = 2*n_qubits

    encoder_ansatz(enc_params, x_noisy, wires=encoder)
    adjoint_decoder_ansatz(dec_dagger_params, x_clean, wires=ref)

    qml.Hadamard(wires=anc)
    for i in range(n_latent):
        qml.CSWAP(wires=[anc, encoder[i], ref[i]])   # SWAP tylko latentów
    qml.Hadamard(wires=anc)
    return qml.probs(wires=anc)

def s2_cost(enc_params, X_noisy_b, X_clean_b, dec_dagger_params):
    acc = pnp.array(0.0)
    for xn, xc in zip(X_noisy_b, X_clean_b):
        p0 = swap_test_encoder_latent(xn, xc, enc_params, dec_dagger_params)[0]
        acc = acc + (1.0 - p0)
    return acc / len(X_noisy_b)

# skala szumu czerpana z realnego skalowania danych
info = {"scale_low": scale_low, "scale_high": scale_high}

def s2_cost_dataset(enc_params, X_clean_set, dec_params, noise_level):
    """Metryka testowa S2: generujemy szum o danym poziomie i liczymy koszt."""
    low, high = info["scale_low"], info["scale_high"]
    sigma = noise_level * (high - low)
    X_noisy_set = ts_add_noise(X_clean_set, noise=sigma, low=low, high=high)
    return float(s2_cost(enc_params, X_noisy_set, X_clean_set, dec_params))

def train_encoder_with_sidekick_dyn_noise(enc_params_init, X_clean,
                                         dec_dagger_params, noise_level,
                                         n_epochs=80, batch_size=8, lr=0.01, seed=0,
                                         X_val_clean=None):
    params = pnp.array(enc_params_init, requires_grad=True)
    opt = qml.AdamOptimizer(stepsize=lr)
    hist = []
    hist_val = []
    lr_hist = []
    hist_noisy = []
    hist_delta = []
    best_params, best_val = None, float("inf")
    best_epoch = -1
    no_improve = 0
    t0 = time.time()
    rng = np.random.default_rng(seed)
    low, high = info["scale_low"], info["scale_high"]
    sigma = noise_level * (high - low)
    for ep in range(n_epochs):
        idx = rng.permutation(len(X_clean))
        Xc = X_clean[idx]
        s = 0.0; nb = 0
        for i in range(0, len(Xc), batch_size):
            Xcb = Xc[i:i+batch_size]
            Xnb = ts_add_noise(Xcb, noise=sigma, low=low, high=high)
            params, c = opt.step_and_cost(
                lambda p: s2_cost(p, Xnb, Xcb, dec_dagger_params), params
            )
            s += float(c); nb += 1
        train_cost = s/nb
        hist.append(train_cost)
        lr_hist.append(opt.stepsize)
        # --- walidacja ---
        if X_val_clean is not None:
            val_cost = float(s2_cost_dataset(params, X_val_clean, dec_dagger_params, noise_level))
            # --- baseline/noise-only na walidacji ---
            X_noisy_val = ts_add_noise(X_val_clean, noise=sigma, low=low, high=high)
            mse_noisy = float(s2_cost(params*0, X_noisy_val, X_val_clean, dec_dagger_params))  # params*0 = brak uczenia
            hist_noisy.append(mse_noisy)
            if mse_noisy > 1e-12:
                delta = 100.0 * (1.0 - val_cost / mse_noisy)
            else:
                delta = 0.0
            hist_delta.append(delta)
            hist_val.append(val_cost)
        else:
            val_cost = train_cost
            hist_val.append(train_cost)
            hist_noisy.append(np.nan)
            hist_delta.append(np.nan)
        print(f"[Stage3] L={len(enc_params_init)//(n_qubits*3)} ep {ep:03d} | train {train_cost:.5f} | "
              f"val {val_cost:.5f} | noisy {mse_noisy:.5f} | Δ {delta:+.1f}% | LR {opt.stepsize:.5f}")
        if train_cost < best_val - 1e-12:
            best_val, best_params, best_epoch, no_improve = train_cost, pnp.array(params, requires_grad=False), ep, 0
        else:
            no_improve += 1
        if no_improve >= 10:
            break
    
    train_seconds = float(time.time() - t0)
    return dict(
        psi=best_params if best_params is not None else params,
        best_val=float(best_val),
        best_epoch=int(best_epoch),
        epochs=len(hist),
        hist_train=list(map(float, hist)),
        hist_val=list(map(float, hist_val)),
        hist_lr=list(map(float, lr_hist)),
        hist_noisy=list(map(float, hist_noisy)),
        hist_delta=list(map(float, hist_delta)),
        train_seconds=train_seconds
    )

In [9]:
def train_stage1(X_train, X_val, n_layers=2, instance_id=1, n_epochs=120, batch_size=32, lr_init=0.01, patience=10, lr_patience=8, min_delta=1e-6, seed=0):  # <-- ZMIANA
    param_shape = n_layers * n_qubits * 3
    rng = np.random.default_rng(seed)  # <-- ZMIANA
    params_init = rng.uniform(-0.01, 0.01, param_shape)  # <-- ZMIANA
    result = train_adjoint_decoder(
        params_init, X_train,
        n_epochs=n_epochs, batch_size=batch_size, lr=lr_init,
        X_val_clean=X_val,
        seed=seed  # <-- ZMIANA
    )
    return result

def train_stage3(X_train, X_val, dec_dagger_params, n_layers=2, instance_id=1, n_epochs=120, batch_size=32, lr_init=0.01, noise_level=0.10, seed=0):
    param_shape = n_layers * n_qubits * 3
    rng = np.random.default_rng(seed)  # <-- ZMIANA
    enc_params_init = rng.uniform(-0.01, 0.01, param_shape)  # <-- ZMIANA
    result = train_encoder_with_sidekick_dyn_noise(
        enc_params_init, X_train,
        dec_dagger_params, noise_level,
        n_epochs=n_epochs, batch_size=batch_size, lr=lr_init, seed=seed,
        X_val_clean=X_val
    )
    return result

In [10]:
RUNS = []

for L in LAYER_OPTIONS:
    for inst in INSTANCE_IDS:
        print(f"\n==============================")
        print(f"Instance {inst} | Layers {L}")
        print(f"==============================")

        t0 = time.time()
        s1 = train_stage1(
            X_train, X_val,
            n_layers=L,
            instance_id=inst,
            n_epochs=120, batch_size=32,
            lr_init=0.010, patience=10, lr_patience=8, min_delta=1e-6,
            seed=inst  # <-- ZMIANA
        )
        t1 = time.time()

        s3 = train_stage3(
            X_train, X_val,
            dec_dagger_params=s1["phi"],
            n_layers=L,
            instance_id=inst,
            n_epochs=60, batch_size=16,
            lr_init=0.010, noise_level=EVAL_SIGMA, seed=inst  # <-- już było
        )
        t2 = time.time()

        RUNS.append({
            "instance_id": inst,
            "n_layers": L,
            "stage1": {
                "phi": s1["phi"],
                "best_val": s1["best_val"],
                "hist_train": s1["hist_train"],
                "hist_val": s1["hist_val"],
                "hist_lr": s1["hist_lr"],
                "best_epoch": s1.get("best_epoch"),
                "epochs": s1.get("epochs"),
                "train_seconds": float(t1 - t0),
            },
            "stage3": {
                "psi": s3["psi"],
                "best_val": s3["best_val"],
                "hist_train": s3["hist_train"],
                "hist_val": s3["hist_val"],
                "hist_lr": s3["hist_lr"],
                "best_epoch": s3.get("best_epoch"),
                "epochs": s3.get("epochs"),
                "hist_noisy": s3.get("hist_noisy", []),
                "hist_delta": s3.get("hist_delta", []),
                "train_seconds": float(t2 - t1),
            }
        })

print(f"\nCompleted {len(RUNS)} runs.")


Instance 1 | Layers 1
[Stage1] L=1 ep 000 | train 0.063804 | val 0.056745 | LR 0.01000
[Stage1] L=1 ep 001 | train 0.063256 | val 0.055486 | LR 0.01000
[Stage1] L=1 ep 002 | train 0.062227 | val 0.054336 | LR 0.01000
[Stage1] L=1 ep 003 | train 0.060651 | val 0.053375 | LR 0.01000
[Stage1] L=1 ep 004 | train 0.060709 | val 0.052591 | LR 0.01000
[Stage1] L=1 ep 005 | train 0.060193 | val 0.051948 | LR 0.01000
[Stage1] L=1 ep 006 | train 0.059144 | val 0.051482 | LR 0.01000
[Stage1] L=1 ep 007 | train 0.059238 | val 0.051172 | LR 0.01000
[Stage1] L=1 ep 008 | train 0.058978 | val 0.050959 | LR 0.01000
[Stage1] L=1 ep 009 | train 0.058752 | val 0.050866 | LR 0.01000
[Stage1] L=1 ep 010 | train 0.057992 | val 0.050800 | LR 0.01000
[Stage1] L=1 ep 011 | train 0.059229 | val 0.050820 | LR 0.01000
[Stage1] L=1 ep 012 | train 0.059281 | val 0.050848 | LR 0.01000
[Stage1] L=1 ep 013 | train 0.058646 | val 0.050913 | LR 0.01000
[Stage1] L=1 ep 014 | train 0.060230 | val 0.050952 | LR 0.01000
[S

In [12]:
# ======================================================================
# Cell 7 — Save artifacts (JSON) and append a paper-ready CSV per run
# ======================================================================
from pathlib import Path
import json, time, os, csv
import numpy as np
import pandas as pd

# --- hyperparams logged (keep in sync with training cells) ---
S1_LR_INIT       = 0.010
S1_MAX_EPOCHS    = 120
S1_PATIENCE      = 10
S1_LR_PATIENCE   = 8

S3_LR_INIT       = 0.010
S3_MAX_EPOCHS    = 60

CSV_SCHEMA_VERSION = "v3"  # bump if you change columns

# --- ensure dirs ---
def ensure_dir(path):
    Path(path).mkdir(parents=True, exist_ok=True)
    return path

ensure_dir(OUT_BASE)
subroot = ensure_dir(f"{OUT_BASE}/q{n_qubits}_l{n_latent}t{n_trash}")

# --- CSV path (versioned) ---
CSV_PATH = f"{OUT_BASE}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"

# --- header for the full, paper-friendly table ---
CSV_HEADER = [
    # id / naming
    "filename","run_tag","dataset_folder","instance_id","rng_seed",
    # architecture
    "n_qubits","n_latent","n_trash","n_layers",
    # noise & window
    "sigma_train","sigma_eval","window_stride",
    # stage-1 hyperparams + outcomes
    "s1_lr_init","s1_max_epochs","s1_patience","s1_lr_patience",
    "s1_best_val","s1_final_val","s1_best_epoch","s1_epochs","s1_train_seconds",
    # stage-3 hyperparams + outcomes
    "s3_lr_init","s3_max_epochs",
    "s3_best_val_mse","s3_final_val_mse","s3_best_epoch","s3_epochs","s3_train_seconds",
    "s3_noisy_baseline_mse","s3_best_delta_pct","s3_final_delta_pct",
    # params (JSON)
    "phi_params","psi_params",
    # totals
    "total_train_seconds",
]

def ensure_csv(path, header):
    needs_header = True
    if os.path.exists(path):
        try:
            with open(path, "r", encoding="utf-8") as f:
                first_line = f.readline().rstrip("\n")
            needs_header = (first_line != ",".join(header))
        except Exception:
            needs_header = True
    if needs_header:
        with open(path, "w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(header)

ensure_csv(CSV_PATH, CSV_HEADER)

def _safe_argmin(seq):
    try:
        return int(np.nanargmin(seq)) if len(seq) else -1
    except Exception:
        return -1

def _safe_last(seq):
    return float(seq[-1]) if (isinstance(seq, (list, tuple)) and len(seq)) else np.nan

def save_one_run(run):
    inst = int(run["instance_id"])
    L    = int(run["n_layers"])
    seed = int(run.get("seed", inst))

    # standardized filename: 4q_3l_1t_{L}ls_{inst:02d}.json
    fname = f"4q_3l_1t_{L}ls_{inst:02d}.json"
    out_dir = ensure_dir(f"{subroot}/L{L}")
    bundle_path = os.path.join(out_dir, fname)

    # pull stage results (robust to missing keys)
    s1 = run["stage1"]
    s3 = run["stage3"]

    # Stage-1 metrics
    s1_hist_val = list(map(float, s1.get("hist_val", [])))
    s1_best_val = float(s1.get("best_val", np.nan))
    s1_final_val = _safe_last(s1_hist_val)
    s1_best_epoch = int(s1.get("best_epoch", _safe_argmin(s1_hist_val)))
    s1_epochs = int(s1.get("epochs", len(s1_hist_val)))
    s1_seconds = float(s1.get("train_seconds", np.nan))

    # Stage-3 metrics
    s3_hist_val = list(map(float, s3.get("hist_val", [])))
    s3_hist_noisy = list(map(float, s3.get("hist_noisy", [])))
    s3_hist_delta = list(map(float, s3.get("hist_delta", [])))
    s3_best_val = float(s3.get("best_val", np.nan))
    s3_final_val = _safe_last(s3_hist_val)
    s3_best_epoch = int(s3.get("best_epoch", _safe_argmin(s3_hist_val)))
    s3_epochs = int(s3.get("epochs", len(s3_hist_val)))
    s3_seconds = float(s3.get("train_seconds", np.nan))

    # --- compute metrics with FALLBACKS if curves are missing ---
    noisy_baseline = float(np.nanmean(s3_hist_noisy)) if len(s3_hist_noisy) else np.nan
    best_delta     = (float(np.nanmax(s3_hist_delta)) if (len(s3_hist_delta) and np.isfinite(np.nanmax(s3_hist_delta)))
                      else np.nan)
    final_delta    = _safe_last(s3_hist_delta)

    # bundle JSON (parameters + training curves)
    bundle = {
        "schema": {"name": "half_qae_bundle", "version": "1.0"},
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "dataset": {
            "scale_low":  float(scale_low),
            "scale_high": float(scale_high),
            "window_size": int(n_qubits),
            "window_stride": 1,
        },
        "run": {
            "tag": f"inst{inst}_L{L}",
            "instance_id": inst,
            "seed": seed,
            "sigma_train": float(EVAL_SIGMA),
            "sigma_eval":  float(EVAL_SIGMA),
        },
        "architecture": {
            "n_qubits": int(n_qubits),
            "n_layers": int(L),
            "n_latent": int(n_latent),
            "n_trash":  int(n_trash),
            "latent_wires": list(range(n_latent)),
            "trash_wires":  list(range(n_latent, n_qubits)),
        },
        "training": {
            "stage1": {
                "lr_init": S1_LR_INIT, "max_epochs": S1_MAX_EPOCHS,
                "patience": S1_PATIENCE, "lr_patience": S1_LR_PATIENCE,
                "best_val": s1_best_val, "final_val": s1_final_val,
                "best_epoch": s1_best_epoch, "epochs": s1_epochs,
                "train_curve": s1.get("hist_train", []), "val_curve": s1_hist_val, "lr_curve": s1.get("hist_lr", []),
                "train_seconds": s1_seconds,
            },
            "stage3": {
                "lr_init": S3_LR_INIT, "max_epochs": S3_MAX_EPOCHS,
                "best_val_mse": s3_best_val, "final_val_mse": s3_final_val,
                "best_epoch": s3_best_epoch, "epochs": s3_epochs,
                "train_curve": s3.get("hist_train", []), "val_curve": s3_hist_val, "lr_curve": s3.get("hist_lr", []),
                "noisy_curve": s3_hist_noisy, "delta_curve": s3_hist_delta,
                "train_seconds": s3_seconds,
            }
        },
        "parameters": {
            "phi_stage1": np.array(s1.get("phi", [])).tolist(),
            "psi_stage3": np.array(s3.get("psi", [])).tolist(),
        },
    }
    with open(bundle_path, "w", encoding="utf-8") as f:
        json.dump(bundle, f, indent=2)
    print(f"Saved bundle → {bundle_path}")

    # assemble CSV row
    phi_params = json.dumps(bundle["parameters"]["phi_stage1"])
    psi_params = json.dumps(bundle["parameters"]["psi_stage3"])
    total_seconds = float((0 if np.isnan(s1_seconds) else s1_seconds) + (0 if np.isnan(s3_seconds) else s3_seconds))

    row = [
        os.path.basename(bundle_path),
        f"inst{inst}_L{L}",
        OUT_BASE,
        inst, seed,
        int(n_qubits), int(n_latent), int(n_trash), int(L),
        f"{EVAL_SIGMA:.3f}", f"{EVAL_SIGMA:.3f}", 1,
        f"{S1_LR_INIT:.6f}", int(S1_MAX_EPOCHS), int(S1_PATIENCE), int(S1_LR_PATIENCE),
        f"{s1_best_val:.8f}", f"{s1_final_val:.8f}", s1_best_epoch, s1_epochs, s1_seconds,
        f"{S3_LR_INIT:.6f}", int(S3_MAX_EPOCHS),
        f"{s3_best_val:.8f}", f"{s3_final_val:.8f}", s3_best_epoch, s3_epochs, s3_seconds,
        noisy_baseline, best_delta, final_delta,
        phi_params, psi_params,
        total_seconds,
    ]

    # upsert row into CSV
    row_df = pd.DataFrame([row], columns=CSV_HEADER)
    if Path(CSV_PATH).exists():
        df_old = pd.read_csv(CSV_PATH)
        key = os.path.basename(bundle_path)
        if "filename" in df_old.columns:
            df_old = df_old[df_old["filename"] != key]
        df_new = pd.concat([df_old, row_df], ignore_index=True)
        df_new.to_csv(CSV_PATH, index=False)
    else:
        row_df.to_csv(CSV_PATH, index=False)
    print(f"Upserted CSV row  → {CSV_PATH}")

# ---- save all runs from Cell 7 ----
for run in RUNS:
    save_one_run(run)

print("\nAll runs saved and recorded in CSV.")

Saved bundle → ./runs_halfqae/q4_l3t1/L1\4q_3l_1t_1ls_01.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L1\4q_3l_1t_1ls_02.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L1\4q_3l_1t_1ls_03.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L1\4q_3l_1t_1ls_04.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L1\4q_3l_1t_1ls_05.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L3\4q_3l_1t_3ls_01.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L3\4q_3l_1t_3ls_02.json
Upserted CSV row  → ./runs_halfqae/all_training_instances_v3.csv
Saved bundle → ./runs_halfqae/q4_l3t1/L3\4q_3l_1t_3ls_03.json
Upserted CSV row  → ./runs_halfqae/all_training_i

In [13]:
# =========================================================
# Cell 9 — Build & preview the training-only results table
# =========================================================
import pandas as pd
import numpy as np
from pathlib import Path

if not Path(CSV_PATH).exists():
    raise FileNotFoundError(f"CSV not found: {CSV_PATH}. Run Cell 8 first.")

df = pd.read_csv(CSV_PATH)

# --- NEW: drop duplicate runs; keep the newest copy (with baseline/delta)
if "filename" in df.columns:
    df = df.drop_duplicates(subset=["filename"], keep="last")
else:
    df = df.drop_duplicates(subset=["run_tag","instance_id","n_layers"], keep="last")

# Typical numeric casts (safe)
for col in [
    "s3_noisy_baseline_mse","s3_best_delta_pct","s3_final_delta_pct",
    "s3_best_val_mse","s3_final_val_mse",
    "s1_best_val","s1_final_val",
    "s1_train_seconds","s3_train_seconds","total_train_seconds",
    "s1_best_epoch","s1_epochs","s3_best_epoch","s3_epochs"
]:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")

df = df.sort_values(["n_layers","instance_id"]).reset_index(drop=True)

clean_path = f"{OUT_BASE}/all_training_instances_{CSV_SCHEMA_VERSION}.csv"
Path(OUT_BASE).mkdir(parents=True, exist_ok=True)
df.to_csv(clean_path, index=False)
print(f"Saved training-only table → {clean_path}")

# A compact per-layer summary (mean±std); guards against all-NaN
def mean_std_safe(s: pd.Series) -> str:
    v = pd.to_numeric(s, errors="coerce").astype(float)
    v = v[np.isfinite(v)]
    if v.size == 0: return "n/a"
    return f"{v.mean():.6f} ± {v.std(ddof=0):.6f}"

metrics = [
    ("s3_noisy_baseline_mse", "noisy_baseline_mse (mean±std)"),
    ("s3_best_val_mse",       "best_val_mse (mean±std)"),
    ("s3_final_val_mse",      "final_val_mse (mean±std)"),
    ("s3_best_delta_pct",     "best_delta_pct (mean±std)"),
    ("s3_final_delta_pct",    "final_delta_pct (mean±std)"),
    ("s1_best_val",           "s1_best_val (mean±std)"),
]

grp = df.groupby("n_layers", dropna=False)
summary = pd.DataFrame({"runs": grp.size()})
for col, label in metrics:
    if col in df.columns and np.isfinite(df[col]).any():
        summary[label] = grp[col].apply(mean_std_safe)

summary_path = f"{OUT_BASE}/summary_by_layers_{CSV_SCHEMA_VERSION}.csv"
summary.to_csv(summary_path, index=True)
print(f"Saved per-layer summary → {summary_path}")

display(df.head(10))
display(summary)

Saved training-only table → ./runs_halfqae/all_training_instances_v3.csv
Saved per-layer summary → ./runs_halfqae/summary_by_layers_v3.csv


Unnamed: 0,filename,run_tag,dataset_folder,instance_id,rng_seed,n_qubits,n_latent,n_trash,n_layers,sigma_train,...,s3_final_val_mse,s3_best_epoch,s3_epochs,s3_train_seconds,s3_noisy_baseline_mse,s3_best_delta_pct,s3_final_delta_pct,phi_params,psi_params,total_train_seconds
0,4q_3l_1t_1ls_01.json,inst1_L1,./runs_halfqae,1,1,4,3,1,1,0.1,...,0.093889,4,15,122.600948,0.098557,16.734282,1.314132,"[0.07920300593268884, 0.1741352513847527, -0.0...","[0.0366161477584314, -0.13505197325611903, -0....",223.291317
1,4q_3l_1t_1ls_02.json,inst2_L1,./runs_halfqae,2,2,4,3,1,1,0.1,...,0.091986,5,16,148.427447,0.095022,16.231979,-2.75394,"[-0.1419920046602157, 0.17041320656823505, 0.0...","[-0.04059580305207794, -0.1550606613555467, 0....",314.273312
2,4q_3l_1t_1ls_03.json,inst3_L1,./runs_halfqae,3,3,4,3,1,1,0.1,...,0.082347,4,15,87.759136,0.098217,17.082315,16.247384,"[-0.09213537167300323, 0.17792055054259284, 0....","[0.16011008854084957, -0.12812737228335272, -0...",200.461899
3,4q_3l_1t_1ls_04.json,inst4_L1,./runs_halfqae,4,4,4,3,1,1,0.1,...,0.094989,3,14,107.617503,0.101407,17.429752,13.082997,"[0.01920366015605288, 0.19949546744272428, -0....","[-0.00714619063878673, -0.132966307835624, 0.0...",234.036083
4,4q_3l_1t_1ls_05.json,inst5_L1,./runs_halfqae,5,5,4,3,1,1,0.1,...,0.089805,13,24,156.167812,0.098323,22.750509,5.944734,"[-0.006951072014653309, 0.19005931260571107, 0...","[0.05768951136773057, -0.23974031577343044, -0...",309.764705
5,4q_3l_1t_3ls_01.json,inst1_L3,./runs_halfqae,1,1,4,3,1,3,0.1,...,0.094036,14,25,907.6626,0.110019,24.199576,19.290737,"[0.004985296223276446, 0.00476188496708803, -0...","[-0.053674728899125806, -0.23733318852153798, ...",1360.208073
6,4q_3l_1t_3ls_02.json,inst2_L3,./runs_halfqae,2,2,4,3,1,3,0.1,...,0.088488,39,50,1242.876786,0.106812,30.178496,22.973731,"[0.005943542272704687, -0.005901366808950797, ...","[-0.06415235865224381, -0.19645577673833928, 0...",1626.11145
7,4q_3l_1t_3ls_03.json,inst3_L3,./runs_halfqae,3,3,4,3,1,3,0.1,...,0.044067,57,60,2711.642654,0.289907,85.154858,85.007987,"[-0.16021938627037816, 0.16815956481981176, 0....","[0.06863906304281042, -0.18745406220578748, -0...",4682.675478
8,4q_3l_1t_3ls_04.json,inst4_L3,./runs_halfqae,4,4,4,3,1,3,0.1,...,0.07096,56,60,2035.343617,0.356201,83.150482,79.737196,"[0.26651923602831495, 0.26923452972840145, -0....","[-0.14633881274005284, -0.020503136254279193, ...",4803.325313
9,4q_3l_1t_3ls_05.json,inst5_L3,./runs_halfqae,5,5,4,3,1,3,0.1,...,0.049984,58,60,1068.343299,0.327131,86.655977,83.909849,"[0.010133178674081058, 0.16217964084610895, 0....","[0.2433202582659158, -0.4172195315114159, -0.2...",2664.366669


Unnamed: 0_level_0,runs,noisy_baseline_mse (mean±std),best_val_mse (mean±std),final_val_mse (mean±std),best_delta_pct (mean±std),final_delta_pct (mean±std),s1_best_val (mean±std)
n_layers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,5,0.098305 ± 0.002023,0.101456 ± 0.001502,0.090603 ± 0.004489,18.045767 ± 2.385462,6.767062 ± 7.082763,0.057588 ± 0.000314
3,5,0.238014 ± 0.107888,0.071661 ± 0.023251,0.069507 ± 0.019962,61.867878 ± 28.400002,58.183900 ± 30.326011,0.030579 ± 0.020030
