# QuartumSE Research Benchmark Suite

This notebook benchmarks classical shadows against direct measurement baselines across **all circuits from the research plan**.

## Research Workstreams

| Workstream | Focus | Circuits |
|------------|-------|----------|
| **S** | Shadows Core | GHZ, Bell pairs, Clifford, Ising |
| **C** | Chemistry | H₂, LiH, BeH₂ molecular ansätze |
| **O** | Optimization | QAOA MAX-CUT |
| **B** | Benchmarking | RB/XEB random circuits |
| **M** | Metrology | GHZ phase sensing |

## Usage

1. Configure which circuits to run in Section 2
2. Run all cells
3. View the Task Summary Report at the end

In [1]:
# --- Setup ---
import sys
# Prepend the sibling ``src`` directory to the module search path so it is
# searched before site-packages.
# NOTE(review): presumably this is where the local ``quartumse`` checkout
# lives; the path is relative to the notebook's working directory — confirm.
sys.path.insert(0, '../src')

import numpy as np
from collections import Counter
from qiskit import QuantumCircuit

# Benchmark driver, its configuration types, and the observable helpers used
# by the builder cells below.
from quartumse import (
    run_benchmark_suite,
    BenchmarkMode,
    BenchmarkSuiteConfig,
    generate_observable_set,
    Observable,
    ObservableSet,
)

print("Setup complete!")

Setup complete!


---

## 1. Circuit Definitions

All circuits from the research plan, organized by workstream.

In [2]:
# =============================================================================
# CIRCUIT BUILDERS
# =============================================================================

# -----------------------------------------------------------------------------
# WORKSTREAM S: SHADOWS CORE
# -----------------------------------------------------------------------------

def build_ghz(n_qubits: int) -> QuantumCircuit:
    """Prepare the n-qubit GHZ state (|00...0⟩ + |11...1⟩) / sqrt(2).

    A Hadamard on qubit 0 is followed by a ladder of CNOTs, each one
    controlled on the previous qubit and targeting the next.

    Used in: SMOKE-SIM, SMOKE-HW, S-T01, S-T02
    """
    circuit = QuantumCircuit(n_qubits, name=f'GHZ_{n_qubits}q')
    circuit.h(0)
    # Neighbouring (control, target) pairs: (0, 1), (1, 2), ...
    for control, target in zip(range(n_qubits - 1), range(1, n_qubits)):
        circuit.cx(control, target)
    return circuit


def build_bell_pairs(n_pairs: int) -> QuantumCircuit:
    """Prepare ``n_pairs`` independent Bell pairs, each (|00⟩ + |11⟩) / sqrt(2).

    Pair ``k`` occupies qubits ``2k`` and ``2k + 1``.

    Used in: S-BELL
    """
    total_qubits = 2 * n_pairs
    circuit = QuantumCircuit(total_qubits, name=f'Bell_{n_pairs}pairs')
    # Step through the even qubits; each one is the control of its pair.
    for first in range(0, total_qubits, 2):
        second = first + 1
        circuit.h(first)
        circuit.cx(first, second)
    return circuit


def build_random_clifford(n_qubits: int, depth: int, seed: int = 42) -> QuantumCircuit:
    """Build a seeded random circuit from single-qubit Clifford gates and CNOTs.

    Each of the ``depth`` layers applies one randomly chosen gate from
    {H, S, S†, X, Y, Z} to every qubit, then a CNOT on each pair
    (0, 1), (2, 3), ... with probability 0.7.

    NOTE(review): CNOTs are only ever placed on pairs starting at an even
    index, so qubits 1 and 2 (and a trailing odd qubit) are never coupled.

    Used in: S-CLIFF, B-T01
    """
    generator = np.random.default_rng(seed)
    circuit = QuantumCircuit(n_qubits, name=f'Clifford_{n_qubits}q_d{depth}')

    single_qubit_gates = ['h', 's', 'sdg', 'x', 'y', 'z']
    even_controls = range(0, n_qubits - 1, 2)

    for _layer in range(depth):
        # One random single-qubit gate per qubit (one RNG draw each).
        for qubit in range(n_qubits):
            gate_name = generator.choice(single_qubit_gates)
            apply_gate = getattr(circuit, gate_name)
            apply_gate(qubit)

        # One RNG draw per candidate pair decides whether a CNOT is placed.
        for control in even_controls:
            draw = generator.random()
            if draw <= 0.3:
                continue
            circuit.cx(control, control + 1)

    return circuit


def build_ising_trotter(n_qubits: int, steps: int = 3, dt: float = 0.5) -> QuantumCircuit:
    """First-order Trotter circuit for the 1D transverse-field Ising model.

    H = -J Σ Z_i Z_{i+1} - h Σ X_i   with J = 1.0 and h = 0.5 fixed.

    Starts from |+⟩ on every qubit, then repeats ``steps`` times: a
    CX–RZ–CX block on every nearest-neighbour bond followed by an RX on
    every qubit.

    NOTE(review): with Qiskit's RZ(θ) = exp(-iθZ/2) convention the positive
    angles used here correspond to exp(-i·dt·(+J ZZ + h X)), i.e. the
    opposite overall sign from the H written above — confirm this is intended.

    Used in: S-ISING
    """
    coupling = 1.0  # J, coupling strength
    field = 0.5     # h, transverse field
    zz_angle = 2 * coupling * dt
    x_angle = 2 * field * dt

    circuit = QuantumCircuit(n_qubits, name=f'Ising_{n_qubits}q_t{steps}')
    all_qubits = range(n_qubits)

    # Initial product state |+...+⟩
    for qubit in all_qubits:
        circuit.h(qubit)

    for _step in range(steps):
        # ZZ coupling on each bond of the open chain
        for left in range(n_qubits - 1):
            right = left + 1
            circuit.cx(left, right)
            circuit.rz(zz_angle, right)
            circuit.cx(left, right)

        # Transverse-field rotation on every site
        for qubit in all_qubits:
            circuit.rx(x_angle, qubit)

    return circuit


# -----------------------------------------------------------------------------
# WORKSTREAM C: CHEMISTRY
# -----------------------------------------------------------------------------

def build_h2_ansatz(theta: float = 0.0) -> QuantumCircuit:
    """Simplified UCCSD-like ansatz for H₂ on 4 qubits (STO-3G basis).

    Qubits 0 and 1 are flipped to form the reference state, then two
    CX–RY–CX blocks parameterised by ``theta`` are applied.

    Used in: C-T01
    """
    circuit = QuantumCircuit(4, name='H2_ansatz')

    # Hartree-Fock reference |0011⟩: the two lowest orbitals are occupied.
    for occupied in (0, 1):
        circuit.x(occupied)

    # (control, target, angle): first the single-excitation-like block on
    # qubits 1,2, then the double-excitation approximation on qubits 0,3.
    rotation_blocks = (
        (1, 2, theta),
        (0, 3, theta / 2),
    )
    for control, target, angle in rotation_blocks:
        circuit.cx(control, target)
        circuit.ry(angle, target)
        circuit.cx(control, target)

    return circuit


def build_lih_ansatz(theta: float = 0.0) -> QuantumCircuit:
    """Simplified ansatz for LiH on 6 qubits (minimal basis).

    Qubits 0-3 are flipped to form the reference state, then two
    CX–RY–CX blocks parameterised by ``theta`` are applied.

    Used in: C-T02
    """
    circuit = QuantumCircuit(6, name='LiH_ansatz')

    # Hartree-Fock reference with 4 electrons: X on qubits 0-3, i.e.
    # |001111⟩ when qubit 0 is written rightmost.
    for occupied in range(4):
        circuit.x(occupied)

    # Excitation-like blocks as (control, target, angle).
    rotation_blocks = (
        (3, 4, theta),
        (2, 5, theta / 2),
    )
    for control, target, angle in rotation_blocks:
        circuit.cx(control, target)
        circuit.ry(angle, target)
        circuit.cx(control, target)

    return circuit


def build_beh2_ansatz(theta: float = 0.0) -> QuantumCircuit:
    """Simplified ansatz for BeH₂ on 8 qubits (minimal basis).

    Qubits 0-5 are flipped to form the reference state, then two
    CX–RY–CX blocks parameterised by ``theta`` are applied.

    Used in: C-T03
    """
    circuit = QuantumCircuit(8, name='BeH2_ansatz')

    # Hartree-Fock reference with 6 electrons: X on qubits 0-5, i.e.
    # |00111111⟩ when qubit 0 is written rightmost.
    occupied = 0
    while occupied < 6:
        circuit.x(occupied)
        occupied += 1

    # Excitation-like blocks as (control, target, angle).
    rotation_blocks = (
        (5, 6, theta),
        (4, 7, theta / 2),
    )
    for control, target, angle in rotation_blocks:
        circuit.cx(control, target)
        circuit.ry(angle, target)
        circuit.cx(control, target)

    return circuit


# -----------------------------------------------------------------------------
# WORKSTREAM O: OPTIMIZATION
# -----------------------------------------------------------------------------

def build_qaoa_maxcut_ring(n_qubits: int, p: int = 1, gamma: float = 0.5, beta: float = 0.5) -> QuantumCircuit:
    """QAOA for MAX-CUT on a ring graph.

    Ring topology: 0-1-2-3-...-n-1-0

    Args:
        n_qubits: Number of graph vertices (one qubit each).
        p: Number of QAOA layers.
        gamma: Cost-layer angle; each edge gets CX, RZ(2*gamma), CX.
        beta: Mixer-layer angle; each qubit gets RX(2*beta).

    Returns:
        The QAOA circuit: H on every qubit followed by ``p`` cost+mixer layers.

    Used in: O-T01, O-T02
    """
    qc = QuantumCircuit(n_qubits, name=f'QAOA_ring_{n_qubits}q_p{p}')

    # A proper ring needs at least three vertices.  With fewer, the
    # wrap-around edge either duplicates the only edge (n = 2, which would
    # apply the cost term twice) or is a self-loop (n = 1, an invalid
    # cx(0, 0)), so fall back to the open chain in those cases.
    if n_qubits >= 3:
        edges = [(q, (q + 1) % n_qubits) for q in range(n_qubits)]
    else:
        edges = [(q, q + 1) for q in range(n_qubits - 1)]

    # Initial superposition
    for q in range(n_qubits):
        qc.h(q)

    # QAOA layers
    for _ in range(p):
        # Cost layer: ZZ interaction on every edge
        for q, q_next in edges:
            qc.cx(q, q_next)
            qc.rz(2 * gamma, q_next)
            qc.cx(q, q_next)

        # Mixer layer: RX on all qubits
        for q in range(n_qubits):
            qc.rx(2 * beta, q)

    return qc


# -----------------------------------------------------------------------------
# WORKSTREAM M: METROLOGY
# -----------------------------------------------------------------------------

def build_ghz_phase_sensing(n_qubits: int, phi: float = 0.1) -> QuantumCircuit:
    """GHZ probe state followed by an RZ(phi) phase imprint on every qubit.

    Used in: M-T01
    """
    circuit = QuantumCircuit(n_qubits, name=f'GHZ_phase_{n_qubits}q')

    # GHZ preparation: Hadamard on qubit 0, then a CNOT ladder.
    circuit.h(0)
    for target in range(1, n_qubits):
        control = target - 1
        circuit.cx(control, target)

    # Sensing step: the same phase rotation is applied to each qubit.
    qubit = 0
    while qubit < n_qubits:
        circuit.rz(phi, qubit)
        qubit += 1

    return circuit


# -----------------------------------------------------------------------------
# WORKSTREAM B: BENCHMARKING
# -----------------------------------------------------------------------------

def build_xeb_circuit(n_qubits: int, depth: int, seed: int = 42) -> QuantumCircuit:
    """Seeded random circuit for cross-entropy benchmarking.

    Every layer applies one random gate from {H, X, Y, Z, S, T, S†, T†} to
    each qubit, then CNOTs on neighbouring pairs; the pairing starts at
    qubit 0 on even layers and at qubit 1 on odd layers.

    Used in: B-T01
    """
    generator = np.random.default_rng(seed)
    circuit = QuantumCircuit(n_qubits, name=f'XEB_{n_qubits}q_d{depth}')

    single_qubit_gates = ['h', 'x', 'y', 'z', 's', 't', 'sdg', 'tdg']

    for layer_index in range(depth):
        # One RNG draw per qubit selects its gate for this layer.
        for qubit in range(n_qubits):
            gate_name = generator.choice(single_qubit_gates)
            apply_gate = getattr(circuit, gate_name)
            apply_gate(qubit)

        # Brickwork entangling pattern: the offset alternates between 0 and 1.
        offset = layer_index & 1
        for control in range(offset, n_qubits - 1, 2):
            circuit.cx(control, control + 1)

    return circuit


# Cell-completion marker: printed once every circuit builder above is defined.
print("Circuit builders defined!")

Circuit builders defined!


In [None]:
# =============================================================================
# OBSERVABLE BUILDERS - COMPREHENSIVE COVERAGE
# =============================================================================
# Each circuit needs 25-30+ observables with mixed localities (k=1,2,3,...,n)
# to properly benchmark classical shadows vs direct measurement.

def build_comprehensive_observables(n_qubits: int,
                                     include_stabilizers: list[str] = None,
                                     min_observables: int = 25) -> list[Observable]:
    """Build a comprehensive observable set with mixed localities.

    Ensures coverage across all Pauli weight levels for proper benchmarking.
    The returned order is deterministic (construction order), so repeated
    runs with the same arguments yield the same list.

    Args:
        n_qubits: Length of every Pauli string.
        include_stabilizers: Optional extra Pauli strings; entries whose
            length differs from ``n_qubits`` are ignored.
        min_observables: If fewer distinct strings were built, random Pauli
            observables (seed 42) are appended until this count is reached
            or the generated pool is exhausted.

    Returns:
        List of ``Observable`` objects with distinct Pauli strings.
    """
    # A dict is used as an insertion-ordered set.  Iterating a plain ``set``
    # of strings gives a different order in every interpreter session (string
    # hash randomisation), which made the observable order irreproducible.
    paulis = {}

    def embed(start, core):
        """Pad ``core`` with identities so that it begins at qubit ``start``."""
        return 'I' * start + core + 'I' * (n_qubits - start - len(core))

    # 1-local: All single-qubit X, Y, Z
    for i in range(n_qubits):
        for pauli in ('X', 'Y', 'Z'):
            paulis[embed(i, pauli)] = None

    # 2-local: Nearest-neighbor ZZ, XX, YY, XY, XZ, YZ
    for i in range(n_qubits - 1):
        for pair in ('ZZ', 'XX', 'YY', 'XY', 'XZ', 'YZ'):
            paulis[embed(i, pair)] = None

    # 3-local (if n >= 3): contiguous ZZZ and XXX
    if n_qubits >= 3:
        for i in range(n_qubits - 2):
            paulis[embed(i, 'ZZZ')] = None
            paulis[embed(i, 'XXX')] = None

    # n-local: Full weight strings
    if n_qubits <= 8:
        paulis['Z' * n_qubits] = None
        paulis['X' * n_qubits] = None
        if n_qubits >= 4:
            paulis['Y' * n_qubits] = None

    # Add custom stabilizers if provided
    if include_stabilizers:
        for s in include_stabilizers:
            if len(s) == n_qubits:
                paulis[s] = None

    obs_list = [Observable(s) for s in paulis]

    # Add random observables if we need more to reach the minimum
    shortfall = min_observables - len(obs_list)
    if shortfall > 0:
        extra = generate_observable_set(
            generator_id='random_pauli',
            n_qubits=n_qubits,
            n_observables=shortfall + 5,  # small surplus to absorb duplicates
            seed=42,
        )
        for o in extra.observables:
            if o.pauli_string in paulis:
                continue
            # Record the string so a repeated random draw is not added twice.
            paulis[o.pauli_string] = None
            obs_list.append(o)
            if len(obs_list) >= min_observables:
                break

    return obs_list


def build_ghz_observables(n_qubits: int) -> list[Observable]:
    """GHZ state observables - comprehensive set.

    Passes the all-X string, the all-Z string and every nearest-neighbour
    ZZ parity check to ``build_comprehensive_observables`` as extra
    stabilizers, requesting at least 25 observables.
    """
    # Nearest-neighbour parity checks: ZZII..., IZZI..., ...
    parity_checks = [
        'I' * left + 'ZZ' + 'I' * (n_qubits - left - 2)
        for left in range(n_qubits - 1)
    ]
    # Full-weight strings first, then the parity checks.
    stabilizers = ['X' * n_qubits, 'Z' * n_qubits] + parity_checks

    return build_comprehensive_observables(n_qubits, stabilizers, min_observables=25)


def build_bell_observables(n_pairs: int) -> list[Observable]:
    """Bell pair observables - comprehensive set with cross-pair correlations.

    The returned order is deterministic (construction order).

    Args:
        n_pairs: Number of Bell pairs; pair ``k`` sits on qubits 2k and 2k+1.

    Returns:
        List of ``Observable`` objects with distinct Pauli strings.  If fewer
        than 25 structured observables exist, random Pauli observables
        (seed 42) are appended, up to roughly 30 in total.
    """
    n_qubits = 2 * n_pairs
    # A dict is used as an insertion-ordered set: iterating a ``set`` of
    # strings yields a different order in every interpreter session.
    paulis = {}

    def place(*assignments):
        """Return the identity string with the given (index, pauli) pairs set."""
        chars = ['I'] * n_qubits
        for index, pauli in assignments:
            chars[index] = pauli
        return ''.join(chars)

    # Within-pair correlators (Bell stabilizers and mixed XY/YX terms)
    for pair in range(n_pairs):
        first = 2 * pair
        for left, right in [('Z', 'Z'), ('X', 'X'), ('Y', 'Y'), ('X', 'Y'), ('Y', 'X')]:
            paulis[place((first, left), (first + 1, right))] = None

    # Single-qubit observables (detect decoherence)
    for qubit in range(n_qubits):
        for pauli in ('X', 'Y', 'Z'):
            paulis[place((qubit, pauli))] = None

    # Cross-pair correlators (detect crosstalk): same Pauli on the first
    # qubit of two different pairs.
    for pair_a in range(n_pairs):
        for pair_b in range(pair_a + 1, n_pairs):
            for pauli in ('Z', 'X'):
                paulis[place((2 * pair_a, pauli), (2 * pair_b, pauli))] = None

    # Full-weight strings and contiguous 3-body ZZZ terms
    if n_qubits >= 4:
        paulis['Z' * n_qubits] = None
        paulis['X' * n_qubits] = None
        for start in range(n_qubits - 2):
            paulis['I' * start + 'ZZZ' + 'I' * (n_qubits - start - 3)] = None

    obs_list = [Observable(s) for s in paulis]

    # Pad with random observables if needed
    if len(obs_list) < 25:
        extra = generate_observable_set(
            generator_id='random_pauli',
            n_qubits=n_qubits,
            n_observables=30 - len(obs_list),
            seed=42,
        )
        for o in extra.observables:
            if o.pauli_string not in paulis:
                # Record the string so a repeated random draw is not added twice.
                paulis[o.pauli_string] = None
                obs_list.append(o)

    return obs_list


def build_ising_observables(n_qubits: int) -> list[Observable]:
    """Ising model observables - comprehensive energy and correlation terms.

    The returned order is deterministic (construction order).

    Args:
        n_qubits: Number of sites in the 1D chain.

    Returns:
        List of ``Observable`` objects with distinct Pauli strings.  If fewer
        than 25 structured observables exist, random Pauli observables
        (seed 42) are appended, up to roughly 30 in total.
    """
    # A dict is used as an insertion-ordered set: iterating a ``set`` of
    # strings yields a different order in every interpreter session.
    paulis = {}

    def embed(start, core):
        """Pad ``core`` with identities so that it begins at qubit ``start``."""
        return 'I' * start + core + 'I' * (n_qubits - start - len(core))

    # Hamiltonian terms: ZZ interactions (1D chain)
    for i in range(n_qubits - 1):
        paulis[embed(i, 'ZZ')] = None

    # Transverse field (X), magnetization (Z), then Y for complete
    # 1-local coverage
    for pauli in ('X', 'Z', 'Y'):
        for i in range(n_qubits):
            paulis[embed(i, pauli)] = None

    # Additional 2-body: XX, YY correlators
    for i in range(n_qubits - 1):
        paulis[embed(i, 'XX')] = None
        paulis[embed(i, 'YY')] = None

    # 3-body: ZZZ for energy fluctuations
    if n_qubits >= 3:
        for i in range(n_qubits - 2):
            paulis[embed(i, 'ZZZ')] = None

    # Full weight
    paulis['Z' * n_qubits] = None
    paulis['X' * n_qubits] = None

    obs_list = [Observable(s) for s in paulis]

    # Pad with random observables if needed
    if len(obs_list) < 25:
        extra = generate_observable_set(
            generator_id='random_pauli',
            n_qubits=n_qubits,
            n_observables=30 - len(obs_list),
            seed=42,
        )
        for o in extra.observables:
            if o.pauli_string not in paulis:
                # Record the string so a repeated random draw is not added twice.
                paulis[o.pauli_string] = None
                obs_list.append(o)

    return obs_list


def build_h2_observables() -> list[Observable]:
    """H₂ Hamiltonian terms (STO-3G, Jordan-Wigner) + comprehensive coverage.

    Returns:
        List of ``Observable`` objects over 4 qubits with distinct Pauli
        strings, in deterministic construction order.  The identity string
        ``'IIII'`` (energy offset) is excluded.
    """
    n = 4
    # A dict is used as an insertion-ordered set: iterating a ``set`` of
    # strings yields a different order in every interpreter session.
    paulis = {}

    # Core Hamiltonian terms
    hamiltonian = [
        'IIII',  # Identity (energy offset)
        'ZIII', 'IZII', 'IIZI', 'IIIZ',  # 1-body Z
        'ZZII', 'IIZZ', 'ZIZI', 'IZIZ', 'ZIIZ', 'IZZI',  # 2-body ZZ
        'XXXX', 'YYYY', 'XXYY', 'YYXX',  # 4-body hopping
        'XXII', 'YYII', 'IIXX', 'IIYY',  # 2-body hopping
    ]
    paulis.update(dict.fromkeys(hamiltonian))

    # Complete 1-local coverage
    for i in range(n):
        for p in ('X', 'Y', 'Z'):
            paulis['I' * i + p + 'I' * (n - i - 1)] = None

    # Additional 2-local
    for i in range(n - 1):
        for pair in ('XY', 'YX', 'XZ', 'ZX'):
            paulis['I' * i + pair + 'I' * (n - i - 2)] = None

    # 3-local
    for i in range(n - 2):
        paulis['I' * i + 'ZZZ' + 'I' * (n - i - 3)] = None
        paulis['I' * i + 'XXX' + 'I' * (n - i - 3)] = None

    # The identity only carries the constant offset, so it is not measured.
    return [Observable(s) for s in paulis if s != 'IIII']


def build_lih_observables() -> list[Observable]:
    """Observable set for the 6-qubit LiH circuit.

    Delegates to ``build_comprehensive_observables`` with a minimum of 30
    observables; no molecule-specific Hamiltonian terms are added here.
    """
    lih_qubits = 6
    observables = build_comprehensive_observables(lih_qubits, min_observables=30)
    return observables


def build_qaoa_observables(n_qubits: int) -> list[Observable]:
    """QAOA MAX-CUT observables - cost function + comprehensive coverage.

    The returned order is deterministic (construction order).

    Args:
        n_qubits: Number of ring vertices / qubits.

    Returns:
        List of ``Observable`` objects with distinct Pauli strings.  If fewer
        than 25 structured observables exist, random Pauli observables
        (seed 42) are appended, up to roughly 30 in total.
    """
    # A dict is used as an insertion-ordered set: iterating a ``set`` of
    # strings yields a different order in every interpreter session.
    paulis = {}

    def place(*assignments):
        """Return the identity string with the given (index, pauli) pairs set."""
        chars = ['I'] * n_qubits
        for index, pauli in assignments:
            chars[index] = pauli
        return ''.join(chars)

    # Cost function: ZZ on ring edges (including the wrap-around edge)
    for i in range(n_qubits):
        j = (i + 1) % n_qubits
        paulis[place((i, 'Z'), (j, 'Z'))] = None

    # Single-qubit (mixer dynamics)
    for i in range(n_qubits):
        for p in ('X', 'Y', 'Z'):
            paulis[place((i, p))] = None

    # Additional 2-body correlators on the open chain
    for i in range(n_qubits - 1):
        for pair in ('XX', 'YY', 'XY'):
            paulis['I' * i + pair + 'I' * (n_qubits - i - 2)] = None

    # 3-body (energy fluctuations)
    if n_qubits >= 3:
        for i in range(n_qubits - 2):
            paulis['I' * i + 'ZZZ' + 'I' * (n_qubits - i - 3)] = None

    # Full weight
    paulis['Z' * n_qubits] = None
    paulis['X' * n_qubits] = None

    obs_list = [Observable(s) for s in paulis]

    # Pad with random observables if needed
    if len(obs_list) < 25:
        extra = generate_observable_set(
            generator_id='random_pauli',
            n_qubits=n_qubits,
            n_observables=30 - len(obs_list),
            seed=42,
        )
        for o in extra.observables:
            if o.pauli_string not in paulis:
                # Record the string so a repeated random draw is not added twice.
                paulis[o.pauli_string] = None
                obs_list.append(o)

    return obs_list


def build_random_observables(n_qubits: int, n_obs: int, seed: int = 42) -> list[Observable]:
    """Random Pauli observables with guaranteed locality coverage.

    A fixed set of 1-local, 2-local and full-weight strings is built first
    and then topped up with seeded random Pauli strings.

    Args:
        n_qubits: Length of every Pauli string.
        n_obs: Maximum number of observables to return.
        seed: Seed passed to the random Pauli generator.

    Returns:
        At most ``n_obs`` observables with distinct Pauli strings, in
        deterministic construction order (structured terms first).  When
        ``n_obs`` is smaller than the structured set, the earliest-built
        terms are the ones kept.
    """
    # A dict is used as an insertion-ordered set.  The previous ``set`` made
    # both the order and, when truncating to ``n_obs``, the *selection* of
    # observables depend on per-session string hash randomisation.
    paulis = {}

    # Ensure at least some observables at each locality level
    # 1-local (first 6 qubits at most)
    for i in range(min(n_qubits, 6)):
        for p in ('X', 'Y', 'Z'):
            paulis['I' * i + p + 'I' * (n_qubits - i - 1)] = None

    # 2-local (first 5 bonds at most)
    for i in range(min(n_qubits - 1, 5)):
        paulis['I' * i + 'ZZ' + 'I' * (n_qubits - i - 2)] = None
        paulis['I' * i + 'XX' + 'I' * (n_qubits - i - 2)] = None

    # n-local
    paulis['Z' * n_qubits] = None
    paulis['X' * n_qubits] = None

    # Fill remainder with random
    if len(paulis) < n_obs:
        extra = generate_observable_set(
            generator_id='random_pauli',
            n_qubits=n_qubits,
            n_observables=n_obs - len(paulis) + 10,  # surplus absorbs duplicates
            seed=seed,
        )
        for o in extra.observables:
            if o.pauli_string not in paulis:
                paulis[o.pauli_string] = None
                if len(paulis) >= n_obs:
                    break

    return [Observable(s) for s in list(paulis)[:n_obs]]


# Cell-completion markers: printed once every observable builder is defined.
print("Observable builders defined!")
# NOTE(review): the count in this message is not guaranteed by every builder —
# build_random_observables returns at most ``n_obs`` items, and some callers
# request only 20.
print("All circuits now have 25-30+ observables with mixed localities (k=1,2,3,...,n)")

---

## 2. Circuit Selection

**Configure which circuits to benchmark.** Set `True` to run, `False` to skip.

In [4]:
# =============================================================================
# CIRCUIT SELECTION - SET True/False TO RUN/SKIP
# =============================================================================
# Maps each benchmark id to a flag; only ids flagged True are listed below.

CIRCUITS_TO_RUN = {
    # ---- Workstream S: shadows core -----------------------------------------
    'S-SMOKE-3Q': False,   # 3-qubit GHZ (quick smoke test)
    'S-SMOKE-4Q': False,   # 4-qubit GHZ (quick smoke test)
    'S-T01-4Q': False,     # 4-qubit GHZ (full validation)
    'S-T01-5Q': False,     # 5-qubit GHZ (stretch goal)
    'S-BELL-2': True,      # 2 Bell pairs (4 qubits)
    'S-BELL-3': False,     # 3 Bell pairs (6 qubits)
    'S-BELL-4': False,     # 4 Bell pairs (8 qubits)
    'S-CLIFF-5': False,    # 5-qubit random Clifford
    'S-ISING-4': False,    # 4-qubit Ising Trotter
    'S-ISING-6': False,    # 6-qubit Ising Trotter

    # ---- Workstream C: chemistry --------------------------------------------
    'C-H2': False,         # H₂ molecule (4 qubits)
    'C-LiH': False,        # LiH molecule (6 qubits)
    'C-BeH2': False,       # BeH₂ molecule (8 qubits)

    # ---- Workstream O: optimization -----------------------------------------
    'O-QAOA-5-P1': False,  # QAOA MAX-CUT 5 qubits, p=1
    'O-QAOA-5-P2': False,  # QAOA MAX-CUT 5 qubits, p=2
    'O-QAOA-7-P1': False,  # QAOA MAX-CUT 7 qubits, p=1

    # ---- Workstream B: benchmarking -----------------------------------------
    'B-XEB-3': False,      # 3-qubit XEB
    'B-XEB-5': False,      # 5-qubit XEB
    'B-CLIFF-3': False,    # 3-qubit Clifford RB

    # ---- Workstream M: metrology --------------------------------------------
    'M-PHASE-3': False,    # 3-qubit GHZ phase sensing
    'M-PHASE-4': False,    # 4-qubit GHZ phase sensing
}

# Ids whose flag is truthy, in the order they appear in the table above.
enabled = [
    circuit_name
    for circuit_name, selected in CIRCUITS_TO_RUN.items()
    if selected
]
print(f"Circuits to run: {len(enabled)} / {len(CIRCUITS_TO_RUN)}")
for c in enabled:
    print(f"  ✓ {c}")

Circuits to run: 1 / 21
  ✓ S-BELL-2


In [5]:
# =============================================================================
# BUILD SELECTED CIRCUITS AND OBSERVABLES
# =============================================================================

# Maps each benchmark id to a (circuit, observable list) pair.
# NOTE: every builder call below runs as soon as this dict literal is
# evaluated, including those for ids that are disabled in CIRCUITS_TO_RUN.
CIRCUIT_DEFINITIONS = {
    # WORKSTREAM S
    'S-SMOKE-3Q': (build_ghz(3), build_ghz_observables(3)),
    'S-SMOKE-4Q': (build_ghz(4), build_ghz_observables(4)),
    'S-T01-4Q':   (build_ghz(4), build_ghz_observables(4)),
    'S-T01-5Q':   (build_ghz(5), build_ghz_observables(5)),
    'S-BELL-2':   (build_bell_pairs(2), build_bell_observables(2)),
    'S-BELL-3':   (build_bell_pairs(3), build_bell_observables(3)),
    'S-BELL-4':   (build_bell_pairs(4), build_bell_observables(4)),
    'S-CLIFF-5':  (build_random_clifford(5, 10), build_random_observables(5, 50)),
    'S-ISING-4':  (build_ising_trotter(4, 3), build_ising_observables(4)),
    'S-ISING-6':  (build_ising_trotter(6, 3), build_ising_observables(6)),
    
    # WORKSTREAM C
    'C-H2':       (build_h2_ansatz(0.5), build_h2_observables()),
    'C-LiH':      (build_lih_ansatz(0.5), build_lih_observables()),
    # BeH₂ has no dedicated observable builder; it uses the generic random set.
    'C-BeH2':     (build_beh2_ansatz(0.5), build_random_observables(8, 30)),
    
    # WORKSTREAM O
    'O-QAOA-5-P1': (build_qaoa_maxcut_ring(5, p=1), build_qaoa_observables(5)),
    'O-QAOA-5-P2': (build_qaoa_maxcut_ring(5, p=2), build_qaoa_observables(5)),
    'O-QAOA-7-P1': (build_qaoa_maxcut_ring(7, p=1), build_qaoa_observables(7)),
    
    # WORKSTREAM B
    'B-XEB-3':    (build_xeb_circuit(3, 10), build_random_observables(3, 20)),
    'B-XEB-5':    (build_xeb_circuit(5, 10), build_random_observables(5, 40)),
    'B-CLIFF-3':  (build_random_clifford(3, 10), build_random_observables(3, 20)),
    
    # WORKSTREAM M
    'M-PHASE-3':  (build_ghz_phase_sensing(3, 0.1), build_ghz_observables(3)),
    'M-PHASE-4':  (build_ghz_phase_sensing(4, 0.1), build_ghz_observables(4)),
}

# Assemble the benchmark inputs for every enabled circuit that has a definition.
circuits_to_benchmark = {}
for circuit_id, should_run in CIRCUITS_TO_RUN.items():
    # Disabled ids, and enabled ids without a definition, are skipped.
    if not should_run or circuit_id not in CIRCUIT_DEFINITIONS:
        continue

    circuit, obs_list = CIRCUIT_DEFINITIONS[circuit_id]

    # Locality (Pauli weight) = number of non-identity characters in the string.
    locality_map = {
        obs.observable_id: len(obs.pauli_string) - obs.pauli_string.count('I')
        for obs in obs_list
    }

    obs_set = ObservableSet(
        observables=obs_list,
        observable_set_id=f'{circuit_id}_obs',
        generator_id='research',
        generator_seed=42,
    )

    entry = {
        'circuit': circuit,
        'observables': obs_set,
        'locality_map': locality_map,
        'n_qubits': circuit.num_qubits,
        'n_observables': len(obs_set),
    }
    circuits_to_benchmark[circuit_id] = entry

print(f"\nBuilt {len(circuits_to_benchmark)} circuits:")
for cid in circuits_to_benchmark:
    info = circuits_to_benchmark[cid]
    print(f"  {cid}: {info['n_qubits']} qubits, {info['n_observables']} observables")


Built 1 circuits:
  S-BELL-2: 4 qubits, 6 observables


---

## 3. Benchmark Configuration

In [6]:
# =============================================================================
# BENCHMARK CONFIGURATION
# =============================================================================

# One configuration object shared by every benchmark run in this notebook.
BENCHMARK_CONFIG = BenchmarkSuiteConfig(
    mode=BenchmarkMode.ANALYSIS,  # Full analysis with all enhanced features
    n_shots_grid=[100, 500, 1000, 5000],
    n_replicates=10,              # Increase for publication (20+)
    seed=42,
    # NOTE(review): presumably the target precision and failure probability
    # used by the suite's tasks — confirm against BenchmarkSuiteConfig.
    epsilon=0.01,
    delta=0.05,
    # Protocol ids; the results cells look up summaries under these same ids.
    shadows_protocol_id="classical_shadows_v0",
    baseline_protocol_id="direct_grouped",
    output_base_dir="research_benchmark_results",
)

# Echo the key settings so they are recorded alongside the results.
print("Benchmark configuration:")
print(f"  Mode: {BENCHMARK_CONFIG.mode.value}")
print(f"  Shot grid: {BENCHMARK_CONFIG.n_shots_grid}")
print(f"  Replicates: {BENCHMARK_CONFIG.n_replicates}")
print(f"  Output: {BENCHMARK_CONFIG.output_base_dir}/")

Benchmark configuration:
  Mode: analysis
  Shot grid: [100, 500, 1000, 5000]
  Replicates: 10
  Output: research_benchmark_results/


---

## 4. Run Benchmarks

In [7]:
%%time
# =============================================================================
# RUN ALL SELECTED BENCHMARKS
# =============================================================================

# circuit id -> result object returned by run_benchmark_suite
all_results = {}

for i, circuit_id in enumerate(circuits_to_benchmark, 1):
    info = circuits_to_benchmark[circuit_id]

    # Progress banner for this circuit
    print(f"\n{'='*80}")
    print(f"BENCHMARK {i}/{len(circuits_to_benchmark)}: {circuit_id}")
    print(f"{'='*80}")

    all_results[circuit_id] = run_benchmark_suite(
        circuit=info['circuit'],
        observable_set=info['observables'],
        circuit_id=circuit_id,
        config=BENCHMARK_CONFIG,
        locality_map=info['locality_map'],
    )

print(f"\n\n{'='*80}")
print(f"ALL BENCHMARKS COMPLETE: {len(all_results)} circuits")
print(f"{'='*80}")


BENCHMARK 1/1: S-BELL-2
BENCHMARK SUITE: ANALYSIS
Run ID: S-BELL-2_20260116_154231_f48be761
Output: research_benchmark_results\S-BELL-2_20260116_154231_f48be761
Mode: analysis

Step 1: Running base benchmark...
  Completed: 720 rows

Step 2: Running all 8 tasks...
  Completed: 12 task evaluations

Step 3: Running comprehensive analysis...
  Comprehensive analysis complete

Step 4: Generating reports...
  Basic report: research_benchmark_results\S-BELL-2_20260116_154231_f48be761\basic_report.md
  Complete report: research_benchmark_results\S-BELL-2_20260116_154231_f48be761\complete_report.md
  Analysis report: research_benchmark_results\S-BELL-2_20260116_154231_f48be761\analysis_report.md
  Analysis JSON: research_benchmark_results\S-BELL-2_20260116_154231_f48be761\analysis.json

BENCHMARK COMPLETE
Output directory: research_benchmark_results\S-BELL-2_20260116_154231_f48be761
Reports generated: ['basic', 'complete', 'analysis', 'analysis_json', 'config', 'manifest']



ALL BENCHMARKS COMPLETE: 1 circuits

---

## 5. Consolidated Results

In [8]:
# =============================================================================
# CONSOLIDATED SUMMARY ACROSS ALL CIRCUITS
# =============================================================================

from collections import defaultdict

print("="*100)
print("CONSOLIDATED BENCHMARK RESULTS")
print("="*100)
print()

# Summary table
print(f"{'Circuit':<15} {'Qubits':>6} {'Obs':>5} {'Shadows SE':>12} {'Grouped SE':>12} {'Ratio':>8} {'Winner':>15}")
print("-" * 85)

shadows_wins = 0
grouped_wins = 0

for circuit_id, result in all_results.items():
    info = circuits_to_benchmark[circuit_id]
    summaries = result.summary.get('protocol_summaries', {})

    # ``None`` marks a protocol with no reported mean SE.
    shadows_se = summaries.get('classical_shadows_v0', {}).get('mean_se')
    grouped_se = summaries.get('direct_grouped', {}).get('mean_se')

    if shadows_se is None or grouped_se is None:
        # Without both numbers there is nothing to compare: show the row but
        # do not award a win.  (An infinite placeholder used to leak into the
        # ratio here, printing "nan"/"0.00x" and counting a bogus winner.)
        shadows_txt = f"{'n/a':>12}" if shadows_se is None else f"{shadows_se:>12.4f}"
        grouped_txt = f"{'n/a':>12}" if grouped_se is None else f"{grouped_se:>12.4f}"
        print(f"{circuit_id:<15} {info['n_qubits']:>6} {info['n_observables']:>5} "
              f"{shadows_txt} {grouped_txt} {'n/a':>9} {'N/A':>15}")
        continue

    # Ratio < 1 means shadows reached a smaller mean SE than the baseline.
    ratio = shadows_se / grouped_se if grouped_se > 0 else float('inf')
    winner = 'Shadows' if ratio < 1 else 'Grouped'

    if ratio < 1:
        shadows_wins += 1
    else:
        grouped_wins += 1

    print(f"{circuit_id:<15} {info['n_qubits']:>6} {info['n_observables']:>5} "
          f"{shadows_se:>12.4f} {grouped_se:>12.4f} {ratio:>8.2f}x {winner:>15}")

print("-" * 85)
print(f"\nOverall: Shadows wins {shadows_wins}, Grouped wins {grouped_wins}")
print()

CONSOLIDATED BENCHMARK RESULTS

Circuit         Qubits   Obs   Shadows SE   Grouped SE    Ratio          Winner
-------------------------------------------------------------------------------------
S-BELL-2             4     6       0.0408       0.0082     5.00x         Grouped
-------------------------------------------------------------------------------------

Overall: Shadows wins 0, Grouped wins 1



In [9]:
# =============================================================================
# DETAILED TASK SUMMARY FOR EACH CIRCUIT
# =============================================================================

def generate_task_summary(result, circuit_id, info, epsilon=0.01):
    """Generate task summary for a single circuit.

    Args:
        result: Benchmark result exposing ``long_form_results`` (rows with
            ``protocol_id``, ``N_total``, ``observable_id`` and ``se``) and a
            ``summary`` dict that may contain ``'n_shots_grid'``.
        circuit_id: Identifier copied into the summary.
        info: Dict providing ``'n_qubits'`` and ``'n_observables'``.
        epsilon: Standard-error target for Task 1 (defaults to the 0.01 that
            was previously hard-coded).

    Returns:
        Dict with ``circuit_id``, ``n_qubits``, ``n_observables`` and:
        ``task1_n_star`` — per protocol, the smallest N whose worst SE is
        ≤ ``epsilon``, or the string ``'>{max_n}'`` if none qualifies;
        ``task3_distribution`` — per protocol, mean and max SE at the largest N;
        ``task4_wins`` — per protocol, the number of observables for which it
        has the lowest SE at the largest N (ties go to the protocol seen first).

    Rows whose ``se`` is ``None`` are ignored everywhere.
    """
    max_n = max(result.summary.get('n_shots_grid', [5000]))

    # Group rows as protocol -> shot budget -> [rows]
    by_protocol_n = defaultdict(lambda: defaultdict(list))
    for row in result.long_form_results:
        by_protocol_n[row.protocol_id][row.N_total].append(row)

    protocols = list(by_protocol_n)

    summary = {
        'circuit_id': circuit_id,
        'n_qubits': info['n_qubits'],
        'n_observables': info['n_observables'],
    }

    # Task 1: N* for worst-case
    task1 = {}
    for protocol in protocols:
        n_star = None
        for n in sorted(by_protocol_n[protocol]):
            ses = [r.se for r in by_protocol_n[protocol][n] if r.se is not None]
            # A budget without any SE value cannot certify the target, so it
            # is skipped instead of letting max() fail on an empty sequence.
            if ses and max(ses) <= epsilon:
                n_star = n
                break
        task1[protocol] = n_star if n_star is not None else f'>{max_n}'
    summary['task1_n_star'] = task1

    # Task 3: Distribution at max N
    task3 = {}
    for protocol in protocols:
        ses = [r.se for r in by_protocol_n[protocol].get(max_n, []) if r.se is not None]
        if ses:
            task3[protocol] = {
                'mean': np.mean(ses),
                'max': np.max(ses),
            }
    summary['task3_distribution'] = task3

    # Task 4: Dominance — lowest SE per observable at max N
    best_by_obs = {}
    for protocol in protocols:
        for row in by_protocol_n[protocol].get(max_n, []):
            if row.se is None:
                continue  # comparing None with a float would raise TypeError
            current = best_by_obs.get(row.observable_id)
            if current is None or row.se < current['se']:
                best_by_obs[row.observable_id] = {'se': row.se, 'protocol': protocol}

    wins = {}
    for data in best_by_obs.values():
        wins[data['protocol']] = wins.get(data['protocol'], 0) + 1

    summary['task4_wins'] = wins

    return summary


# Print the per-circuit task report
print("\n" + "="*100)
print("TASK-BY-TASK SUMMARY FOR EACH CIRCUIT")
print("="*100)

for circuit_id in all_results:
    result = all_results[circuit_id]
    info = circuits_to_benchmark[circuit_id]
    summary = generate_task_summary(result, circuit_id, info)

    print(f"\n--- {circuit_id} ({summary['n_qubits']}q, {summary['n_observables']} obs) ---")

    # Task 1: smallest shot budget meeting the worst-case SE target
    print(f"\n  TASK 1 (Worst-Case N*):")
    task1 = summary['task1_n_star']
    for p in task1:
        n_star = task1[p]
        print(f"    {p}: N* = {n_star}")

    # Task 3: SE statistics at the largest shot budget
    print(f"\n  TASK 3 (Distribution at max N):")
    task3 = summary['task3_distribution']
    for p in task3:
        stats = task3[p]
        print(f"    {p}: mean={stats['mean']:.4f}, max={stats['max']:.4f}")

    # Task 4: share of observables on which each protocol has the lowest SE
    print(f"\n  TASK 4 (Dominance):")
    task4 = summary['task4_wins']
    total = sum(task4.values())
    for p, wins in task4.items():
        if total > 0:
            pct = 100 * wins / total
        else:
            pct = 0
        print(f"    {p}: wins {wins}/{total} ({pct:.1f}%)")


TASK-BY-TASK SUMMARY FOR EACH CIRCUIT

--- S-BELL-2 (4q, 6 obs) ---

  TASK 1 (Worst-Case N*):
    direct_grouped: N* = >5000
    direct_optimized: N* = >5000
    classical_shadows_v0: N* = >5000

  TASK 3 (Distribution at max N):
    direct_grouped: mean=0.0082, max=0.0245
    direct_optimized: mean=0.0082, max=0.0245
    classical_shadows_v0: mean=0.0408, max=0.0445

  TASK 4 (Dominance):
    direct_grouped: wins 5/6 (83.3%)
    direct_optimized: wins 1/6 (16.7%)


In [10]:
# =============================================================================
# SAVE CONSOLIDATED REPORT
# =============================================================================

import json
from pathlib import Path
from datetime import datetime

# Gather the run id, output directory and summary of every circuit into one
# JSON-serialisable record.
consolidated = {
    'timestamp': datetime.now().isoformat(),
    'n_circuits': len(all_results),
    'circuits': {
        circuit_id: {
            'run_id': result.run_id,
            'output_dir': str(result.output_dir),
            'summary': result.summary,
        }
        for circuit_id, result in all_results.items()
    },
}

# Write it next to the per-run directories, under a timestamped file name.
output_dir = Path(BENCHMARK_CONFIG.output_base_dir)
output_dir.mkdir(parents=True, exist_ok=True)

file_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
consolidated_path = output_dir / f'consolidated_results_{file_stamp}.json'
with consolidated_path.open('w') as f:
    # default=str stringifies any value json cannot encode natively.
    json.dump(consolidated, f, indent=2, default=str)

print(f"Consolidated results saved to: {consolidated_path}")
print(f"\nIndividual run directories:")
for circuit_id in all_results:
    result = all_results[circuit_id]
    print(f"  {circuit_id}: {result.output_dir}")

Consolidated results saved to: research_benchmark_results\consolidated_results_20260116_154410.json

Individual run directories:
  S-BELL-2: research_benchmark_results\S-BELL-2_20260116_154231_f48be761


---

## Summary

This notebook benchmarks classical shadows against direct measurement baselines for the research-plan circuits enabled in Section 2; the table below lists every circuit family that can be selected.

### Circuits Benchmarked

| Workstream | Circuits | Description |
|------------|----------|-------------|
| **S** | GHZ, Bell, Clifford, Ising | Core shadows validation |
| **C** | H₂, LiH, BeH₂ | Molecular chemistry |
| **O** | QAOA MAX-CUT | Combinatorial optimization |
| **B** | XEB, Clifford RB | Random circuit benchmarking |
| **M** | GHZ phase sensing | Quantum metrology |

### Key Metrics

- **N*** (worst-case): Shots needed for max SE ≤ ε
- **SSR** (shot-savings ratio): N*_baseline / N*_shadows
- **Dominance**: % of observables where shadows wins

### Results Location

All results saved to `research_benchmark_results/` with unique timestamps.