In [25]:
import json
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Dict, List, Tuple, Optional
import git
# Working-tree directory of the Git repository that contains the current
# directory; parent directories are searched for the repository root, so the
# notebook can be run from any sub-folder of the checkout.
repo_path = git.Repo('.', search_parent_directories=True).working_tree_dir

In [26]:


def _extract_series(data: dict, closest_d_tol: float = 1e-6) -> dict:
    """Parse one run's decoded JSON payload into a canonical dict of series.

    Parameters
    ----------
    data:
        Decoded JSON object. It must hold a non-empty ``all_run_data`` list of
        per-step dicts carrying ``reduction`` and one of the diff keys
        ``diff``, ``diff_to_mins``, ``diff_to_exact`` or ``diff_to_exact_min``
        (the first one present wins). ``D``, ``H_reduced_e`` and
        ``num_samples`` are optional per-step keys; ``N``, ``cutoff``,
        ``eigenvalues``, ``min_eigenvalue`` and ``final_k`` are optional
        top-level keys.
    closest_d_tol:
        Absolute tolerance used to locate the first step with
        ``|diff| <= closest_d_tol``.

    Returns
    -------
    dict
        ``N`` / ``cutoff`` (int or None), ``exact_min`` (float, NaN when no
        exact value is available), ``final_k`` (int), ``reduced_eigenvalues``,
        ``reduction_percentages``, ``diff_to_mins``, ``num_samples`` (per-step
        lists, all in the same step order; the optional ones are None when the
        key is absent) and ``first_idx_tol`` (position in the returned series,
        or None when the tolerance is never reached).

    Raises
    ------
    ValueError
        If ``all_run_data`` is missing or empty.
    KeyError
        If there is no ``reduction`` series or no diff series.
    """
    all_run_data = data.get("all_run_data", [])
    if not all_run_data:
        raise ValueError("JSON has no all_run_data")

    # Turn the list of per-step dicts into one list per key (missing -> None).
    keys = set().union(*(step.keys() for step in all_run_data))
    cols = {k: [step.get(k) for step in all_run_data] for k in keys}

    # Exact minimum energy: prefer the full spectrum, but fall back to the
    # scalar when the spectrum is absent *or empty* (min([]) would raise).
    eigenvalues = data.get("eigenvalues")
    min_eigenvalue = data.get("min_eigenvalue")
    if eigenvalues is not None and len(eigenvalues) > 0:
        exact_min = float(min(eigenvalues))
    elif min_eigenvalue is not None:
        exact_min = float(min_eigenvalue)
    else:
        exact_min = float("nan")

    reduction = cols.get("reduction")
    if reduction is None:
        raise KeyError("Could not find 'reduction' in all_run_data")

    H_reduced_e = cols.get("H_reduced_e")
    num_samples = cols.get("num_samples")

    # Diff series: first of the known key spellings that appears in the data.
    diff = next(
        (cols[k] for k in ("diff", "diff_to_mins", "diff_to_exact", "diff_to_exact_min") if k in cols),
        None,
    )
    if diff is None:
        raise KeyError("Could not find a diff series in all_run_data")

    # D / step axis
    d_raw = cols.get("D")
    if d_raw is not None and any(v is not None for v in d_raw):
        D = [int(v) if v is not None else None for v in d_raw]
        # Stable sort by D; steps without a D keep their relative order and
        # are placed last.
        order = sorted(
            range(len(D)),
            key=lambda i: (D[i] is None, D[i] if D[i] is not None else 0),
        )

        def _reorder(a):
            return [a[i] for i in order] if a is not None else None

        # Every per-step series must be permuted together, num_samples
        # included, otherwise it no longer lines up with the other series.
        reduction = _reorder(reduction)
        H_reduced_e = _reorder(H_reduced_e)
        diff = _reorder(diff)
        num_samples = _reorder(num_samples)
        final_k = max(d for d in D if d is not None) + 1
    else:
        # No usable D axis: trust the file's final_k, else use the step count.
        raw_final_k = data.get("final_k")
        final_k = int(raw_final_k) if raw_final_k is not None else len(reduction)

    # First position whose |diff| is within tolerance (NaN never matches).
    diff_float = [float(x) if x is not None else float("nan") for x in diff]
    first_idx_tol = next((i for i, d in enumerate(diff_float) if abs(d) <= closest_d_tol), None)

    return {
        "N": int(data.get("N")) if data.get("N") is not None else None,
        "cutoff": int(data.get("cutoff")) if data.get("cutoff") is not None else None,
        "exact_min": exact_min,
        "final_k": final_k,
        "reduced_eigenvalues": H_reduced_e,
        "reduction_percentages": [float(x) if x is not None else float("nan") for x in reduction],
        "diff_to_mins": diff_float,
        "num_samples": num_samples,
        "first_idx_tol": first_idx_tol,
    }


def load_run_file(path: str, closest_d_tol: float = 1e-6) -> dict:
    """Read one run's JSON file and return its canonical record.

    The record is the dict produced by ``_extract_series`` for the decoded
    file contents, with an extra ``"path"`` entry holding ``path`` as given.
    """
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    record = _extract_series(payload, closest_d_tol=closest_d_tol)
    record["path"] = path
    return record


def load_runs_in_folder(folder: str, *, pattern: Optional[str] = None, closest_d_tol: float = 1e-6):
    """Load every run file in ``folder`` whose name matches ``pattern``.

    ``pattern`` is a glob relative to ``folder`` and defaults to ``"*.json"``.
    Files are visited in sorted path order. A file that fails to load is
    reported on stdout and skipped, so the returned list holds only the
    records that loaded successfully.
    """
    glob_pattern = pattern if pattern else "*.json"
    candidates = [str(match) for match in Path(str(folder)).glob(glob_pattern)]
    candidates.sort()

    loaded = []
    for candidate in candidates:
        try:
            record = load_run_file(candidate, closest_d_tol=closest_d_tol)
        except Exception as err:
            # Best effort: one bad file must not abort the whole folder.
            print(f"Skipping {candidate}: {err}")
        else:
            loaded.append(record)
    return loaded


def aggregate_timeseries(rows: List[dict], key: str) -> Tuple[np.ndarray, np.ndarray]:
    """Per-step (mean, std) of the series stored under ``key`` across runs.

    Rows where ``key`` is missing or None are ignored. Shorter series are
    right-padded with NaN up to the longest one, and the statistics are taken
    with ``nanmean`` / ``nanstd`` so the padding does not contribute. Two empty
    arrays are returned when no row carries the series.
    """
    runs = []
    for row in rows:
        if row.get(key) is not None:
            runs.append(np.asarray(row[key], dtype=float))
    if len(runs) == 0:
        return np.array([]), np.array([])

    longest = max(map(len, runs))
    padded = np.full((len(runs), longest), np.nan, dtype=float)
    # Each `target` is a view of one row of `padded`, so writing fills it in place.
    for target, values in zip(padded, runs):
        target[: len(values)] = values

    return np.nanmean(padded, axis=0), np.nanstd(padded, axis=0)


def aggregate_scalar(rows: List[dict], key: str) -> Tuple[float, float]:
    """Mean and population std (ddof=0) of a scalar field across runs.

    Rows where ``key`` is missing or None, and values that are not finite, are
    left out. Returns ``(nan, nan)`` when nothing usable remains.
    """
    finite = []
    for row in rows:
        value = row.get(key)
        if value is None:
            continue
        if np.isfinite(value):
            finite.append(float(value))

    if len(finite) == 0:
        return float("nan"), float("nan")
    return float(np.mean(finite)), float(np.std(finite, ddof=0))


In [27]:
def plot_mean_std(
    x: np.ndarray,
    mean: np.ndarray,
    std: np.ndarray,
    ax=None,
    label: str = "",
    show: bool = True,
    ylabel: str = "",
    yscale: str = "",
    xlabel: str = "D",
    title: Optional[str] = None,
    mark_first_within_tol: Optional[int] = None,
    tol_label: Optional[str] = None,
    log_floor: float = 1e-16,
):
    """Draw one mean ± std curve with error bars and return the axes.

    Parameters
    ----------
    x, mean, std:
        Equal-length sequences; they are converted to float arrays.
    ax:
        Axes to draw on. When None a new 9x5 figure is created, and it is laid
        out and shown at the end if ``show`` is true.
    label:
        Legend entry for the curve.
    show:
        Only used when the figure was created here.
    ylabel, xlabel, title:
        Axis labels and optional title.
    yscale:
        Passed to ``ax.set_yscale`` when non-empty. With ``"log"`` the curve
        and both error-bar ends are clamped to ``log_floor`` so nothing
        non-positive reaches the log axis.
    mark_first_within_tol:
        When given, a dashed vertical line is drawn at this x value.
    tol_label:
        Legend entry for that line (default ``"first within tol"``).
    log_floor:
        Smallest value drawn on a log axis; also used as the lower y-limit.

    Returns
    -------
    The axes that were drawn on.
    """
    created = ax is None
    if created:
        fig, ax = plt.subplots(figsize=(9, 5))

    x = np.asarray(x, dtype=float)
    mean = np.asarray(mean, dtype=float)
    std = np.asarray(std, dtype=float)

    log_scale = yscale == "log"
    if log_scale:
        # Clamp the curve and both bar ends to the floor, then express the bars
        # as asymmetric offsets [lower_err, upper_err] from the clamped curve.
        y = np.maximum(mean, log_floor)
        low = np.maximum(mean - std, log_floor)
        high = np.maximum(mean + std, log_floor)
        yerr = np.vstack([y - low, high - y])
    else:
        # Plain symmetric error bars on a linear axis.
        y, yerr = mean, std

    ax.errorbar(
        x,
        y,
        yerr=yerr,
        fmt="o-",
        markersize=4,
        linewidth=1.6,
        capsize=3,
        elinewidth=1.2,
        alpha=0.95,
        label=label,
    )

    if log_scale:
        # set_ylim switches y-autoscaling off, which would freeze the upper
        # limit at the first curve drawn on a shared axes. Re-enable it first
        # so the top is recomputed from everything plotted so far, then pin
        # only the bottom.
        ax.autoscale(enable=True, axis="y")
        ax.set_ylim(bottom=log_floor)

    if mark_first_within_tol is not None:
        ax.axvline(
            mark_first_within_tol,
            linestyle="--",
            linewidth=1.2,
            alpha=0.8,
            label=(tol_label or "first within tol"),
        )

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if yscale:
        ax.set_yscale(yscale)
    if title:
        ax.set_title(title)
    ax.grid(True, alpha=0.3)

    # Only build a legend when something is labelled; an empty legend makes
    # matplotlib emit a warning.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(loc="best")

    if created and show:
        plt.tight_layout()
        plt.show()

    return ax


In [30]:
# Root folder holding the run files. The path components are joined one by one
# so the location resolves on any OS (a backslash-separated literal only works
# on Windows).
root = os.path.join(repo_path, "SUSY", "Wess-Zumino", "Qiskit", "SKQD", "Files")
bc = "dirichlet"
potential = "linear"

sites = [4]
cutoffs = [16]
conv_tol = 1e-6  # |diff| threshold used to flag convergence

# Sub-folders of <root>/Fock+Full/<bc>/<potential>/N<N>/ to compare, one curve
# each (presumably one folder per shot count; confirm against the data layout).
basis_folders = ["5000", "10000", "50000", "100000"]
agg_rows = []

for N in sites:
    for basis in basis_folders:
        folder = os.path.join(root, "Fock+Full", bc, potential, f"N{N}", basis)

        for cutoff in cutoffs:
            pattern = f"{potential}_L{cutoff}_*.json"
            runs = load_runs_in_folder(folder, pattern=pattern, closest_d_tol=conv_tol)

            if not runs:
                continue

            # aggregate series
            red_mean, red_std = aggregate_timeseries(runs, "reduction_percentages")
            diff_mean, diff_std = aggregate_timeseries(runs, "diff_to_mins")

            # aggregate scalars
            exact_min_mean, exact_min_std = aggregate_scalar(runs, "exact_min")

            # First step at which the *mean* diff curve is within tolerance.
            first_idx_tol_mean = next(
                (i for i, d in enumerate(diff_mean) if np.isfinite(d) and abs(d) <= conv_tol),
                None
            )

            agg_rows.append({
                "bc": bc,
                "potential": potential,
                "N": N,
                "cutoff": cutoff,
                "basis_folder": basis,
                "exact_min_mean": exact_min_mean,
                "exact_min_std": exact_min_std,
                "first_idx_tol_mean": first_idx_tol_mean,
                "red_mean": red_mean,
                "red_std": red_std,
                "diff_mean": diff_mean,
                "diff_std": diff_std,
                "n_runs": len(runs),
            })


In [31]:
# --------------------
# Plot: mean ± std for each (N, cutoff), one curve per basis folder
# --------------------
out_dir = os.path.join(root, "plots")
os.makedirs(out_dir, exist_ok=True)

# 1) Diff to min (mean ± std)
for N in sites:
    for cutoff in cutoffs:
        group = [r for r in agg_rows if r["N"] == N and r["cutoff"] == cutoff]
        if not group:
            continue

        fig, ax = plt.subplots(figsize=(9, 5))
        for r in group:
            x = np.arange(len(r["diff_mean"]))
            plot_mean_std(
                x,
                r["diff_mean"],
                r["diff_std"],
                ax=ax,
                # str(...) rather than an f-string that reuses the enclosing
                # quote, which is a SyntaxError before Python 3.12.
                label=str(r["basis_folder"]),
                show=False,
                ylabel="|Δ_exact|",
                yscale="log",
                title=f"{bc} - {potential} - N{N} - Λ{cutoff}: |Δ_exact| (mean ± std)",
                # tol_label only takes effect when mark_first_within_tol is passed.
                tol_label=f"first ≤ {conv_tol:g}",
            )
        eps = 1e-12  # lowest |Δ| shown on the log axis
        # Setting a limit switches y-autoscaling off, so the upper limit could
        # still be the one computed for the first curve only. Re-enable
        # autoscaling to cover every curve, then pin just the bottom.
        ax.autoscale(enable=True, axis="y")
        ax.set_ylim(bottom=eps)
        fig.tight_layout()
        out = os.path.join(out_dir, f"diff_meanstd_{bc}_{potential}_N{N}_L{cutoff}.png")
        fig.savefig(out, dpi=200)
        plt.close(fig)

# 2) Reduction (mean ± std)
for N in sites:
    for cutoff in cutoffs:
        group = [r for r in agg_rows if r["N"] == N and r["cutoff"] == cutoff]
        if not group:
            continue

        fig, ax = plt.subplots(figsize=(9, 5))
        for r in group:
            x = np.arange(len(r["red_mean"]))

            # If "red_mean" is "reduction_percentages" (i.e. percent removed),
            # then "100 - red" is percent kept. Keep whichever you intend:
            mean = 100.0 - r["red_mean"]
            std = r["red_std"]

            plot_mean_std(
                x,
                mean,
                std,
                ax=ax,
                label=str(r["basis_folder"]),
                show=False,
                ylabel="H Reduction %",
                yscale="linear",
                title=f"{bc} - {potential} - N{N} - Λ{cutoff}: H Reduction % (mean ± std)",
            )

        fig.tight_layout()
        out = os.path.join(out_dir, f"reduction_meanstd_{bc}_{potential}_N{N}_L{cutoff}.png")
        fig.savefig(out, dpi=200)
        plt.close(fig)

print("Done. Plots written to:", out_dir)


Done. Plots written to: c:\Users\Johnk\Documents\PhD\Quantum Computing Code\Quantum-Computing\SUSY\Wess-Zumino\Qiskit\SKQD\Files\plots
