In [9]:
# Jupyter-friendly / latest Qiskit style
import json
import numpy as np
from pathlib import Path
from ast import literal_eval
from qiskit.quantum_info import SparsePauliOp
from scipy.sparse.linalg import eigsh

In [4]:
def _parse_coeff(x):
    """
    Robust parser for coefficients if they are stored as floats OR strings like '(0.25+0j)'.
    """
    if isinstance(x, (int, float, complex, np.number)):
        return complex(x)
    if isinstance(x, str):
        return complex(literal_eval(x))
    raise TypeError(f"Unsupported coefficient type: {type(x)}")


def reconstruct_from_energy_contrib_threshold(
    json_path,
    threshold,
    *,
    strict=True,              # True => abs(contrib) > threshold, False => >=
    drop_identity=False,      # optionally remove identity term
    simplify_atol=0.0,        # optional simplify tolerance (None skips simplify)
    return_mask=False,
):
    """
    Reconstruct a SparsePauliOp Hamiltonian keeping only terms whose
    abs(pauli_energy_contrib_ground) passes the threshold.

    The JSON file must contain three equally long lists under the keys
    'pauli_labels', 'pauli_coeffs' and 'pauli_energy_contrib_ground'.

    Parameters
    ----------
    json_path : str or Path
        Path to your Hamiltonian JSON (read as UTF-8).
    threshold : float
        Threshold on abs(pauli_energy_contrib_ground). Must not be NaN.
    strict : bool
        If True, keep abs(contrib) > threshold. If False, keep abs(contrib) >= threshold.
    drop_identity : bool
        If True, drop the all-identity term (e.g. 'IIII...').
    simplify_atol : float or None
        Passed to SparsePauliOp.simplify(atol=...). If None, simplify() is skipped.
    return_mask : bool
        If True, also return the boolean mask used.

    Returns
    -------
    H_pruned : SparsePauliOp
    info : dict
        Diagnostics about kept/dropped terms.
    mask : np.ndarray (optional)
        Boolean mask of kept terms (only when ``return_mask`` is True).

    Raises
    ------
    ValueError
        If the threshold is NaN, the three lists differ in length, or no term
        survives the selection.
    KeyError
        If one of the required keys is missing from the JSON file.
    """
    threshold = float(threshold)
    # Every comparison against NaN is False, which would otherwise surface as a
    # misleading "No Pauli terms survived" error further down.
    if np.isnan(threshold):
        raise ValueError("threshold must not be NaN.")

    json_path = Path(json_path)
    # Explicit encoding: the default is platform dependent (e.g. cp1252 on Windows).
    with open(json_path, "r", encoding="utf-8") as f:
        d = json.load(f)

    required_keys = ("pauli_labels", "pauli_coeffs", "pauli_energy_contrib_ground")
    missing_keys = [key for key in required_keys if key not in d]
    if missing_keys:
        raise KeyError(f"{json_path} is missing required key(s): {missing_keys}")

    labels = d["pauli_labels"]
    coeffs = np.array([_parse_coeff(c) for c in d["pauli_coeffs"]], dtype=complex)
    contribs = np.asarray(d["pauli_energy_contrib_ground"], dtype=float)

    if not (len(labels) == len(coeffs) == len(contribs)):
        raise ValueError(
            f"Length mismatch: labels={len(labels)}, coeffs={len(coeffs)}, contribs={len(contribs)}"
        )

    # Threshold condition
    abs_contribs = np.abs(contribs)
    mask = abs_contribs > threshold if strict else abs_contribs >= threshold

    # Optionally remove identity term (a label made up solely of 'I')
    if drop_identity:
        identity_mask = np.array([set(lbl) == {"I"} for lbl in labels], dtype=bool)
        mask = mask & (~identity_mask)

    num_total = len(labels)
    num_kept = int(np.count_nonzero(mask))
    if num_kept == 0:
        raise ValueError(
            "No Pauli terms survived the threshold. Lower the threshold or disable drop_identity."
        )

    kept_labels = [lbl for lbl, keep in zip(labels, mask) if keep]
    H_pruned = SparsePauliOp(kept_labels, coeffs=coeffs[mask])

    # Optional simplify (merges duplicate labels if any, removes near-zero coeffs)
    if simplify_atol is not None:
        H_pruned = H_pruned.simplify(atol=float(simplify_atol))

    info = {
        "json_path": str(json_path),
        "threshold": threshold,
        "strict": strict,
        "drop_identity": drop_identity,
        "num_terms_total": num_total,
        "num_terms_kept_before_simplify": num_kept,
        "num_terms_dropped": num_total - num_kept,
        "num_terms_final": len(H_pruned.paulis),
        "kept_fraction": float(num_kept / num_total),
        "sum_abs_contrib_kept": float(np.sum(abs_contribs[mask])),
        "sum_abs_contrib_dropped": float(np.sum(abs_contribs[~mask])),
    }

    if return_mask:
        return H_pruned, info, mask
    return H_pruned, info

In [39]:
# Input file holding the Pauli labels, coefficients and per-term contributions.
# NOTE(review): absolute, machine-specific path; this cell only runs where that file exists.
json_path = r"C:\Users\Johnk\Documents\PhD\Quantum Computing Code\Quantum-Computing\SUSY\Wess-Zumino\Analyses\Model Checks\GroundstatePauliContributions\dirichlet\linear\N3\linear_16.json"
# Cut-off applied to abs(pauli_energy_contrib_ground).
threshold = 1e-6

# strict=True        -> keep abs(contrib) > threshold
# drop_identity=False -> keep the all-identity term (set True to force-remove it)
# simplify_atol=0.0  -> tolerance forwarded to SparsePauliOp.simplify
H_pruned, info = reconstruct_from_energy_contrib_threshold(
    json_path,
    threshold,
    strict=True,
    drop_identity=False,
    simplify_atol=0.0,
)

# Diagnostics first, then a peek at the first ten surviving terms.
print(info)
print("First few labels:", H_pruned.paulis.to_labels()[:10])
print("First few coeffs:", H_pruned.coeffs[:10])

{'json_path': 'C:\\Users\\Johnk\\Documents\\PhD\\Quantum Computing Code\\Quantum-Computing\\SUSY\\Wess-Zumino\\Analyses\\Model Checks\\GroundstatePauliContributions\\dirichlet\\linear\\N3\\linear_16.json', 'threshold': 1e-06, 'strict': True, 'drop_identity': False, 'num_terms_total': 1146, 'num_terms_kept_before_simplify': 342, 'num_terms_dropped': 804, 'num_terms_final': 342, 'kept_fraction': 0.29842931937172773, 'sum_abs_contrib_kept': 67.01742493447539, 'sum_abs_contrib_dropped': 4.3980161804815656e-05}
First few labels: ['IIIIIIIIIIIIIII', 'IIIIIIIIIIIIIZI', 'IIIIIIIIIIIIIZZ', 'IIIIIIIIIIIIZII', 'IIIIIIIIIIIIZIZ', 'IIIIIIIIIIIIZZI', 'IIIIIIIIIIIIZZZ', 'IIIIIIIIIIIZIII', 'IIIIIIIIIIIZIIZ', 'IIIIIIIIIIIZIZI']
First few coeffs: [26.25  +0.j -0.5625+0.j -0.5625+0.j -1.6875+0.j -0.5625+0.j -0.5625+0.j
  0.5625+0.j -3.9375+0.j -0.5625+0.j -0.5625+0.j]


In [40]:
# Sparse matrix form of the pruned Hamiltonian; consumed by the eigen-solve cell below.
H = H_pruned.to_matrix(sparse=True)

In [41]:
# Small matrices are diagonalised densely; larger ones go through the sparse
# solver, which is asked for the 16 smallest-algebraic ("SA") eigenpairs.
used_dense = H.shape[0] < 2000
if used_dense:
    # NOTE(review): todense() returns np.matrix per the SciPy docs; toarray() would
    # give a plain ndarray. Confirm downstream cells before switching.
    H_dense = H.todense()
    eigenvalues, eigenvectors = np.linalg.eigh(H_dense)
else:
    eigenvalues, eigenvectors = eigsh(H, k=16, which="SA", return_eigenvectors=True)

# Column index of the lowest eigenvalue, taken before `eigenvalues` is
# sorted and truncated below, so it still indexes `eigenvectors`.
min_index = int(np.argmin(eigenvalues))

# Keep only the 8 lowest eigenvalues (ascending), real parts only.
eigenvalues = np.sort(eigenvalues)[:8].real

In [42]:
# Display the retained eigenvalues (at most 8, sorted ascending, real parts).
eigenvalues

array([-0.00330358,  0.99531376,  0.99669642,  1.22067297,  1.22144129,
        1.22144129,  1.22144248,  1.976396  ])