# Calculates the expected number of latent qubits using a PCA approximation of the Schumacher eigenstates

In [1]:
import json
import numpy as np

def load_dataset(path="beh2_states_dataset.json"):
    """Load statevectors from a JSON file as a real-valued matrix.

    The file must hold a sequence of entries, each with a ``"statevector"``
    key containing ``[re, im]`` pairs (one pair per amplitude). Every state
    is normalised to unit 2-norm and flattened into a real vector by
    concatenating its real parts followed by its imaginary parts.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON dataset.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, 2 * dim)``, one row per state.

    Raises
    ------
    ValueError
        If the dataset contains no entries, an entry is not a list of
        ``[re, im]`` pairs, a state has zero or non-finite norm, or the
        states do not all have the same dimension.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    states = []

    for index, entry in enumerate(data):
        # One array conversion instead of a Python-level complex() call per
        # amplitude: column 0 holds the real parts, column 1 the imaginary.
        pairs = np.asarray(entry["statevector"], dtype=np.float64)
        if pairs.ndim != 2 or pairs.shape[1] != 2:
            raise ValueError(
                f"entry {index}: 'statevector' must be a list of [re, im] pairs, "
                f"got array of shape {pairs.shape}"
            )
        psi = pairs[:, 0] + 1j * pairs[:, 1]

        # Normalise (in case of tiny numerical deviations). A zero or
        # non-finite norm would otherwise yield a row of NaNs silently.
        norm = np.linalg.norm(psi)
        if not np.isfinite(norm) or norm == 0.0:
            raise ValueError(
                f"entry {index}: statevector has norm {norm}; cannot normalise"
            )
        psi = psi / norm

        # Convert to real vector: real parts first, then imaginary parts.
        states.append(np.concatenate([psi.real, psi.imag]))

    if not states:
        raise ValueError(f"no statevectors found in {path!r}")

    return np.vstack(states)  # shape (N, 2*dim)


In [2]:
# Build the real-valued state matrix (one row per state) and report its shape.
X = load_dataset()
print(np.shape(X))


(625, 32768)


In [3]:
from sklearn.decomposition import PCA

# Fraction of the total variance the retained components must explain.
# This is 99.99999 %, i.e. far stricter than a 99 % cut-off.
VARIANCE_TO_KEEP = 0.9999999

# With a float n_components and svd_solver="full", scikit-learn keeps the
# smallest number of components whose cumulative explained variance exceeds
# the requested fraction.
# NOTE(review): PCA mean-centres X before the SVD, so the components describe
# the covariance of the stacked real vectors, not an uncentred ensemble
# operator — confirm this is the intended approximation.
pca = PCA(n_components=VARIANCE_TO_KEEP, svd_solver="full")

X_reduced = pca.fit_transform(X)

print("Original dimension:", X.shape[1])
print("Reduced dimension:", X_reduced.shape[1])

Original dimension: 32768
Reduced dimension: 89


In [4]:
# Map the reduced coordinates back to the original feature space so each
# reconstructed row can be compared with the corresponding row of X.
X_reconstructed = pca.inverse_transform(X_reduced)

def fidelity(psi_real_orig, psi_real_recon):
    """Return the squared overlap |<a|b>|^2 of two real-encoded states.

    Both arguments are 1-D arrays laid out as real parts followed by
    imaginary parts. The split point is taken from the length of
    ``psi_real_orig``. Each vector is rebuilt as a complex vector and
    rescaled to unit norm before the overlap is computed.
    """
    n_amplitudes = len(psi_real_orig) // 2

    def _as_unit_complex(stacked):
        # First half holds the real parts, second half the imaginary parts.
        amplitudes = stacked[:n_amplitudes] + 1j * stacked[n_amplitudes:]
        return amplitudes / np.linalg.norm(amplitudes)

    # np.vdot conjugates its first argument, giving the inner product <a|b>.
    overlap = np.vdot(
        _as_unit_complex(psi_real_orig),
        _as_unit_complex(psi_real_recon),
    )
    return np.abs(overlap)**2

# Fidelity of every original row against its PCA reconstruction.
fids = np.array([fidelity(orig, recon) for orig, recon in zip(X, X_reconstructed)])
print("Mean fidelity:", np.mean(fids))
print("Median fidelity:", np.median(fids))
print("Min fidelity:", np.min(fids))


Mean fidelity: 0.999999908106443
Median fidelity: 0.999999953300578
Min fidelity: 0.9999987922174666
