In [1]:
import numpy as np 
import sys, os
sys.path.append('../Netket/')
import netket as nk
from jax import numpy as jnp
import itertools
from scipy.special import comb
from jax import jit, vmap
import jax
import matplotlib.pyplot as plt 
from cluster_expansion import fwht_coeffs_in_cluster_col_order, prepare_fwht_meta_cached, compress_and_reconstruct_cached, _get_topk_indices_jit
import analysis
from analysis import std_phase, ipr, pca_entropy, renyi_entropy, mean_amplitude, uniform_state_overlap, infidelity
import pandas as pd
from functools import partial

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Diagnostics to evaluate for every loaded state: label -> callable from `analysis`.
# Presumably attach_hypotheses_fields adds one column per label -- TODO confirm.
hypotheses = {
    "std_phase" : std_phase,
    "IPR" : ipr,
    "SPCA" : pca_entropy,
    "Renyi_2" : renyi_entropy,
    "uniform_state_overlap" : uniform_state_overlap,
    "mean_amplitude" : mean_amplitude,
}

data_root = '..'

# Directory with the optimal-basis RBM outputs (built once instead of twice).
opt_dir = f"{data_root}/data/data_optimal_basis_rbm"

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# df_opt (and of everything derived from it) reproducible across runs/machines.
h5_files_opt = [os.path.join(opt_dir, f) for f in sorted(os.listdir(opt_dir)) if f.endswith('.h5')]
df_opt = analysis.load_outputs_to_dataframe(h5_files_opt, load_eigenstates=False)
df_opt = analysis.attach_hypotheses_fields(df_opt, hypotheses)

# The sample index is the third '_'-separated token of the file's base name.
df_opt["idx"] = df_opt["file"].apply(lambda x: int(os.path.basename(x).split('_')[2]))
print(len(df_opt))

# Unrotated-basis data is currently disabled. While this stays commented out,
# `df_raw` is undefined and any cell referencing it raises NameError.
# h5_files_raw = [os.path.join(f"{data_root}/data/data_unrotated_basis_rbm", f) for f in os.listdir(f'{data_root}/data/data_unrotated_basis_rbm') if f.endswith('.h5')]
# df_raw = analysis.load_outputs_to_dataframe(h5_files_raw, load_eigenstates=False)
# df_raw = analysis.attach_hypotheses_fields(df_raw, hypotheses)
# df_raw["idx"] = df_raw["file"].apply(lambda x: int(os.path.basename(x).split('_')[2]))
# print(len(df_raw))

230


In [3]:
# Compression sweep: for every sample in df_opt, compute the cluster-expansion
# coefficients of log(psi) for the exact and the RBM state, reconstruct the state
# from truncated coefficient sets of increasing size, and record the infidelity
# of each reconstruction with respect to the exact state.
n_sites_test = 16
hilb_test = nk.hilbert.Spin(0.5, n_sites_test)

# Distinct integer truncation sizes, log-spaced between 2 and 2**n_sites_test.
# np.unique both de-duplicates and sorts; the upper exponent follows n_sites_test
# instead of repeating the literal 16.
compr_idx_list = list(np.unique(np.logspace(1, n_sites_test, 100, base=2, dtype=int)))

# Loop-invariant: the cache depends only on hilb_test, so fill it once up front
# rather than on every iteration (assumed to be an idempotent cache fill, as the
# original "fill cache (fast)" note suggests).
prepare_fwht_meta_cached(hilb_test)

# Collect one dict per sample and build the DataFrame once at the end; growing a
# DataFrame with pd.concat inside the loop copies all previous rows every time.
records = []

# Iterate over the rows directly instead of re-filtering df_opt by idx for every
# sample: this avoids an O(n^2) scan and, if an idx were duplicated, processes
# each row rather than the first match repeatedly.
for i, (idx, psi_0, psi) in enumerate(zip(df_opt['idx'], df_opt['psi_0'], df_opt['psi'])):
    print('iteration ', i)

    psi_test_exact = np.array(psi_0)
    psi_test_RBM = np.array(psi)

    # NOTE(review): np.log yields NaN for negative entries of a real-valued array;
    # presumably the amplitudes are complex or positive here -- confirm.
    cluster_coeffs_test_exact = fwht_coeffs_in_cluster_col_order(np.log(psi_test_exact), hilb_test)
    cluster_coeffs_test_RBM = fwht_coeffs_in_cluster_col_order(np.log(psi_test_RBM), hilb_test)

    # Both reconstructions are compared against the exact state.
    infidels_exact_opt = [
        infidelity(compress_and_reconstruct_cached(cluster_coeffs_test_exact, compr_idx, hilb_test), psi_test_exact)
        for compr_idx in compr_idx_list
    ]
    infidels_RBM_opt = [
        infidelity(compress_and_reconstruct_cached(cluster_coeffs_test_RBM, compr_idx, hilb_test), psi_test_exact)
        for compr_idx in compr_idx_list
    ]

    # convert arrays to lists for safe storage in HDF5
    records.append({
        'idx': int(idx),
        'infidels_exact_opt' : infidels_exact_opt,
        'infidels_RBM_opt' : infidels_RBM_opt,
        'cluster_coeffs_exact': np.array(cluster_coeffs_test_exact).tolist(),
        'cluster_coeffs_RBM': np.array(cluster_coeffs_test_RBM).tolist()
    })

df_out = pd.DataFrame(records)

# After loop, save dataframe to HDF5.
# NOTE: results exist only in memory until this point, so interrupting the loop
# above loses everything computed so far.
out_path = 'cluster_coeffs.h5'
# Use format='table' for appendable, but here we write once
try:
    df_out.to_hdf(out_path, key='df', mode='w')
    print(f"Saved df_out to {out_path} (rows={len(df_out)})")
except Exception as e:
    # Deliberate best-effort fallback: keep the results even if HDF5 writing fails.
    print("Failed to save to HDF5 (falling back to pickle):", e)
    df_out.to_pickle('cluster_coeffs.pkl')
    print("Saved df_out to cluster_coeffs.pkl")


iteration  0
iteration  1
iteration  1
iteration  2
iteration  2
iteration  3
iteration  3
iteration  4
iteration  4
iteration  5
iteration  5
iteration  6
iteration  6
iteration  7
iteration  7
iteration  8
iteration  8
iteration  9
iteration  9
iteration  10
iteration  10
iteration  11
iteration  11
iteration  12
iteration  12
iteration  13
iteration  13
iteration  14
iteration  14
iteration  15
iteration  15
iteration  16
iteration  16
iteration  17
iteration  17
iteration  18
iteration  18
iteration  19
iteration  19
iteration  20
iteration  20
iteration  21
iteration  21
iteration  22
iteration  22
iteration  23
iteration  23
iteration  24
iteration  24
iteration  25
iteration  25
iteration  26
iteration  26
iteration  27
iteration  27
iteration  28
iteration  28
iteration  29
iteration  29
iteration  30
iteration  30
iteration  31
iteration  31
iteration  32
iteration  32
iteration  33
iteration  33
iteration  34
iteration  34
iteration  35
iteration  35
iteration  36
iteration  

KeyboardInterrupt: 

NameError: name 'df_raw' is not defined