In [None]:
import fnmatch
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import seaborn as sns

%matplotlib inline

In [None]:
# Typography shared by the figures in this notebook: font family and the
# font sizes used for titles, axis labels and small text (ticks, legend).
font_family = "Times"
fs_title, fs_label, fs_small = 20, 18, 14

# Global plot style: apply the seaborn "ticks" style first, then the
# matplotlib font settings (same order as before).
sns.set_style('ticks')
mpl.rcParams.update({
    "font.family": font_family,
    "mathtext.fontset": "stix",
})
fig_height = 6

In [None]:
# Toggle for writing figures to disk (consulted where the output path is chosen).
save_figures = True

# Every transformer training-run id shares the same hyper-parameter prefix;
# only the trailing suffix differs per lattice size ("<rows>x<cols>").
_train_id_prefix = (
    "iter100_lr0.001_wd0.0_bs100_dropout0.1_samplestruct2"
    "_lrschedulewarmup_cosine13102022-"
)
_train_id_suffixes = {
    "4x5": "181556",
    "5x5": "181610",
    "6x5": "181649",
    "7x5": "181750",
    "8x5": "182303",
    "9x5": "182505",
}
transformer_train_ids = {
    size: _train_id_prefix + suffix
    for size, suffix in _train_id_suffixes.items()
}

# Maps a kernel model id (as used in the results directory layout) to the
# label shown in the plot legend.
kernel_names = dict([
    ('dirichlet-kernel', 'Dirichlet Kernel'),
    ('rbf-kernel', 'RBF Kernel'),
])

# Model id path component of the transformer results directory.
transformer_model_id = 'gcn_proj_3_16-transformer_l4_d128_h4_featone_hot'

In [None]:
# Both directory layouts share the same root up to the sample-count folder.
# Placeholders are filled in with str.format by the loading code.
_checkpoint_dir_pattern = (
    '../logs/2d_heisenberg_checkpoints/'
    'conditional_heisenberg_{rows}x{cols}/{model_id}/ns{ns}/'
)

# Per-training-run results directory.
res_dir_pattern = _checkpoint_dir_pattern + '{train_id}/'
# Per-split results directory used for the kernel estimates.
kernel_dir_pattern = _checkpoint_dir_pattern + 'results/{split}/'

In [None]:
def get_correlation_error_distribution(props_dir, data_dir, rows, cols, corr_mat_name, model_name):
    """Compute the entry-wise RMSE between predicted and reference correlation matrices.

    For every reference file ``correlation_matrix_id<N>.npy`` found in
    ``data_dir`` the matching prediction ``corr_mat_name.format(idx=N)`` is
    loaded from ``props_dir``. Squared differences are averaged over all ids
    and the square root is taken per matrix entry.

    Parameters
    ----------
    props_dir : str
        Directory containing the predicted correlation matrices.
    data_dir : str
        Directory containing the reference ``correlation_matrix_id<N>.npy`` files.
    rows, cols : int
        Lattice dimensions; the accumulator has shape
        ``(rows * cols, rows * cols)``, so the loaded matrices are assumed to
        have that shape.
    corr_mat_name : str
        File name template of the predictions with an ``{idx}`` placeholder.
    model_name : str
        Label stored in the ``Model`` column.

    Returns
    -------
    pandas.DataFrame
        One row per matrix entry (flattened row-major) with columns ``rmse``,
        ``system_size`` (``"<rows>x<cols>"``) and ``Model``. Diagonal entries
        have ``rmse`` set to NaN.

    Raises
    ------
    FileNotFoundError
        If ``data_dir`` contains no reference matrices (previously this
        silently produced an all-NaN result through a 0/0 division), or if a
        matrix file cannot be loaded.
    """
    qubits = rows * cols
    prefix, suffix = 'correlation_matrix_id', '.npy'

    # Hamiltonian ids are the integers between the fixed prefix and the
    # extension; sorting makes the floating-point summation order independent
    # of the arbitrary order returned by os.listdir.
    ids = sorted(
        int(fname[len(prefix):-len(suffix)])
        for fname in os.listdir(data_dir)
        if fnmatch.fnmatch(fname, f'{prefix}*{suffix}')
    )
    if not ids:
        raise FileNotFoundError(
            f"no '{prefix}*{suffix}' files found in {data_dir!r}"
        )

    # accumulate squared errors over all hamiltonians
    errors = np.zeros(shape=(qubits, qubits))
    for i in ids:
        cmat_pred = np.load(os.path.join(props_dir, corr_mat_name.format(idx=i)))
        cmat_true = np.load(os.path.join(data_dir, f'{prefix}{i}{suffix}'))
        errors = errors + (cmat_pred - cmat_true) ** 2

    rmses = np.sqrt(errors / len(ids))
    # mask the diagonal so that self-correlations do not show up in the plot
    np.fill_diagonal(rmses, np.nan)
    # the array method avoids the deprecated `newshape` keyword of np.reshape
    rmses = rmses.reshape(-1)

    return pd.DataFrame({
        'rmse': rmses,
        'system_size': [f'{rows}x{cols}'] * len(rmses),
        'Model': [model_name] * len(rmses),
    })

In [None]:
def get_correlation_rmse_df(kernel_id, split, systems_sizes):
    """Collect correlation RMSE distributions of all models for several system sizes.

    For every ``(rows, cols, nsamples)`` entry the RMSE distributions of the
    transformer, the kernel method and the shadow estimates are loaded via
    ``get_correlation_error_distribution`` and stacked in that order.

    Parameters
    ----------
    kernel_id : str
        Key into ``kernel_names``; also used as the model id path component of
        the kernel results directory.
    split : str
        Name of the data split sub-directory (e.g. ``'test'``).
    systems_sizes : iterable of (int, int, int)
        ``(rows, cols, nsamples)`` tuples. ``f"{rows}x{cols}"`` must be a key
        of ``transformer_train_ids``.

    Returns
    -------
    pandas.DataFrame or None
        Concatenation (with a fresh integer index) of all per-model frames, or
        ``None`` when ``systems_sizes`` is empty.
    """
    # Frames are collected and concatenated once at the end instead of
    # re-copying a growing DataFrame on every iteration.
    frames = []

    for rows, cols, nsamples in systems_sizes:
        train_id = transformer_train_ids[f"{rows}x{cols}"]
        run_dir = res_dir_pattern.format(
            rows=rows, cols=cols, model_id=transformer_model_id, ns=nsamples, train_id=train_id
        )
        kernel_dir = kernel_dir_pattern.format(
            rows=rows, cols=cols, model_id=kernel_id, ns=nsamples, split=split
        )
        # reference matrices live inside the transformer run directory
        data_dir = os.path.join(run_dir, 'data', f'{rows}x{cols}', split)

        # (predictions directory, file name template, legend label); the order
        # of this list fixes the row order of the result.
        sources = [
            (os.path.join(run_dir, 'properties', split, 'model', 'correlations'),
             'correlations_model_id{idx}.npy', 'Transformer (Ours)'),
            (kernel_dir, 'estim_corr_mat_id{idx}.npy', kernel_names[kernel_id]),
            (os.path.join(run_dir, 'properties', split, 'shadow', 'correlations'),
             'correlations_shadow_id{idx}.npy', 'Shadow'),
        ]

        for props_dir, corr_mat_name, model_name in sources:
            frames.append(get_correlation_error_distribution(
                props_dir=props_dir, data_dir=data_dir, rows=rows, cols=cols,
                corr_mat_name=corr_mat_name, model_name=model_name
            ))

    if not frames:
        # keep the historical contract for an empty list of system sizes
        return None

    return pd.concat(frames, ignore_index=True, axis=0)

In [None]:
def make_stripplot(df: pd.DataFrame, title, figsize=(4, 6), save_as=None, y_title=0.98):
    """Draw a strip plot of RMSE values per system size, split by model.

    Font sizes are taken from the module-level ``fs_label``, ``fs_small`` and
    ``fs_title`` settings.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns ``rmse``, ``system_size`` and ``Model``.
    title : str or None
        Figure title; no title is drawn when ``None``.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    save_as : str or None
        Output path. When given, missing parent directories are created, the
        figure is written to this path and then closed.
    y_title : float
        Vertical position of the figure title.
    """
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    xmax = df.rmse.max()

    # one strip per (system size, model) pair
    sns.stripplot(x="rmse", y="system_size", hue="Model", data=df, alpha=0.5, size=2, dodge=True, ax=ax)
    ax.set_ylabel(None)
    ax.set_xlabel('RMSE', fontsize=fs_label)
    ax.set_xlim(-0.01, xmax + 0.05)
    ax.tick_params(labelsize=fs_small)

    for spine in ax.spines.values():
        spine.set_linewidth(1)
        spine.set_color('black')

    # frameless single-row legend placed above the axes
    ax.legend(fontsize=fs_small, handletextpad=0.1, labelspacing=.1, framealpha=0.0,
              handlelength=0.9, fancybox=False, columnspacing=1, frameon=False, ncol=3, loc='upper center', bbox_to_anchor=(0.5, 1.1))

    if title is not None:
        fig.suptitle(title, fontsize=fs_title, y=y_title)

    if save_as is not None:
        folder = os.path.dirname(save_as)

        # A bare file name has an empty directory part; os.makedirs('') would
        # raise, so only create the folder when there is one.
        if folder:
            os.makedirs(folder, exist_ok=True)

        # save this figure explicitly instead of whatever pyplot considers current
        fig.savefig(save_as, bbox_inches='tight', pad_inches=0.01, dpi=200)
        plt.close(fig)

In [None]:
# (rows, cols, nsamples) for every lattice that is evaluated: 4x5 up to 9x5,
# each with 1000 samples.
systems_sizes = [(rows, 5, 1000) for rows in range(4, 10)]

In [None]:
# RMSE distributions of the transformer, the RBF kernel and the shadow
# estimates on the 'test' split for all configured system sizes.
df = get_correlation_rmse_df('rbf-kernel', 'test', systems_sizes=systems_sizes)

# NOTE(review): "kenrel" in the file name looks like a typo for "kernel"; it is
# kept unchanged because existing references to the output file may rely on it.
save_as = './figures/2d_heisenberg/correlations_kenrel_comparison.pdf' if save_figures else None

# The figure is drawn without a title. To restore it, use
# 'Two-Point Correlation Functions RMSE' instead of None.
title = None
make_stripplot(df, title=title, save_as=save_as, y_title=0.99)