In [1]:
import numpy as np 
import pandas as pd
from numpyro.diagnostics import summary
from utils.helpers import pickle_load
import matplotlib.pylab as plt 

# Global matplotlib settings and default figure/font sizes for this notebook.
plt.rc('text', usetex=True)  # render text with LaTeX
plt.rc('font', family='sans-serif', serif=['Palatino'])
figSize = (12, 8)
fontSize = 15

In [2]:
# Which analysis the notebook summarises: 'lsst' or anything else for the
# smaller (non-LSST) configuration.
ANALYSIS = 'lsst'

# Number of 'wl'-indexed (m*, dz_wl_*) and 'gc'-indexed (b*, dz_gc_*)
# parameters for the selected analysis.
_N_WL, _N_GC = (5, 10) if ANALYSIS == 'lsst' else (4, 5)

# Ordered parameter names; the order must match the column order of the
# sample arrays passed to the summary functions below.
KEYS = (
    ['sigma8', 'Omegac', 'Omegab', 'hubble', 'ns']
    + [f'm{i}' for i in range(1, _N_WL + 1)]
    + [f'dz_wl_{i}' for i in range(1, _N_WL + 1)]
    + ['a_ia', 'eta']
    + [f'b{i}' for i in range(1, _N_GC + 1)]
    + [f'dz_gc_{i}' for i in range(1, _N_GC + 1)]
)

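# Column order of the Cobaya chain file for the LSST analysis.
# cobaya_statistics reads column 0 as the weight and columns [2:-4] as the parameters.
# weight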
# minuslogpost          
# sigma8          
# omegac          
# omegab          
# hubble              
# ns              
# m1     
# m2              
# m3              
# m4              
# m5         
# dz_wl_1         
# dz_wl_2         
# dz_wl_3         
# dz_wl_4         
# dz_wl_5            
# a_ia             
# eta              
# b1              
# b2              
# b3              
# b4              
# b5             
# b6              
# b7              
# b8              
# b9             
# b10         
# dz_gc_1         
# dz_gc_2         
# dz_gc_3         
# dz_gc_4 
# dz_gc_5         
# dz_gc_6         
# dz_gc_7         
# dz_gc_8         
# dz_gc_9        
# dz_gc_10   
# minuslogprior 
# minuslogprior__0    
# chi2  
# chi2__LSSTlike


# ---------------------
# Column order of the Cobaya chain file for the non-LSST analysis (same layout, fewer parameters).
# weight    
# minuslogpost          
# sigma8          
# omegac          
# omegab          
# hubble              
# ns              
# m1              
# m2              
# m3              
# m4         
# dz_wl_1         
# dz_wl_2         
# dz_wl_3         
# dz_wl_4            
# a_ia             
# eta              
# b1              
# b2              
# b3              
# b4              
# b5         
# dz_gc_1         
# dz_gc_2         
# dz_gc_3         
# dz_gc_4         
# dz_gc_5   
# minuslogprior 
# minuslogprior__0            
# chi2 
# chi2__my_likelihood

In [3]:
def summary_calculation(samples1: np.ndarray, samples2: np.ndarray, neval: float,
                        keys=None, decimals=3) -> pd.DataFrame:
    """Compute per-parameter convergence statistics from two chains.

    Column ``i`` of ``samples1`` and of ``samples2`` is treated as one chain each
    of the parameter ``keys[i]``; the stacked pair is passed to
    ``numpyro.diagnostics.summary``.

    Parameters
    ----------
    samples1, samples2 : np.ndarray
        2-D arrays with one column per parameter. The columns are stacked with
        ``np.vstack``, so both arrays need the same number of rows.
    neval : float
        Evaluation count used to normalise ``n_eff``.
    keys : sequence of str, optional
        Parameter names, one per column. Defaults to the module-level ``KEYS``.
    decimals : int or None, optional
        Number of decimals the statistics are rounded to. The default (3) is the
        previously hard-coded value; pass ``None`` to keep full precision, e.g.
        when the means and standard deviations are compared between samplers.

    Returns
    -------
    pd.DataFrame
        One row per parameter with columns ``r_hat``, ``n_eff``, ``mean``,
        ``std`` and ``scaled_n_eff`` (``n_eff / neval``).
    """
    names = KEYS if keys is None else list(keys)
    wanted = ['r_hat', 'n_eff', 'mean', 'std']

    columns = []
    for i, key in enumerate(names):
        # two rows = two chains of the same parameter
        chains = np.vstack([samples1[:, i], samples2[:, i]])
        stats = summary(chains)
        # the statistics of an unnamed array come back under 'Param:0';
        # relabel them with the parameter name
        frame = pd.DataFrame({key: stats['Param:0']})
        if decimals is not None:
            frame = frame.round(decimals)
        columns.append(frame.loc[wanted])

    record_df = pd.concat(columns, axis=1).T
    record_df['scaled_n_eff'] = record_df['n_eff'] / neval
    return record_df

## Cobaya

In [4]:
def cobaya_statistics(engine = 'jaxcosmo', cov = False):
    """Summarise the two Cobaya chains stored on disk for one engine.

    Parameters
    ----------
    engine : str
        Engine name used in the chain directory name.
    cov : bool
        Selects the "with covariance" chain directories when true.

    Returns
    -------
    pd.DataFrame
        Output of ``summary_calculation`` for the two chains, with the summed
        weights of both full chains as the evaluation count.
    """
    chains = []
    total_weight = 0
    for i in range(2):
        if ANALYSIS == 'lsst':
            if cov:
                fname = f'CobayaLSST/{engine}_cov_{i+1}/lsst.1.txt'
            else:
                fname = f'CobayaLSST/{engine}_{i+1}/lsst.1.txt'
        elif cov:
            fname = f'outputcobaya/withcov/{engine}_{i+1}/output_prefix.1.txt'
        else:
            fname = f'outputcobaya/testing/{engine}_{i+1}/output_prefix.1.txt'

        table = np.loadtxt(fname)
        # column 0 holds the sample weights; the first two and the last four
        # columns are not sampled parameters
        total_weight += sum(table[:, 0])
        chains.append(table[:, 2:-4])

    # both chains are cut to the length of the shorter one, keeping the tail
    shortest = min(chain.shape[0] for chain in chains)
    first, second = (chain[-shortest:] for chain in chains)

    return summary_calculation(first, second, total_weight)

In [34]:
## Mean scaled_n_eff (n_eff divided by the evaluation count) for each sampler
## LSST
# 0.0002872206804902297 (Cobaya EH)
# 0.027523081065323297 (NUTS EH)

# 0.00012025138105330559 (Cobaya EMU)
# 0.027563007453589822 (NUTS EMU)

## LSST (with cov for Cobaya)

# 0.02756300693233674 (NUTS EMU)
# 0.003097665090667806 (Cobaya EMU)

# 0.027523081065323297 (NUTS EH)
# 0.003518281352371748 (Cobaya EH)

## DES 
# 0.04482185394258569 (NUTS EH)
# 0.004543626076053639 (Cobaya EH)

# 0.03415634511934331 (NUTS Emulator)
# 0.004761498346259732 (Cobaya Emulator)

In [35]:
# LSST - EH: mean scaled n_eff of NUTS divided by that of Cobaya
# (values listed under "## LSST" above)
0.027523081065323297 / 0.0002872206804902297

95.825554825463

In [36]:
# LSST - Emulator: mean scaled n_eff of NUTS divided by that of Cobaya
# (values listed under "## LSST" above)
0.027563007453589822 / 0.00012025138105330559

229.21156673761246

In [37]:
# DES - EH: mean scaled n_eff of NUTS divided by that of Cobaya
# (values listed under "## DES" above)
0.04482185394258569 / 0.004543626076053639

9.864776104444681

In [38]:
# DES - Emulator: mean scaled n_eff of NUTS divided by that of Cobaya
# (values listed under "## DES" above)
0.03415634511934331 / 0.004761498346259732

7.173444709095387

With Covariance

In [39]:
# LSST - EH, Cobaya run with covariance: mean scaled n_eff of NUTS divided by
# that of Cobaya (values listed under "## LSST (with cov for Cobaya)" above)
0.027523081065323297 / 0.003518281352371748

7.822876657311504

In [40]:
# LSST - Emulator, Cobaya run with covariance: mean scaled n_eff of NUTS
# divided by that of Cobaya.
# NOTE(review): the numerator is the NUTS EMU value listed under "## LSST"
# (0.027563007453589822); the "## LSST (with cov for Cobaya)" list gives
# 0.02756300693233674 instead - confirm which one is intended.
0.027563007453589822 / 0.003097665090667806

8.89799466592681

## EMCEE

In [10]:
def emcee_stats(engine = 'jaxcosmo'):
    """Summarise the two stored emcee runs for one engine.

    Loads both samplers with ``pickle_load``, takes their flattened chains as
    they are (no discard or thinning is applied here) and passes them to
    ``summary_calculation``. The evaluation count is the total number of rows
    in the two flattened chains.

    Parameters
    ----------
    engine : str
        Engine name used as prefix of the stored sampler names.

    Returns
    -------
    pd.DataFrame
        Output of ``summary_calculation`` for the two flattened chains.
    """
    sampler_a = pickle_load('samples', f'{engine}_emcee_1')
    sampler_b = pickle_load('samples', f'{engine}_emcee_2')

    chain_a = sampler_a.flatchain
    chain_b = sampler_b.flatchain
    total_rows = chain_a.shape[0] + chain_b.shape[0]

    return summary_calculation(chain_a, chain_b, total_rows)

## NUTS

In [11]:
def nuts_stats(engine = 'jaxcosmo'):
    """Summarise a stored NUTS sampler for one engine.

    Parameters
    ----------
    engine : str
        Engine name used in the stored sampler name.

    Returns
    -------
    pd.DataFrame
        One row per entry of ``KEYS`` with columns ``r_hat``, ``n_eff``,
        ``mean``, ``std`` (rounded to 3 decimals) and ``scaled_n_eff``, which is
        ``n_eff`` divided by the total of the recorded ``num_steps`` values.
    """
    # NOTE(review): pickle_load presumably unpickles a file from disk - only
    # use it on trusted files.
    if ANALYSIS == 'lsst':
        sampler = pickle_load('lsst', f'nuts_sampler_{engine}')
    else:
        sampler = pickle_load('samples', f'{engine}_nuts_small_ss_high_td')

    # total of the 'num_steps' extra field over all chains and samples
    num_steps = sampler.get_extra_fields(group_by_chain=True)['num_steps'].sum().item()
    samples = sampler.get_samples(group_by_chain=True)

    wanted = ['r_hat', 'n_eff', 'mean', 'std']
    columns = []
    for key in KEYS:
        stats = summary(samples[key])
        # the statistics of an unnamed array come back under 'Param:0';
        # relabel them with the parameter name
        columns.append(pd.DataFrame({key: stats['Param:0']}).round(3).loc[wanted])

    record_df = pd.concat(columns, axis = 1).T
    record_df['scaled_n_eff'] = record_df['n_eff'] / num_steps
    return record_df

In [12]:
def nuts_stats_jaxcosmo():
    """Summarise the separately stored NUTS run loaded as 'nuts_jaxcosmo_info'.

    The stored dictionary provides ``'samples'`` (indexed by parameter name,
    then by chain) and ``'steps'`` (indexed by chain). The first two chains are
    arranged as arrays with one column per entry of ``KEYS`` and passed to
    ``summary_calculation``, with the summed steps of both chains as the
    evaluation count.

    Returns
    -------
    pd.DataFrame
        Output of ``summary_calculation`` for the two chains.
    """
    info = pickle_load('lsst', 'nuts_jaxcosmo_info')
    draws = info['samples']
    steps = info['steps']
    total_steps = sum(steps[0]) + sum(steps[1])

    # rows = parameters here; transposed below so that columns = parameters
    chain_a = np.asarray([draws[key][0] for key in KEYS])
    chain_b = np.asarray([draws[key][1] for key in KEYS])

    return summary_calculation(chain_a.T, chain_b.T, total_steps)

In [13]:
def numpyro_model():
    """Empty placeholder: takes no arguments and returns None.

    NOTE(review): presumably only needs to exist under this name so that the
    stored NUTS samplers can be loaded - confirm.
    """
    return None

In [14]:
# Build the summary tables for every sampler/engine pair of the chosen analysis.
if ANALYSIS != 'lsst':
    # non-LSST: Cobaya (without covariance), emcee and NUTS for both engines
    df_cobaya_jc = cobaya_statistics(engine='jaxcosmo')
    df_emcee_jc = emcee_stats(engine='jaxcosmo')
    df_nuts_jc = nuts_stats(engine='jaxcosmo')

    df_cobaya_emu = cobaya_statistics(engine='emulator')
    df_emcee_emu = emcee_stats(engine='emulator')
    df_nuts_emu = nuts_stats(engine='emulator')

else:
    # LSST: Cobaya chains run with covariance, no emcee tables
    df_cobaya_emu = cobaya_statistics(engine='emulator', cov=True)
    df_nuts_emu = nuts_stats(engine='emulator')

    df_cobaya_jc = cobaya_statistics(engine='jaxcosmo', cov=True)

    # the jaxcosmo NUTS run is stored separately because JAX 0.4.25 was used
    # on Glamdring
    df_nuts_jc = nuts_stats_jaxcosmo()

In [31]:
# Largest difference between the Cobaya and NUTS posterior means (jaxcosmo),
# in units of the two standard deviations added in quadrature.
# The tables hold values rounded to 3 decimals, which limits the resolution
# of this ratio.
max(np.abs((df_cobaya_jc['mean'] - df_nuts_jc['mean'])) / np.sqrt(df_cobaya_jc['std']**2 + df_nuts_jc['std']**2))

0.10101525445520994

In [32]:
# Largest difference between the Cobaya and NUTS posterior means (emulator),
# in units of the two standard deviations added in quadrature.
# The tables hold values rounded to 3 decimals, which limits the resolution
# of this ratio.
max(np.abs((df_cobaya_emu['mean'] - df_nuts_emu['mean'])) / np.sqrt(df_cobaya_emu['std']**2 + df_nuts_emu['std']**2))

0.1767766952966369

In [17]:
# scaled_n_eff averaged over all parameters: NUTS, emulator
df_nuts_emu['scaled_n_eff'].mean()

0.02756300693233674

In [18]:
# scaled_n_eff averaged over all parameters: Cobaya, emulator
df_cobaya_emu['scaled_n_eff'].mean()

0.003097665090667806

In [19]:
# scaled_n_eff averaged over all parameters: NUTS, jaxcosmo
df_nuts_jc['scaled_n_eff'].mean()

0.027523081065323297

In [20]:
# scaled_n_eff averaged over all parameters: Cobaya, jaxcosmo
df_cobaya_jc['scaled_n_eff'].mean()

0.003518281352371748

In [21]:
# Ratio of the mean scaled_n_eff of NUTS to that of Cobaya (emulator)
df_nuts_emu['scaled_n_eff'].mean() / df_cobaya_emu['scaled_n_eff'].mean()

8.89799449765391

In [22]:
# Ratio of the mean scaled_n_eff of NUTS to that of Cobaya (jaxcosmo)
df_nuts_jc['scaled_n_eff'].mean() / df_cobaya_jc['scaled_n_eff'].mean()

7.822876657311504

In [23]:
# First five rows of the NUTS (emulator) summary table
df_nuts_emu.head()

Unnamed: 0,r_hat,n_eff,mean,std,scaled_n_eff
sigma8,1.0,20873.033,0.816,0.007,0.021188
Omegac,1.0,12373.363,0.262,0.004,0.01256
Omegab,1.0,9912.097,0.052,0.002,0.010061
hubble,1.001,7967.451,0.654,0.012,0.008088
ns,1.001,13306.551,1.043,0.009,0.013507


In [24]:
# First five rows of the Cobaya (emulator) summary table
df_cobaya_emu.head()

Unnamed: 0,r_hat,n_eff,mean,std,scaled_n_eff
sigma8,1.0,11367.57,0.816,0.007,0.00304
Omegac,1.0,10039.203,0.262,0.004,0.002684
Omegab,1.0,9379.736,0.052,0.002,0.002508
hubble,1.0,8560.367,0.654,0.012,0.002289
ns,1.0,10845.647,1.042,0.009,0.0029


In [25]:
# First five rows of the NUTS (jaxcosmo) summary table
df_nuts_jc.head()

Unnamed: 0,r_hat,n_eff,mean,std,scaled_n_eff
sigma8,1.0,17650.966,0.811,0.008,0.019791
Omegac,1.0,13293.174,0.274,0.006,0.014905
Omegab,1.0,9933.145,0.049,0.003,0.011137
hubble,1.0,8654.722,0.658,0.015,0.009704
ns,1.0,19155.291,1.027,0.007,0.021477


In [26]:
# First five rows of the Cobaya (jaxcosmo) summary table
df_cobaya_jc.head()

Unnamed: 0,r_hat,n_eff,mean,std,scaled_n_eff
sigma8,1.0,13223.286,0.812,0.008,0.00381
Omegac,1.0,11976.858,0.274,0.006,0.00345
Omegab,1.0,12995.122,0.049,0.003,0.003744
hubble,1.0,10821.691,0.658,0.015,0.003118
ns,1.0,13975.483,1.026,0.007,0.004026
