In [23]:
import os
import pandas as pd 
import numpy as np 
import utils.statistics as us

In [2]:
def process_table(boot_file: str, mcmc_file: str, save: bool = True) -> pd.DataFrame:
    
    # load the file which contains the index of the n(z) sample
    boot_file = np.loadtxt(boot_file + '.txt')
    
    # load the MCMC file 
    mcmc = np.loadtxt(mcmc_file + '.txt')
    
    # create dataframes with specific column names
    boot_df = pd.DataFrame(boot_file, columns=['Index', '-logL'])
    mcmc_df = pd.DataFrame(mcmc, columns=['N', '-logL'] + ['p'+str(i+1) for i in range(11)])
    mcmc_df['N'] = mcmc_df['N'].astype('int')
    
    # find the rows common to both files (by comparing the log-likelihood)
    index_df = pd.DataFrame(boot_df[boot_df['-logL'].isin(mcmc_df['-logL'])]['Index'], dtype=int)
    index_df.reset_index(drop=True, inplace = True)
    
    # combine both files
    final_df = pd.concat([index_df, mcmc_df], axis = 1)
    final_df = final_df.dropna(axis=0)
    
    if save:
        final_df.to_csv(mcmc_file + '_boot.csv')
    
    return final_df

In [5]:
PATH = '/home/harry/Desktop/MontePython-V3.4/chains/'
folder = PATH + 'KV-450-Bayes-Random-Set-6/'

# full path where the likelihood values are stored
like = PATH + 'mp_output_6'

files = np.asarray(os.listdir(folder))

In [4]:
mcmc_files = files[['__' in files[i] for i in range(len(files))]]
mcmc_files = [x.split('.')[0] for x in mcmc_files]

In [6]:
all_df = []

for i in range(8):
    df = process_table(like, folder + mcmc_files[i], save = False)
    all_df.append(df)

In [7]:
samples = pd.concat(all_df, axis = 0)
samples.reset_index(inplace=True)

In [8]:
set_2 = samples[(samples['Index'] >= 0) & (samples['Index'] <= 4999)]
set_3 = samples[(samples['Index'] >= 5000) & (samples['Index'] <= 9999)]
set_4 = samples[(samples['Index'] >= 10000) & (samples['Index'] <= 14999)]
set_5 = samples[(samples['Index'] >= 15000) & (samples['Index'] <= 19999)]

In [19]:
def extract_parameters(file: pd.DataFrame):
    
    samples = file[['p'+str(i+1) for i in range(11)]].values
    nsamples = samples.shape[0]
    
    # compute S_8 
    S8 = samples[:,-1] * np.sqrt(samples[:,-2] / 0.3)
    S8 = S8.reshape(nsamples, 1) 
    Om = samples[:,-2].reshape(nsamples, 1)
    
    S8_Om = np.concatenate([Om, S8], axis = 1)
    
    return samples, S8_Om

In [22]:
labels = [r'$\Omega_{\textrm{m}}$', r'$S_{8}$']

In [27]:
samples_combined, two_params_combined = extract_parameters(samples)
samples_2, two_params_2 = extract_parameters(set_2)
samples_3, two_params_3 = extract_parameters(set_3)
samples_4, two_params_4 = extract_parameters(set_4)
samples_5, two_params_5 = extract_parameters(set_5)

In [28]:
us.summary(two_params_combined, labels, 1)

Using the median
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.100)
$S_{8}$                                  :  0.757 ± (0.039, 0.040)

Using the mean
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.101)
$S_{8}$                                  :  0.756 ± (0.039, 0.039)


In [29]:
us.summary(two_params_2, labels, 1)

Using the median
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.100)
$S_{8}$                                  :  0.756 ± (0.039, 0.040)

Using the mean
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.306 ± (0.096, 0.100)
$S_{8}$                                  :  0.756 ± (0.039, 0.039)


In [30]:
us.summary(two_params_3, labels, 1)

Using the median
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.303 ± (0.098, 0.100)
$S_{8}$                                  :  0.757 ± (0.040, 0.040)

Using the mean
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.101)
$S_{8}$                                  :  0.756 ± (0.040, 0.039)


In [31]:
us.summary(two_params_4, labels, 1)

Using the median
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.099)
$S_{8}$                                  :  0.757 ± (0.039, 0.040)

Using the mean
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.095, 0.100)
$S_{8}$                                  :  0.756 ± (0.040, 0.040)


In [32]:
us.summary(two_params_5, labels, 1)

Using the median
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.097, 0.102)
$S_{8}$                                  :  0.757 ± (0.039, 0.040)

Using the mean
--------------------------------------------------------------------------------
$\Omega_{\textrm{m}}$                    :  0.305 ± (0.096, 0.102)
$S_{8}$                                  :  0.756 ± (0.039, 0.040)


In [33]:
# os.makedirs('subsets', exist_ok=True)
# samples.to_csv('subsets/all_samples.csv')
# set_2.to_csv('subsets/set_2.csv')
# set_3.to_csv('subsets/set_3.csv')
# set_4.to_csv('subsets/set_4.csv')
# set_5.to_csv('subsets/set_5.csv')