In [1]:
import sys
import os
import numpy as np
import matplotlib.pylab as plt 

# Typeset every text element of the figures with LaTeX.
plt.rc('text', usetex=True)

# NOTE(review): the active family is 'sans-serif' while only the 'serif' font
# list is customised - presumably Palatino is never picked up; confirm intent.
plt.rc('font', family='sans-serif', serif=['Palatino'])

# Shared figure size (passed to matplotlib as figsize) and label/tick font size.
figSize = (12, 8)
fontSize = 20

In [2]:
# Input directory holding the Nz_DIR<number> sample sets. Set the
# KV450_SAMPLES_DIR environment variable to run the notebook on another machine.
# os.path.join(path, '') guarantees the trailing separator that the string
# concatenations further down rely on.
folder = os.path.join(
    os.environ.get('KV450_SAMPLES_DIR',
                   '/home/harry/Documents/Oxford/Projects/KV-450-Local/george-samples/'),
    '')

# Output directory for the thinned n(z) files and the trace plots. Set the
# KV450_WORK_DIR environment variable to redirect the output.
work_folder = os.path.join(
    os.environ.get('KV450_WORK_DIR', '/home/harry/Documents/Oxford/Projects/KV-450/'),
    '')

In [3]:
# Edges of the tomographic redshift bins, kept as strings so that the labels
# reproduce the tokens used in the n(z) file names exactly.
_z_edges = ('0.1', '0.3', '0.5', '0.7', '0.9', '1.2')

# One '<lower>t<upper>' label per pair of consecutive edges.
z_interval = [f'{lower}t{upper}' for lower, upper in zip(_z_edges, _z_edges[1:])]

In [4]:
def load_set(number: int):
    """Load the bootstrap n(z) sample files of one DIR set.

    One file is read for every label in the module-level ``z_interval`` list, from
    ``<folder>Nz_DIR<number>/Nz_DIR<number>_Bootstrap/Nz_z<label>_FULL_DIR<number>.asc``.

    Args:
        number (int): Identifier of the sample set; it selects the ``Nz_DIR<number>``
            directory and is part of every file name.

    Returns:
        list: The arrays returned by ``np.loadtxt``, one per redshift interval,
        in the order of ``z_interval``.
    """

    set_name = f'Nz_DIR{number}'

    # `folder` already ends with a path separator
    bootstrap_dir = f'{folder}{set_name}/{set_name}_Bootstrap'

    # follow z_interval rather than a hard-coded count, so that the labels and
    # the number of loaded files cannot drift apart
    return [
        np.loadtxt(f'{bootstrap_dir}/Nz_z{interval}_FULL_DIR{number}.asc')
        for interval in z_interval
    ]

In [27]:
def trace_heights(samples: list, ntomo: int, height: int, nlast: int = 5000, save: bool = False, number: int = 2):
    """Plot the trace of one n(z) height across the samples of a tomographic bin.

    Args:
        samples (list): n(z) sample arrays for the tomographic bins [0, 1, 2, 3, 4].
            The first column of each array is skipped; every other column is
            treated as one sample.
        ntomo (int): Index of the tomographic bin to plot.
        height (int): Row of the sample array whose values are traced.
        nlast (int): Only used to build the output directory name
            ``plots/set_<nlast>_<number>/heights/``.
        save (bool): If True, write the figure as PNG and PDF below ``work_folder``.
        number (int): Identifier of the sample set, used in the output directory name.
    """
    # drop the first column, then pick the row of the requested height
    trace = samples[ntomo][:, 1:][height]
    nsamples = len(trace)

    fig, ax = plt.subplots(figsize=figSize)
    try:
        ax.plot(trace)

        # Group the indices with braces: without them LaTeX applies the sub- and
        # superscript to the first digit only (h_10 would render as h_1 followed by 0).
        ax.set_ylabel(f'$h_{{{height}}}^{{{ntomo}}}$', fontsize=fontSize)
        ax.set_xlabel(r'$i$', fontsize=fontSize)
        ax.tick_params(axis='both', labelsize=fontSize)
        ax.set_xlim(0, nsamples)

        if save:
            # named out_dir so that the module-level input path `folder` is not shadowed
            out_dir = work_folder + f'plots/set_{nlast}_{number}/heights/'
            os.makedirs(out_dir, exist_ok=True)

            fname = out_dir + f'{ntomo}_{height}'
            fig.savefig(fname + '.png', bbox_inches='tight')
            fig.savefig(fname + '.pdf', bbox_inches='tight')
    finally:
        # always release the figure, even when saving fails, so that figures do
        # not pile up when this function is called in a loop
        plt.close(fig)

In [28]:
def thinning(samples: list, factor: int, nlast: int = 5000, save: bool = False, **kwargs):
    """Drop the first half of each chain, thin it and keep the last ``nlast`` samples.

    Every array in ``samples`` holds the redshift values in its first column and one
    n(z) sample in each remaining column. The redshift column and the number of
    samples of the first array are reused for all bins.

    Args:
        samples (list): One 2D array per redshift interval, ordered like ``z_interval``.
        factor (int): Thinning factor; every ``factor``-th sample of the second half
            of the chain is kept. Must be at least 1.
        nlast (int): Number of trailing thinned samples to keep. Must be at least 1.
        save (bool): If True, write the thinned samples and their mean to
            ``<work_folder>Nz_Bayes/Nz_Bayes_<nlast>/Nz_Bayes_Bootstrap_<number>`` and
            ``.../Nz_Bayes_Mean_<number>`` as text files.
        **kwargs: ``number`` - identifier of the sample set used in the output
            directory names; required when ``save`` is True.

    Returns:
        dict: Maps each label of ``z_interval`` to an array whose first column is the
        redshift and whose remaining columns are the retained n(z) samples.

    Raises:
        ValueError: If ``factor`` or ``nlast`` is smaller than 1.
        KeyError: If ``save`` is True and ``number`` is not given.
    """
    if factor < 1:
        # 0 is an invalid slice step and a negative step would silently reverse the chain
        raise ValueError(f'factor must be a positive integer, got {factor}.')
    if nlast < 1:
        # [:, -0:] would silently keep every sample instead of none
        raise ValueError(f'nlast must be a positive integer, got {nlast}.')

    if save:
        if 'number' not in kwargs:
            raise KeyError("thinning(..., save=True) requires the keyword argument 'number'.")

        # the output folders do not depend on the bin: create them once
        base = work_folder + f'Nz_Bayes/Nz_Bayes_{nlast}/'
        folder_samples = base + 'Nz_Bayes_Bootstrap_' + str(kwargs['number'])
        folder_mean = base + 'Nz_Bayes_Mean_' + str(kwargs['number'])
        os.makedirs(folder_samples, exist_ok=True)
        os.makedirs(folder_mean, exist_ok=True)

    # the first column is redshift - so we subtract 1
    nheights = samples[0].shape[1] - 1

    # the values of redshift, kept as a column vector
    redshift = samples[0][:, :1]

    files = {}

    for tomo, label in enumerate(z_interval):

        chain = samples[tomo][:, 1:]

        # keep the second half of the chain and thin it by the factor
        samples_thinned = chain[:, nheights // 2:][:, ::factor]

        if tomo == 0:
            print(f'Number of samples before slicing is {samples_thinned.shape[1]}')

        # take the last nlast samples
        samples_thinned = samples_thinned[:, -nlast:]

        # KV-450 takes as input, a file with the first column being redshift and others as heights
        # therefore, each column (except the first) is an n(z) sample
        samples_tomo = np.concatenate([redshift, samples_thinned], axis=1)
        files[label] = samples_tomo

        if save:
            # mean n(z) over the retained samples, stored next to the redshift column
            samples_mean = np.mean(samples_thinned, axis=1).reshape(-1, 1)
            samples_tomo_mean = np.concatenate([redshift, samples_mean], axis=1)

            fname = 'Nz_Bayes_z' + label

            # plain-text output with 5 decimals
            np.savetxt(folder_samples + '/' + fname + '.asc', samples_tomo, fmt='%1.5f')
            np.savetxt(folder_mean + '/' + fname + '.asc', samples_tomo_mean, fmt='%1.5f')

    return files

# Notes

- Set 2 contains 126 000 MCMC samples.
- The first half of the chain is discarded and the remaining 63 000 samples are thinned by a factor of 12, which leaves 5250 $n(z)$ samples.


In [29]:
# nz = load_set(2)

In [30]:
# files = thinning(nz, 12, save = False, number = 2)

Plot some traces

In [31]:
# for tomo in range(5):
#     for h in range(10):
#         trace_heights(nz, tomo, h, save = False, number = 2)

# Main

In [32]:
def main(num, nlast: int = 5000, ntrace: int = 10):
    """Load one sample set, thin it, save the thinned n(z) files and plot height traces.

    The thinning factor is chosen so that the second half of the chain yields at
    least ``nlast`` samples whenever the chain is long enough.

    Args:
        num (int): Identifier of the sample set; must be 2, 3, 4 or 5.
        nlast (int): Number of thinned samples to keep per redshift interval.
        ntrace (int): Maximum number of heights per bin for which a trace plot is saved.

    Returns:
        dict: The dictionary returned by ``thinning`` (interval label -> array).

    Raises:
        ValueError: If ``nlast`` is smaller than 1.
    """

    assert num in [2, 3, 4, 5], 'The number should be 2, 3, 4 or 5.'

    if nlast < 1:
        raise ValueError(f'nlast must be a positive integer, got {nlast}.')

    # load the n(z)
    nz = load_set(num)

    # the number of samples (heights); the first column is skipped
    nheights = nz[0].shape[1] - 1

    # Never let the factor drop to 0: when the half chain holds fewer than nlast
    # samples, keep every sample instead of failing on a zero slice step.
    thinning_factor = max(1, (nheights // 2) // nlast)
    files = thinning(nz, thinning_factor, nlast, save = True, number = num)

    print(f'The number of samples is {nheights}')
    print(f'The thinning factor is {thinning_factor}')
    print(f'Shape of file is {files[z_interval[0]].shape[1] - 1}')
    print('*'*50)

    for tomo, chain in enumerate(nz):
        # do not ask for more heights than the array has rows
        for h in range(min(ntrace, chain.shape[0])):
            trace_heights(nz, tomo, h, nlast, save = True, number = num)

    return files

In [35]:
# Process the four sample sets in order, keeping 5000 n(z) samples for each.
files_2, files_3, files_4, files_5 = (
    main(set_id, nlast = 5000) for set_id in (2, 3, 4, 5)
)

Number of samples before slicing is 5250
The number of samples is 126000
The thinning factor is 12
Shape of file is 5000
**************************************************
Number of samples before slicing is 5223
The number of samples is 94000
The thinning factor is 9
Shape of file is 5000
**************************************************
Number of samples before slicing is 5130
The number of samples is 184667
The thinning factor is 18
Shape of file is 5000
**************************************************
Number of samples before slicing is 5142
The number of samples is 267334
The thinning factor is 26
Shape of file is 5000
**************************************************


### Set 2 
- The number of samples is 126000
- The thinning factor is 12
- Number of samples after thinning is 5250 (the final file keeps the last 5000)

### Set 3 
- The number of samples is 94000
- The thinning factor is 9
- Number of samples after thinning is 5223 (the final file keeps the last 5000)

### Set 4 
- The number of samples is 184667
- The thinning factor is 18
- Number of samples after thinning is 5130 (the final file keeps the last 5000)

### Set 5
- The number of samples is 267334
- The thinning factor is 26
- Number of samples after thinning is 5142 (the final file keeps the last 5000)