In [None]:
import matplotlib.pyplot as plt
import sys
import classy
import os
module_path = classy.__file__
print(f"Path to module: {module_path}")
from classy import Class
import chaospy as cp
import pyDOE
import numpy as np
import time
from scipy.interpolate import interp1d

from spectra import pZA, p1L, p1L_cleft, p1L_kecleft
from covs import Pmm_cov, F, Pmm_cov_ZCV, addGaussianNoise


Path to module: /Users/alexabartlett/.local/lib/python3.9/site-packages/classy-3.2.3-py3.9-macosx-10.9-x86_64.egg/classy.cpython-39-darwin.so


In [6]:
class QuasiRandomSequence():
    """A simple class to generate quasi-random, or low-discrepancy, sequences
       in multiple dimensions.  Based upon
       http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/

       The n'th point of the sequence is (seed + n*alpha) mod 1, where alpha
       holds successive powers of the inverse generalized golden ratio for
       n_dim dimensions.
    """
    def __init__(self,n_dim=1,seed=0.5):
        """Initializes the class and sets the vector alpha.  The sequences
           will consist of n_dim-dimensional vectors in [0,1)^n_dim.

        Args:
            n_dim: number of dimensions of the generated vectors (must be >= 1).
            seed: offset added to every point before wrapping into [0,1).

        Raises:
            ValueError: if n_dim < 1 (the Newton-Raphson step below would
                otherwise divide by zero for n_dim == 0).
        """
        if n_dim < 1:
            raise ValueError("n_dim must be a positive integer, got %r" % (n_dim,))
        self.seed = seed
        # Get the generalized golden ratio for n_dim dimensions, aka
        # the "harmonius numbers" of Hans van de Laan.  This solves
        # x^{d+1}=x+1 using Newton-Raphson:
        phi = 1.0
        for _ in range(20):
            phi = phi-(pow(phi,n_dim+1)-phi-1)/((n_dim+1)*pow(phi,n_dim)-1)
        self.phiinv = 1.0/phi
        # and hence generate our base vector, alpha.
        self.alpha = np.array([(self.phiinv**(i+1))%1 for i in range(n_dim)])

    def __call__(self,n):
        """Returns the first n vectors in the (Korobov) sequence, with x[i,:]
           being the (i+1)'th vector, i.e. x[i,:] == get_vector(i+1).

        The points are computed in closed form as seed + k*alpha rather than
        by accumulating alpha k times, so the rows agree exactly with
        get_vector() and do not pick up accumulated rounding error.
        """
        idx = np.arange(1, n+1)
        return (self.seed + idx[:, None]*self.alpha) % 1

    def get_vector(self,n):
        """Returns just the nth vector in the sequence, starting at n=1.
           (n=0 returns the seed itself, wrapped into [0,1).)"""
        return (self.seed + n*self.alpha) % 1
        

In [7]:
# Code to generate points in parameter space
def get_seq(pmin, pmax, scheme='qrs', ntot=None):
    """Builds the point generator / design for the varying parameters.

    Only parameters where pmin != pmax are varied.  The dimensions of pmin
    and pmax should equal the total number of parameters for the model.

    Args:
        pmin: array of lower bounds, one entry per model parameter.
        pmax: array of upper bounds, same shape as pmin.
        scheme: one of 'qrs', 'sobol', 'lhs', 'lhs_maximin'.
        ntot: number of points in the design (used by every scheme except
            'qrs', whose sequence is open-ended).

    Returns:
        For 'qrs' a QuasiRandomSequence whose vectors have one entry per
        varying parameter; otherwise an array indexed as seq[n], where each
        row has one entry per varying parameter.

    Raises:
        ValueError: if scheme is not one of the supported names.
    """
    ndim = int(np.sum(np.abs(pmax-pmin) > 0))
    if scheme == 'qrs':
        # The sequence must have one dimension per varying parameter; a
        # 1-D sequence would give every parameter the same fraction and put
        # all samples on the diagonal of the hypercube.  max(..., 1) keeps
        # the degenerate "nothing varies" case working.
        seq = QuasiRandomSequence(n_dim=max(ndim, 1))
    elif scheme == 'sobol':
        import chaospy as cp
        seq = cp.create_sobol_samples(ntot, ndim).T
    elif scheme == 'lhs':
        seq = pyDOE.lhs(ndim, samples=ntot)
    elif scheme == 'lhs_maximin':
        seq = pyDOE.lhs(ndim, samples=ntot, criterion='maximin')
    else:
        raise ValueError(
            "Unknown sampling scheme %r; expected 'qrs', 'sobol', 'lhs' "
            "or 'lhs_maximin'" % (scheme,))
    return seq

def sample(pmin, pmax, scheme, n, nsamps, seq=None):
    """Gets the n'th vector of parameter values.

    Args:
        pmin: array of lower bounds, one entry per model parameter.
        pmax: array of upper bounds, same shape as pmin.
        scheme: sampling scheme name understood by get_seq ('aemulus' is
            also accepted when a ready-made seq is supplied).
        n: index of the requested point.
        nsamps: total number of points in the design (forwarded to get_seq
            when no seq is given).
        seq: optional pre-built design/sequence as returned by get_seq.
            When omitted a new one is generated on every call; for the
            random schemes ('lhs', 'lhs_maximin') that means successive
            calls draw from *different* designs, so pass a shared seq when
            looping over n to keep a single consistent design.

    Returns:
        Array of parameter values: entries with pmin == pmax are kept at
        pmin, the others are mapped linearly from [0,1) into [pmin, pmax).
        For scheme 'aemulus' the n'th row of seq is returned unchanged.
    """
    if seq is None:
        seq = get_seq(pmin, pmax, scheme, ntot=nsamps)

    if scheme == 'aemulus':
        # The supplied design already holds full parameter vectors.
        return seq[n]

    v = seq.get_vector(n) if scheme == 'qrs' else seq[n]

    # Work in float so that integer-typed bounds are not silently truncated.
    lo = np.asarray(pmin, dtype=float)
    hi = np.asarray(pmax, dtype=float)
    vary = np.abs(hi-lo) > 0
    pars = lo.copy()
    pars[vary] = lo[vary] + (hi-lo)[vary]*v
    return pars


In [8]:
def get_training_data(size, pmin, pmax, zs, scheme, Vobs, deltak, savedir, add_noise, use_sigma_8, ZCV):
    """ Writes training data to given save directory.

    For every sampled cosmology and every redshift in zs this evaluates the
    CLASS linear matter power spectrum on kk_lin, the HMcode non-linear
    spectrum on kk_out (optionally with Gaussian noise added), and the second
    element returned by p1L(...) (presumably the 1-loop spectrum on kk_out).
    Results are written to savedir as k_out.txt, pk_lin.npy, pk_1L.npy,
    pk_hmcode.npy, params.txt and, depending on the flags, info.txt and
    sigma_8.txt.

    Args:
        size: number of samples to generate
        pmin: array of minimum values of cosmological parameters to be sampled,
            ordered [As, ns, H0, w0, wa, omega_b, omega_cdm, m_ncdm]
        pmax: array of maximum values of cosmological parameters to be sampled
        zs: sequence of redshifts at which the spectra are evaluated
        scheme: method used to sample parameter space
        Vobs: 'observed'/'simulated' volume in [Mpc/h]^3
        deltak: k-bin size in [h/Mpc]
        savedir: directory to save training data to (created if missing)
        add_noise: flag indicating whether to add noise to training data or not
        use_sigma_8: flag indicating whether to save sigma_8(z)
        ZCV: flag indicating whether or not to use ZCV to reduce large-scale variance

    Returns:
        tuple: (pk_lins, pks_hmcode, pks_1L), each a list indexed
        [cosmology][redshift] holding the spectrum for that combination.
    """
    start = time.time()

    # Make sure the target directory exists before the first write.
    if savedir:
        os.makedirs(savedir, exist_ok=True)

    fixed_params = {
        'z_max_pk': 10,
        'N_ur': 0.00441,
        'N_ncdm': 3,
        'output': 'mPk',
        'P_k_max_h/Mpc': 20.,
        'deg_ncdm': '1,1,1',
        'T_cmb': 2.7255,
        'Omega_Lambda': 0.0,
        'cs2_fld': 1.0,
        'fluid_equation_of_state': "CLP"}

    # P_linear
    temp = Class()
    temp.set(fixed_params)

    # hmcode
    hmcode = Class()
    hmcode.set(fixed_params)
    hmcode.set({'non_linear':'hmcode'})

    # linear power spectrum should be computed for a larger range of ks than desired for 1-loop
    kk_lin = np.logspace(-3.0,1,700)
    kk_out = np.logspace(-2, np.log10(4),700)
    np.savetxt(savedir+'/k_out.txt', np.array(kk_out).T)

    pk_lins = []
    pks_hmcode = []
    pks_1L = []
    param_list = []
    # Always defined so the bookkeeping below is valid whatever the flag is.
    sigma_8 = []

    z_string = ", ".join(str(z) for z in zs)

    # Draw the design ONCE.  Rebuilding it for every cosmology would, for the
    # random LHS schemes, take row i from a different design each time and
    # lose the stratification (and repeat the costly maximin search).
    vary = np.abs(pmax-pmin) > 0
    design = get_seq(pmin, pmax, scheme, ntot=size) if size > 0 else None

    # looping over cosmologies
    for i in range(size):
        unit = design.get_vector(i) if scheme == 'qrs' else design[i]
        samp = pmin.copy()
        samp[vary] = pmin[vary] + (pmax-pmin)[vary]*unit

        m_each = str(samp[7]/3)  # total neutrino mass split over 3 species
        temp_params = {
            'A_s': samp[0],
            'n_s': samp[1],
            'h': samp[2]/100,
            'w0_fld': samp[3],
            'wa_fld': samp[4],
            'omega_b': samp[5],
            'omega_cdm': samp[6],
            'm_ncdm': m_each+','+m_each+','+m_each,
            'z_pk': z_string,
        }
        temp.set(temp_params)
        temp.compute()

        hmcode.set(temp_params)
        hmcode.compute()

        h_lin = temp.h()
        h_nl = hmcode.h()

        Pk_lin_temp = []
        Pk_hmcode_temp = []
        Pk_1L_temp = []
        sigma_8_temp = []

        # looping over redshifts
        for z in zs:
            # k is multiplied by h and P by h^3 to go from CLASS's Mpc-based
            # units to h/Mpc and (Mpc/h)^3.
            pk_lin_z = [temp.pk(k*h_lin, z)*h_lin**3 for k in kk_lin]
            pk_nl_z = [hmcode.pk(k*h_nl, z)*h_nl**3 for k in kk_out]

            if add_noise:
                if ZCV:
                    cov = Pmm_cov_ZCV(kk_out, np.array(pk_nl_z[:]), Vobs, deltak)
                else:
                    cov = Pmm_cov(kk_out, pk_nl_z[:], Vobs, deltak)
                pk_nl_z[:] = addGaussianNoise(pk_nl_z[:], cov, Vobs, deltak)

            Pk_lin_temp.append(pk_lin_z)
            Pk_hmcode_temp.append(pk_nl_z)
            Pk_1L_temp.append(p1L(kk_lin, kk_out, pk_lin_z[:])[1])

            if use_sigma_8:
                # classy's sigma(R, z) takes R in Mpc, so sigma_8 (defined at
                # R = 8 Mpc/h) needs R = 8/h.
                sigma_8_temp.append(temp.sigma(8.0/h_lin, z))

        pks_1L.append(Pk_1L_temp)
        pk_lins.append(Pk_lin_temp)
        pks_hmcode.append(Pk_hmcode_temp)
        if use_sigma_8:
            sigma_8.append(sigma_8_temp)

        param_list.append(samp)

    if add_noise:
        info_header = "Parameters used to generate this training data: Vobs [h^-3 Mpc^3], deltak [h/Mpc], ZCV (true yes, false no)"
        info = [Vobs, deltak, ZCV]
        np.savetxt(savedir+'/info.txt', np.array(info), header=info_header)

    np.save(savedir+'/pk_lin.npy', pk_lins)
    np.save(savedir+'/pk_1L.npy', pks_1L)
    np.save(savedir+'/pk_hmcode.npy', pks_hmcode)

    if use_sigma_8:
        np.savetxt(savedir+'/sigma_8.txt', sigma_8)
    # Header lists exactly the eight columns that are written (no z column).
    np.savetxt(savedir+'/params.txt', param_list, header="As, ns, H0, w0, wa, omega_b, omega_cdm, m_ncdm")

    end = time.time()
    print('Time elapsed: ', round((end-start)/60, 2), ' minutes')
    return pk_lins, pks_hmcode, pks_1L


In [9]:
# Bounds of the 8D cosmological parameter space.
# Each row is (min, max) for one parameter; the rows follow the order
# [As, ns, H0, w0, wa, omega_b, omega_cdm, m_ncdm] used by the samplers.
_TIER1_BOUNDS = [
    (1.10e-9, 3.10e-9),  # As
    (0.93,    1.01),     # ns
    (52.,     82.),      # H0
    (-1.56,   -0.44),    # w0
    (-2.,     0.),       # wa
    (0.0173,  0.0272),   # omega_b
    (0.08,    0.16),     # omega_cdm
    (0.01,    0.5),      # m_ncdm
]
pmin_tier1, pmax_tier1 = (np.array(column) for column in zip(*_TIER1_BOUNDS))

# Narrower (tier 2) box in the same parameter order.
_TIER2_BOUNDS = [
    (1.77e-9, 2.43e-9),  # As
    (0.93,    1.01),     # ns
    (59.5,    74.5),     # H0
    (-1.28,   -0.72),    # w0
    (-1.,     0.),       # wa
    (0.0198,  0.0248),   # omega_b
    (0.11,    0.13),     # omega_cdm
    (0.01,    0.5),      # m_ncdm
]
pmin_tier2, pmax_tier2 = (np.array(column) for column in zip(*_TIER2_BOUNDS))


In [10]:
# Run configuration
scheme = 'lhs_maximin'
train_size = 100
test_size = 10
Vobs = 8e9 # Simulation size in (Mpc/h)^3
deltak = 0.01 # k-bin size
zs = np.load('zs.npy')
savedir = 'example_data' # sub-directory used inside training_data/ and testing_data/

# Seed NumPy's global RNG so the Latin-hypercube design is reproducible on
# Restart & Run All (pyDOE.lhs draws from np.random).
# NOTE(review): the added noise is only reproducible if addGaussianNoise also
# uses NumPy's global RNG - confirm in covs.py.
SEED = 0
np.random.seed(SEED)

# Create the output directories up front so a fresh checkout runs without
# any manual setup.
train_dir = 'training_data/'+savedir
test_dir = 'testing_data/'+savedir
for out_dir in (train_dir, test_dir):
    os.makedirs(out_dir, exist_ok=True)

# training data
dat = get_training_data(train_size, pmin_tier1, pmax_tier1, zs, scheme, Vobs, deltak, train_dir, add_noise=True, use_sigma_8=True, ZCV=True)
# testing data
dat_test = get_training_data(test_size, pmin_tier1, pmax_tier1, zs, scheme, Vobs, deltak, test_dir, add_noise=True, use_sigma_8=True, ZCV=True)


Time elapsed:  25.2  minutes
Time elapsed:  1.94  minutes
