In [6]:
from astropy.io import fits
import h5py
import numpy as np
import os

## Define some variables here:

In [2]:
# Simulation realisation used by the cells that are not wrapped in a seed loop.
# Note: the `for seed in range(2,11)` loops further down rebind this name.
seed = 2

# Selects which shear columns are exported further down:
# True -> 'e1'/'e2', False -> 'g1'/'g2'.
with_shape_noise = False

# Number of tomographic bins in the gaussian sims (sources, lenses).
s_bins, l_bins = 4, 6

## Let's check what the files we would like to create look like

In [2]:
# Existing binned lens catalog whose layout is inspected in the cells below.
path_lens_cat = '/global/cscratch1/sd/jprat/TXPipe/data/cosmodc2/outputs_redmagic/2021/june/7sbins/binned_lens_catalog.hdf5'
# Opened read-only. NOTE(review): this handle is never closed explicitly; `f` is rebound further down.
f = h5py.File(path_lens_cat,'r')

In [3]:
# List the members of the 'lens' group.
f['lens'].keys()

<KeysViewHDF5 ['bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4', 'bin_all']>

In [4]:
# List the members of the 'provenance' group.
f['provenance'].keys()

<KeysViewHDF5 []>

In [9]:
# List what is stored under the combined 'lens/bin_all' group.
f['lens/bin_all'].keys()

<KeysViewHDF5 ['dec', 'ra', 'weight']>

In [145]:
# Existing binned shear catalog whose layout is inspected in the cells below.
path_source_cat = '/global/cscratch1/sd/jprat/TXPipe/data/cosmodc2/outputs_redmagic/2021/june/7sbins/binned_shear_catalog.hdf5'
# Rebinds `f` (previously the lens catalog) without closing the earlier handle.
f = h5py.File(path_source_cat,'r')

In [160]:
# List what is stored under the first shear bin.
f['shear/bin_0'].keys()

<KeysViewHDF5 ['dec', 'g1', 'g2', 'ra', 'weight']>

In [150]:
# List the names of the attributes attached to the 'shear' group.
f['shear'].attrs.keys()

<KeysViewHDF5 ['bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4', 'bin_5', 'bin_6', 'bin_7', 'nbin', 'nbin_source']>

In [152]:
# Value of the 'nbin' attribute of the 'shear' group.
f['shear'].attrs['nbin']

8

In [153]:
# Value of the 'nbin_source' attribute of the 'shear' group.
f['shear'].attrs['nbin_source']

7

In [63]:
# List the members of the 'randoms' group.
# NOTE(review): in reading order `f` is bound to the binned shear catalog at this point;
# the recorded output presumably came from a session where `f` pointed at a
# random catalog file - confirm and open that file explicitly before this cell.
f['randoms/'].keys()

<KeysViewHDF5 ['bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4']>

In [64]:
# List what is stored under the first randoms bin (same `f` as the previous cell).
f['randoms/bin_0'].keys()

<KeysViewHDF5 ['dec', 'ra', 'z']>

## Create the HDF5 file for the lens sample

In [11]:
# Convert the per-bin FITS lens catalogues of every seed into one
# binned_lens_catalog.hdf5 per seed, laid out as lens/bin_<i>/{ra,dec,weight}
# plus a concatenated lens/bin_all group and the 'nbin_lens' attribute.
for seed in range(2,11):
    folder = '/global/cscratch1/sd/jprat/gaussian_sims/seed%d/'%seed

    # Per-bin arrays are collected here and concatenated once at the end.
    # Extending a Python list with a numpy array would turn every single
    # catalogue entry into a separate Python object.
    ra_all = []
    dec_all = []
    w_all = []

    # The context manager closes the output file even if one of the bins
    # fails, so a re-run is not blocked by a stale write handle.
    with h5py.File(folder + 'binned_lens_catalog.hdf5', 'w') as hf:
        g_lens = hf.create_group('lens')

        for i in range(l_bins):
            # Input files are numbered from 1, output bins from 0.
            with fits.open(folder + 'cat_count_zbin%d_signalnoise_S.fits'%(i+1)) as fits_cat:
                table = fits_cat[1].data
                # Copy the columns so they remain valid once the FITS file is closed.
                ra = np.array(table['ra'])
                dec = np.array(table['dec'])
                w = np.array(table['w'])

            ra_all.append(ra)
            dec_all.append(dec)
            w_all.append(w)

            g_lens.create_dataset('bin_%d/ra'%i,data=ra)
            g_lens.create_dataset('bin_%d/dec'%i,data=dec)
            g_lens.create_dataset('bin_%d/weight'%i,data=w)

        # Non-tomographic sample: all bins stacked in bin order.
        g_lens.create_dataset('bin_all/ra',data=np.concatenate(ra_all))
        g_lens.create_dataset('bin_all/dec',data=np.concatenate(dec_all))
        g_lens.create_dataset('bin_all/weight',data=np.concatenate(w_all))

        g_lens.attrs['nbin_lens'] = l_bins

In [4]:
def make_directory(directory):
    """Create `directory` (and any missing parents) if it does not exist yet.

    Calling it on an existing directory is a no-op. Using ``exist_ok=True``
    instead of a separate existence check avoids failing when another process
    creates the directory between the check and the creation.

    Raises
    ------
    FileExistsError
        If `directory` exists but is not a directory.
    """
    os.makedirs(directory, exist_ok=True)

## Let's repeat the process for the source sample

In [7]:
# Convert the per-bin FITS shear catalogues of every seed into one
# binned_shear_catalog.hdf5 per seed, laid out as
# shear/bin_<i>/{ra,dec,weight,g1,g2} plus a concatenated shear/bin_all group
# and the 'nbin_source' attribute.
for seed in range(2,11):
    folder = '/global/cscratch1/sd/jprat/gaussian_sims/seed%d/'%seed

    # Choose the output sub-directory and the FITS columns that feed the
    # 'g1'/'g2' datasets: 'e1'/'e2' with shape noise, 'g1'/'g2' without.
    if with_shape_noise:
        out_dir = folder + 'with_shape_noise/'
        shear_cols = ('e1', 'e2')
    else:
        out_dir = folder + 'no_shape_noise/'
        shear_cols = ('g1', 'g2')
    path_sources = out_dir + 'binned_shear_catalog.hdf5'

    # The output directory is created for both settings; opening the HDF5
    # file for writing fails if its parent directory is missing.
    make_directory(out_dir)
    print('We are saving %s, %s here:'%shear_cols, path_sources)

    # Output dataset name -> FITS column it is read from.
    columns = {'ra': 'ra', 'dec': 'dec', 'weight': 'w',
               'g1': shear_cols[0], 'g2': shear_cols[1]}
    # Per-bin arrays, concatenated once at the end to build 'bin_all'.
    per_bin = {name: [] for name in columns}

    # The context manager closes the output file even if one of the bins fails.
    with h5py.File(path_sources, 'w') as hf:
        g_source = hf.create_group('shear')

        for i in range(s_bins):
            # Input files are numbered from 1, output bins from 0.
            with fits.open(folder + 'cat_shear_zbin%d_signalnoise_S.fits'%(i+1)) as fits_cat:
                table = fits_cat[1].data
                for name, fits_col in columns.items():
                    # Copy so the data stays valid once the FITS file is closed.
                    values = np.array(table[fits_col])
                    per_bin[name].append(values)
                    g_source.create_dataset('bin_%d/%s'%(i, name),data=values)

        # Non-tomographic sample: all bins stacked in bin order.
        for name, chunks in per_bin.items():
            g_source.create_dataset('bin_all/%s'%name,data=np.concatenate(chunks))

        # Do the attributes
        g_source.attrs['nbin_source'] = s_bins

We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed2/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed3/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed4/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed5/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed6/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed7/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed8/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed9/no_shape_noise/binned_shear_catalog.hdf5
We are saving g1, g2 here: /global/cscratch1/sd/jprat/gaussian_sims/seed10/no_sh

## Let's do the randoms now

In [138]:
# Create (or overwrite) the random catalog file for the current value of `seed`
# and add the top-level 'randoms' group; the file is closed at the end of the next cell.
# NOTE(review): when the notebook is run top to bottom, `seed` is the last value
# left by the seed loops above, so only that one seed gets a random catalog.
hf = h5py.File('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/binned_random_catalog.hdf5'%seed, 'w')
g_rand = hf.create_group('randoms')

In [139]:
folder = '/global/cscratch1/sd/jprat/gaussian_sims/seed%d/'%seed

# Copy the per-bin random catalogues into randoms/bin_<i>/{ra,dec,z}.
# Only per-bin groups are written, so nothing is accumulated across bins.
# `hf` and `g_rand` come from the previous cell; try/finally makes sure the
# HDF5 file is closed even if one of the FITS reads fails.
try:
    for i in range(l_bins):
        # Input files are numbered from 1, output bins from 0.
        with fits.open(folder + 'randoms_maglim_zbin%d.fits'%(i+1)) as fits_cat:
            table = fits_cat[1].data
            # Copy so the data stays valid once the FITS file is closed.
            ra = np.array(table['ra'])
            dec = np.array(table['dec'])
            z = np.array(table['z'])

        g_rand.create_dataset('bin_%d/ra'%i,data=ra)
        g_rand.create_dataset('bin_%d/dec'%i,data=dec)
        g_rand.create_dataset('bin_%d/z'%i,data=z)
finally:
    hf.close()

## Now check the input of the Jackknife stage in TXPipe, which requires a different input from the twopoint stage

## Now let's deal with the metadata file

In [2]:
# Existing tracer metadata file, opened read-only to inspect its layout in the cells below.
meta = h5py.File('/global/cscratch1/sd/jprat/TXPipe/data/cosmodc2/outputs_redmagic/2021/june/7sbins/tracer_metadata.hdf5', 'r')

In [61]:
# List the top-level groups of the metadata file.
meta.keys()

<KeysViewHDF5 ['provenance', 'tracers']>

In [56]:
# List the members of the 'tracers' group.
meta['tracers'].keys()

<KeysViewHDF5 ['N_eff', 'N_eff_2d', 'R_S', 'R_S_2d', 'R_gamma_mean', 'R_gamma_mean_2d', 'R_total', 'R_total_2d', 'lens_counts', 'lens_counts_2d', 'lens_density', 'lens_density_2d', 'mean_e1', 'mean_e1_2d', 'mean_e2', 'mean_e2_2d', 'n_eff', 'sigma_e', 'sigma_e_2d', 'source_counts', 'source_counts_2d', 'source_density', 'source_density_2d']>

In [57]:
# Keep a view of the 'tracers' member names; it is not used again in the cells shown here.
keys = meta['tracers'].keys()

In [54]:
# Read the full 'lens_density' dataset into memory.
meta['tracers/lens_density'][:] 

array([0.01811546, 0.0348763 , 0.0598026 , 0.08908671, 0.1072563 ])

In [71]:
# Read the full 'N_eff' dataset into memory.
meta['tracers/N_eff'][:]

array([6401491., 5513226., 9845754., 7546428., 3638492.,  794157.,
         92664.], dtype=float32)

In [72]:
# Read the full 'source_counts' dataset into memory.
meta['tracers/source_counts'][:]

array([6401491, 5513226, 9845754, 7546428, 3638492,  794157,   92664],
      dtype=int32)

In [6]:
# Read the full 'N_eff_2d' dataset into memory.
meta['tracers/N_eff_2d'][:]

array([33832212.], dtype=float32)

In [87]:
# Read the full 'R_S_2d' dataset into memory.
meta['tracers/R_S_2d'][:]

array([[0., 0.],
       [0., 0.]], dtype=float32)

In [8]:
# Read the full 'sigma_e' dataset into memory.
meta['tracers/sigma_e'][:]

array([0.2600776 , 0.25999746, 0.26015183, 0.2601616 , 0.26032376,
       0.260263  , 0.26117256], dtype=float32)

In [107]:
# Read the full 'n_eff' dataset into memory.
meta['tracers/n_eff'][:]

array([4.0295334 , 3.4703991 , 6.1975865 , 4.7502346 , 2.290314  ,
       0.49989638, 0.05832902], dtype=float32)

In [108]:
# Read the full 'source_density' dataset into memory.
meta['tracers/source_density'][:]

array([4.02953349, 3.47039913, 6.19758669, 4.75023465, 2.29031414,
       0.49989639, 0.05832902])

### Let's create a new metadata file with the right number of redshift bins

In [156]:
# Create (or overwrite) the tracer metadata file for the current value of `seed`.
# NOTE(review): when the notebook is run top to bottom, `seed` is the last value
# left by the seed loops above, not the one set in the configuration cell.
meta_w = h5py.File('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/tracer_metadata.hdf5'%seed, 'w')

In [157]:
# Group that holds every metadata dataset; also used below to attach the 'area' attribute.
tracers = meta_w.create_group('tracers/')

In [158]:
# Values written to the tracer metadata file. Lists have one entry per
# tomographic bin; the *_2d names hold the value for all bins combined.

# --- source sample (s_bins entries) ---
N_eff = [10846462., 10968695., 10696726., 11328877.]
N_eff_2d = np.sum(N_eff)
# One 2x2 matrix per source bin: zeros for R_S, identity for R_gamma_mean and R_total.
R_S = [[[0,0], [0,0]], [[0,0], [0,0]], [[0,0], [0,0]], [[0,0], [0,0]]]
R_S_2d = np.sum(R_S, axis = 0)
R_gamma_mean = [[[1,0], [0,1]], [[1,0], [0,1]], [[1,0], [0,1]], [[1,0], [0,1]]]
R_gamma_mean_2d = np.sum(R_gamma_mean, axis = 0)/s_bins
R_total = [[[1,0], [0,1]], [[1,0], [0,1]], [[1,0], [0,1]], [[1,0], [0,1]]]
# Average of the per-bin R_total matrices, built the same way as R_gamma_mean_2d.
R_total_2d = np.sum(R_total, axis = 0)/s_bins

# --- lens sample (l_bins entries) ---
lens_counts = [10846462, 10846462, 10846462, 10846462, 10846462, 10846462] # invented! copying the source one so far since I don't have it. 
lens_counts_2d = np.sum(lens_counts)
lens_density = [0.1499, 0.1072, 0.1091, 0.1458, 0.1062, 0.1002]
lens_density_2d = np.sum(lens_density)

# --- source shapes ---
mean_e1 = np.zeros(s_bins)
mean_e1_2d = 0.
mean_e2 = np.zeros(s_bins)
mean_e2_2d = 0.
n_eff = [0.1499, 0.1499, 0.1499, 0.1499] # Invented!!! copying the lens one so far since I don't have it. 
sigma_e = [0.37009, 0.39709, 0.39291, 0.45572] # as sqrt(std(e1)**2 + std(e2)**2)
# NOTE(review): this adds the per-bin sigma_e values together - confirm that a
# sum (rather than a combined-sample value) is what the consumer of sigma_e_2d expects.
sigma_e_2d = np.sum(sigma_e)
# source_counts and source_density reuse N_eff and n_eff (same list objects).
source_counts = N_eff
source_counts_2d = np.sum(source_counts)
source_density = n_eff
source_density_2d = np.sum(source_density)

In [159]:
# Every quantity defined in the previous cell becomes one dataset under
# 'tracers/', written in the order listed here.
tracer_values = (
    ('N_eff', N_eff),
    ('N_eff_2d', N_eff_2d),
    ('R_S', R_S),
    ('R_S_2d', R_S_2d),
    ('R_gamma_mean', R_gamma_mean),
    ('R_gamma_mean_2d', R_gamma_mean_2d),
    ('R_total', R_total),
    ('R_total_2d', R_total_2d),
    ('lens_counts', lens_counts),
    ('lens_counts_2d', lens_counts_2d),
    ('lens_density', lens_density),
    ('lens_density_2d', lens_density_2d),
    ('mean_e1', mean_e1),
    ('mean_e1_2d', mean_e1_2d),
    ('mean_e2', mean_e2),
    ('mean_e2_2d', mean_e2_2d),
    ('n_eff', n_eff),
    ('sigma_e', sigma_e),
    ('sigma_e_2d', sigma_e_2d),
    ('source_counts', source_counts),
    ('source_counts_2d', source_counts_2d),
    ('source_density', source_density),
    ('source_density_2d', source_density_2d),
)
for dataset_name, value in tracer_values:
    meta_w.create_dataset('tracers/' + dataset_name, data=np.array(value))

# Do the attributes
tracers.attrs['area'] = 1763.682 #sq deg, for the south part of Yuukis sims. 

# Flush everything to disk and release the file.
meta_w.close()

## Let's create the redshift distribution files

In [39]:
# source sample
# Source sample: write shear_photoz_stack.hdf5 for the current `seed`, with the
# per-bin n(z) under n_of_z/source and their sum under n_of_z/source2d.
# The context manager closes the file even if one of the text files is
# missing, so a re-run is not blocked by a stale write handle.
with h5py.File('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/shear_photoz_stack.hdf5'%seed, 'w') as pz_s:
    n_of_z = pz_s.create_group('n_of_z')
    source = pz_s.create_group('n_of_z/source')
    source2d = pz_s.create_group('n_of_z/source2d')

    z = np.loadtxt('/global/cscratch1/sd/jprat/gaussian_sims/seed1/nz_source/z.txt') # the nz is the same for all seeds
    source.create_dataset('z',data=z)
    source2d.create_dataset('z',data=z)

    # Running sum of the per-bin n(z), stored as the single 'source2d' bin.
    nz_all = np.zeros(len(z))
    for i in range(s_bins):
        # Input files are numbered from 1, output bins from 0.
        nz = np.loadtxt('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/nz_source/bin_%d.txt'%(seed, i+1))
        nz_all += nz
        source.create_dataset('bin_%d'%i,data=nz)
    source2d.create_dataset('bin_0',data=nz_all)

In [52]:
# Lens sample
# Lens sample: write lens_photoz_stack.hdf5 for the current `seed`, with the
# redshift grid and the per-bin n(z) under n_of_z/lens.
# The context manager closes the file even if one of the text files is
# missing, so a re-run is not blocked by a stale write handle.
with h5py.File('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/lens_photoz_stack.hdf5'%seed, 'w') as pz_l:
    n_of_z = pz_l.create_group('n_of_z')
    lens = pz_l.create_group('n_of_z/lens')

    z = np.loadtxt('/global/cscratch1/sd/jprat/gaussian_sims/seed1/nz_lens/z.txt') # the nz is the same for all seeds
    lens.create_dataset('z',data=z)
    for i in range(l_bins):
        # Input files are numbered from 1, output bins from 0.
        nz = np.loadtxt('/global/cscratch1/sd/jprat/gaussian_sims/seed%d/nz_lens/bin_%d.txt'%(seed, i+1))
        lens.create_dataset('bin_%d'%i,data=nz)
