In [1]:
from sompz.functions_sompz import *
from sompz.functions_WL_Y3 import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import h5py

## Load Balrog data assigned to Deep and Wide cells

In [11]:
# --- SOM sizes: total number of cells in the deep (64x64) and wide (32x32) maps
deep_som_size = 64 ** 2
wide_som_size = 32 ** 2

# --- Tomographic binning: bin_edges holds n_bins + 1 edges
bin_edges = [0.0, 0.358, 0.631, 0.872, 2.0]
n_bins = 4

# --- Output location and label used when naming products of this run
data_dir = './'
run_name = 'test'

# --- Balrog catalog and its deep / wide SOM cell assignments
balrog_file = '/global/cscratch1/sd/acampos/sompz_data/v0.50_andresa/deep_balrog.pkl'
deep_cells_assignment_balrog_file = './cells_deep_balrog_newSOM_y3.txt'
wide_cells_assignment_balrog_file = './cells_wide_balrog_SOMF_230117.txt'

# --- Wide-field catalog and its wide SOM cell assignments
wide_field_file = '/global/cscratch1/sd/acampos/sompz_data/Y3_mastercat_03_31_20.h5'
wide_cells_assignment_file = './cells_wide_assignment_all.csv'

# --- Auxiliary inputs: shape-response grid (fed to calculate_weights) and the
# --- COSMOS catalog (fed to build_spec_df)
smooth_response_filename = '/global/u1/d/dgruen/work/sompz/test/full_run_on_data/y3_shape_response_grid_03_31_20.txt'
cosmos_file = '/global/cscratch1/sd/jmyles/COSMOS2015/sompz_cosmos.h5'

In [3]:
# Build the Balrog table from the catalog plus its deep and wide SOM
# cell-assignment files (all three paths come from the configuration cell).
balrog_data = build_balrog_df(
    balrog_file,
    deep_cells_assignment_balrog_file,
    wide_cells_assignment_balrog_file,
)

Length of balrog_data: 2417437


In [4]:
# Per-object weight for the Balrog sample, computed from the shape-response
# grid file and the unsheared snr / size_ratio / weight and injection counts.
len_balrog_data = len(balrog_data)

overlap_weight = calculate_weights(
    smooth_response_filename,
    balrog_data['unsheared/snr'],
    balrog_data['unsheared/size_ratio'],
    balrog_data["injection_counts"],
    balrog_data['unsheared/weight'],
    len_balrog_data,
)

# Stored as a column so later steps can read it from balrog_data directly.
balrog_data['overlap_weight'] = overlap_weight

In [5]:
# Relate deep cells to wide cells using the Balrog sample, weighting each
# object by its overlap weight.
# NOTE(review): presumably the p(c | chat) transfer matrix -- confirm in sompz.
pcchat = calculate_pcchat(
    deep_som_size,
    wide_som_size,
    balrog_data['cell_deep'],
    balrog_data['cell_wide_unsheared'],
    balrog_data['overlap_weight'],
)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [6]:
# Build the redshift-sample table from the COSMOS file together with the
# Balrog table; used below as the deep/spec input to the n(z) estimators.
spec_data = build_spec_df(cosmos_file, balrog_data)

  self[k1] = value[k2]
  self.obj[key] = empty_value


n duplicated Laigle 39399
all cosmos deep:  521851
matched cosmos deep:  451766
unmatched cosmos deep:  70085


In [12]:
# Assign wide SOM cells to tomographic bins delimited by bin_edges.
# The result is indexed by bin number (0 .. n_bins - 1) further down.
tomo_bins_wide_dict = bin_assignment_spec(
    spec_data,
    deep_som_size,
    wide_som_size,
    inj_counts=balrog_data['injection_counts'],
    inj_ids=balrog_data['true_id'],
    bin_edges=bin_edges,
)

In [8]:
# Wide-field cell assignments: first CSV column is the index, every value is
# parsed as a 32-bit integer.
wide_data_assignment = pd.read_csv(
    wide_cells_assignment_file,
    index_col=0,
    dtype=np.int32,
)

  mask |= (ar1 == a)


In [9]:
# Build the wide-field table from the master catalog and the per-object
# wide SOM cell assignments loaded above.
wide_data = build_wide_df(wide_field_file, wide_data_assignment)  

read select metacal done
read coadd_object_id done
read unsheared/T done
read unsheared/snr done
read unsheared/R11 done
read unsheared/R22 done
read unsheared/wight done


In [10]:
# Per-object weight for the wide sample, derived from the unsheared response
# components (R11, R22) and the unsheared weight column.
wide_data['overlap_weight'] = calculate_wide_overlap_weight(
    wide_data['unsheared/R11'],
    wide_data['unsheared/R22'],
    wide_data['unsheared/weight'],
)

In [14]:
# Number of wide-field objects assigned to each wide SOM cell.
cell_occupation_info = wide_data.groupby('cell_wide_unsheared')['cell_wide_unsheared'].count()

# Write through a context manager so the file is flushed and closed even if
# pickling fails; a bare open() inside the call left the handle dangling.
with open('cell_occupation_info_wide_som_SOMF_230117.pkl', 'wb') as occupation_file:
    pickle.dump(cell_occupation_info, occupation_file)

# Total object count per tomographic bin: sum the occupation of every cell
# that belongs to the bin.
bin_occupation_info = {
    'bin' + str(i): np.sum(cell_occupation_info.loc[tomo_bins_wide_dict[i]].values)
    for i in range(n_bins)
}
print(bin_occupation_info)

{'bin0': 23580877, 'bin1': 27236473, 'bin2': 24773059, 'bin3': 24618535}


In [15]:
# Convert the bin -> cells mapping to the layout expected by the n(z) routines:
# tomo_bins_wide[i] is a 2D array whose first column is the cell id and whose
# second column is an arbitrary weight.
tomo_bins_wide = tomo_bins_wide_2d(tomo_bins_wide_dict)

In [17]:
# Settings for the output histogram.
# Note that zbins_max needs to agree with Laigle currently.
zbins_dz = 0.01
zbins_max = 6.00

# All z-bins above zmax_pileup will be piled into the last bin;
# zmax_weight can up- or down-weight that last bin.
zmax_pileup = 3.0
zmax_weight = 1.0

# NOTE(review): not referenced by any cell visible in this notebook.
zp_catalog = 30

In [18]:
# Histogram bin edges, shifted down by half a bin width; they need to match
# the Laigle binning if in 'usepz' mode.
zbins = np.arange(-zbins_dz / 2., zbins_max + zbins_dz, zbins_dz)

# Midpoint of every bin.
zmean = 0.5 * (zbins[:-1] + zbins[1:])

# Column names "Z0_00", "Z0_01", ... -- stays hardcoded while we use the
# Laigle catalog as only truth.
fullpz = [
    "Z{:.2f}".format(z_value).replace(".", "_")
    for z_value in np.arange(0, 6.01, 0.01)
]

In [19]:
# Redshift information handed to the n(z) routines: `key` is the full list of
# p(z) column names, `keylabel` is the tag embedded in output file names.
key = fullpz
keylabel = 'modal_even'

# NOTE(review): pointz is not referenced by any cell visible in this notebook.
pointz = 'Z'

In [20]:
# n(z) per tomographic bin on the wide sample, without conditioning on the bin
# (output files below label it NOT_BIN_CONDITIONALIZED). Overlap weighting is
# switched on for both options; cells are looked up via 'cell_wide_unsheared'.
hists_wide = redshift_distributions_wide(
    data=wide_data,
    deep_data=spec_data,
    pcchat=pcchat,
    tomo_bins=tomo_bins_wide,
    bins=zbins,
    key=key,
    cell_key='cell_wide_unsheared',
    overlap_weighted_pchat=True,
    overlap_weighted_pzc=True,
    force_assignment=False,
)

In [None]:
# Persist and plot the non-bin-conditioned n(z). Outputs go under data_dir
# like every other product of this notebook (previously these two were written
# relative to the working directory regardless of data_dir).
np.save(data_dir + 'Y3_hists_wide_NOT_BIN_CONDITIONALIZED_{}.npy'.format(keylabel), hists_wide)

# cm.save_des_nz(hists_wide, zbins, n_bins, data_dir, run_name, keylabel)
plot_nz(hists_wide, zbins, data_dir + 'Y3_nz_newbinning_onwide_NOT_BIN_CONDITIONALIZED_{}.png'.format(keylabel))

In [None]:


# Per-cell weights of the wide sample (overlap weighted, no forced assignment),
# saved under data_dir. `cells` is returned alongside but not used further in
# the cells visible here.
cells, cell_weights_wide = get_cell_weights_wide(
    wide_data,
    overlap_weighted_pchat=True,
    force_assignment=False,
    cell_key='cell_wide_unsheared',
)

pchat_path = data_dir + 'Y3_pchat_{}.npy'.format(keylabel)
np.save(pchat_path, cell_weights_wide)

In [None]:
# overlap and hist bin cond
# Bin-conditioned n(z): one call per tomographic bin, each restricted to that
# bin's wide cells; everything else is shared between the calls.
bin_cond_options = dict(
    overlap_weighted_pchat=True,
    overlap_weighted_pzc=True,
    zbins=zbins,
    pcchat=pcchat,
    cell_wide_key='cell_wide_unsheared',
    zkey=key,
)
hists_wide_bin_cond = np.array([
    nz_bin_conditioned(wide_data, spec_data, tomo_cells=tomo_bins_wide[bin_index], **bin_cond_options)
    for bin_index in range(n_bins)
])

# Save the array, then produce the per-bin plot and the overlap plot.
np.save(data_dir + 'Y3_hists_wide_bin_conditionalized_{}.npy'.format(keylabel), hists_wide_bin_cond)
# cm.save_des_nz(hists_wide_bin_cond, zbins, n_bins, data_dir, run_name, keylabel+'_bincond')
plot_nz(hists_wide_bin_cond, zbins, data_dir + 'Y3_nz_newbinning_onwide_bin_cond_{}.png'.format(keylabel))
plot_nz_overlap([hists_wide_bin_cond], [keylabel], data_dir)
# END bin conditionalized n(z)

In [None]:
####################
# Pile up very high z

# Both sets of histograms are piled up with identical settings.
pileup_settings = (zbins, zmean, zmax_pileup, zbins_dz, zmax_weight, n_bins)
zbins_piled, zmean_piled, hists_wide_piled = pileup(hists_wide, *pileup_settings)
zbins_piled, zmean_piled, hists_wide_bin_cond_piled = pileup(hists_wide_bin_cond, *pileup_settings)

# Plot comparing bin-conditioned vs. not bin-conditioned n(z), both piled up.
results, deltas = redshift_histograms_stats(
    hists_wide_piled, hists_wide_bin_cond_piled, zbins_piled, 'using p(z|c,bhat)')
fig_wide = plot_redshift_histograms(
    hists_wide_piled, hists_wide_bin_cond_piled, zbins_piled,
    'Bin Conditioned vs. Not Bin Conditioned n(z)', 'Bin Conditioned', max_pz = 3)
fig_wide.savefig(data_dir + 'Y3_pzc_vs_pzcbhat_wide_faint.png', dpi=300)
print(results)
print(deltas)

# Plot comparing the pile-up effect on the bin-conditioned n(z):
# filled curve + dash-dotted mean = before pile-up, line + solid mean = after.
means_bc, sigmas_bc = get_mean_sigma(zmean, hists_wide_bin_cond)
means_bc_piled, sigmas_bc_piled = get_mean_sigma(zmean_piled, hists_wide_bin_cond_piled)
plt.figure(figsize=(16.,9.))
colors = ['blue', 'orange', 'green', 'red']
for i in range(len(hists_wide)):
    bin_color = colors[i]
    mean_before = means_bc[i]
    mean_after = means_bc_piled[i]
    plt.fill_between(zmean, hists_wide_bin_cond[i], color=bin_color, alpha=0.3)
    plt.axvline(mean_before, linestyle='-.', color=bin_color,
                label=str(i) + ' %.3f' % (mean_before))
    plt.plot(zmean_piled, hists_wide_bin_cond_piled[i], color=bin_color)
    plt.axvline(mean_after, linestyle='-', color=bin_color,
                label=str(i) + ' pile-up: %.3f' % (mean_after))
plt.xlabel(r'$z$')
plt.ylabel(r'$p(z)$')
plt.xlim(0,3)
plt.legend()
plt.title('Wide n(z)')
plt.savefig(data_dir + 'Y3_nz_newbinning_onwide_bin_cond_pileup3_faint.png')

# Save the comparison tables and the piled-up, bin-conditioned n(z).
deltas.to_pickle(data_dir + 'Y3_deltas_pzc_pzcbhat.pkl')
results.to_pickle(data_dir + 'Y3_results_pzc_pzcbhat.pkl')
np.save(data_dir + 'Y3_hists_wide_bin_conditionalized_pileup3_{}.npy'.format(keylabel),
        hists_wide_bin_cond_piled)
save_des_nz(hists_wide_bin_cond_piled, zbins_piled, n_bins, data_dir, run_name,
            keylabel + '_Y3_bincond_pileup3')

#zbins=zbins_piled
# This is what is used moving forward.
# NOTE: rebinding hists_wide makes this cell non-idempotent -- re-running it
# feeds the already piled-up histograms back into pileup() together with the
# original zbins/zmean. Re-run the cell that defines hists_wide first.
hists_wide = hists_wide_bin_cond_piled
#END pile up

## Smooth

In [None]:
####################
# Smooth

# Template 2pt FITS file handed to to2point.
# templatef='/global/cscratch1/sd/aamon/DVs/template.fits'
templatef='/global/cscratch1/sd/acampos/cosmosis/cosmosis-des-library/y3-3x2pt/data/des-y3/sim_data_vectors/sim_3x2pt_TATT_sample3.fits'

# Step 1: convert the n(z) FITS file into 2pt format.
# NOTE(review): this name is hardcoded and presumably has to match what
# save_des_nz wrote for the current run_name / keylabel -- confirm.
filen='Y3_y3_redshift_distributions_test_modal_even_Y3_bincond_pileup3.fits'
to2point(filen, templatef, run_name, keylabel, data_dir)

# Step 2: smooth, reading the 2pt file and writing a text file (filen2).
filen = data_dir + 'Y3_2pt_%s_%s.fits' % (keylabel, run_name)
filen2 = data_dir + 'Y3_y3_redshift_distributions_%s_%s_bincond_pileup3_smooth.txt' % (keylabel, run_name)
smooth(filen, filen2, run_name, keylabel, data_dir, hists_wide_bin_cond_piled)

#END smooth

## Generate h5 file

In [43]:
# Independent copy of the unsheared wide-cell column.
# NOTE(review): not referenced by any later cell visible in this notebook.
cells_wide_unsheared = wide_data['cell_wide_unsheared'].copy()

In [46]:
# Independent copy of the object-id column; its length defines nrows for the
# bhat datasets created further down.
coadd = wide_data['coadd_object_id'].copy()

In [60]:
# Copy of wide_data with the cell column renamed to the plural form.
# rename() returns a new frame, so wide_data itself keeps 'cell_wide_unsheared'.
# NOTE(review): not referenced by any later cell visible in this notebook.
sompzh5_pass = wide_data.rename(columns={"cell_wide_unsheared": "cells_wide_unsheared"})

In [64]:
# Open the output catalog for in-place modification. Mode 'r+' requires the
# file to exist already. The handle stays open for the following cells and is
# closed by the final f.close() cell.
f = h5py.File('sompz_test.hdf5','r+', track_order=True)

f.create_dataset('catalog/sompz/unsheared/coadd_object_id', data = wide_data['coadd_object_id'])

# One cell_wide dataset per shear variant, each taken from its own column.
# Previously sheared_1p, sheared_2m and sheared_2p were all filled with the
# sheared_1m assignments (copy-paste error).
for shear_variant in ('unsheared', 'sheared_1m', 'sheared_1p', 'sheared_2m', 'sheared_2p'):
    f.create_dataset('catalog/sompz/{}/cell_wide'.format(shear_variant),
                     data = wide_data['cell_wide_{}'.format(shear_variant)])

<HDF5 dataset "cell_wide": shape (100208944,), type "<i4">

In [65]:
# Copy the piled-up, bin-conditioned n(z) table from the FITS file into the
# HDF5 catalog.
# NOTE(review): fitsio is not imported explicitly in the import cell; it is
# presumably provided by one of the sompz star imports -- confirm.
nz = fitsio.read('Y3_y3_redshift_distributions_test_modal_even_Y3_bincond_pileup3.fits')

# (HDF5 dataset name, FITS column) pairs; FITS bins are 1-based, HDF5 bins 0-based.
pzdata_columns = (
    ('bin0', 'BIN1'),
    ('bin1', 'BIN2'),
    ('bin2', 'BIN3'),
    ('bin3', 'BIN4'),
    ('zhigh', 'Z_HIGH'),
    ('zlow', 'Z_LOW'),
)
for dataset_name, fits_column in pzdata_columns:
    f.create_dataset('catalog/sompz/pzdata/' + dataset_name, data = nz[fits_column])

<HDF5 dataset "zlow": shape (300,), type ">f8">

In [66]:
# p(z) for every individual wide cell: all wide cells are passed with unit
# weight and individual_chat=True requests one histogram per cell.
all_wide_cells = np.arange(wide_som_size)
unit_cell_weights = np.ones(len(all_wide_cells))

pz_chat_per_cell = histogram(
    wide_data,
    spec_data,
    key=key,
    pcchat=pcchat,
    cells=all_wide_cells,
    cell_weights=unit_cell_weights,
    overlap_weighted_pzc=True,
    bins=zbins,
    individual_chat=True,
)
pz_chat = np.array(pz_chat_per_cell)

np.save(data_dir + 'pzchat.npy', pz_chat)

In [67]:
# Store p(z | chat) in the catalog and read it back to confirm its shape.
pz_chat_path = 'catalog/sompz/pzdata/pz_chat'
f.create_dataset(pz_chat_path, data = pz_chat)
print(f[pz_chat_path][...].shape)

(1024, 601)


In [68]:
# p(z) for every deep cell, overlap weighted, on the same z grid as above.
all_deep_cells = np.arange(deep_som_size)

pz_c_per_cell = get_deep_histograms(
    wide_data,
    spec_data,
    key=key,
    cells=all_deep_cells,
    overlap_weighted_pzc=True,
    bins=zbins,
)
pz_c = np.array(pz_c_per_cell)

np.save(data_dir + 'pzc.npy', pz_c)

In [69]:
# Store p(z | c) in the catalog and read it back to confirm its shape.
pz_c_path = 'catalog/sompz/pzdata/pz_c'
f.create_dataset(pz_c_path, data = pz_c)
print(f[pz_c_path][...].shape)

(4096, 601)


In [70]:
# Build a lookup Series mapping wide cell id (index) -> tomographic bin (value).
cell_chunks = []
bin_chunks = []
for nbin in tomo_bins_wide:
    cells_in_bin = tomo_bins_wide[nbin][:, 0]  # first column holds the cell ids
    cell_chunks.append(cells_in_bin)
    # one bin label per cell of this bin
    bin_chunks.append((np.ones(len(cells_in_bin)) * nbin).astype(int))

tmp_cells = np.concatenate(cell_chunks)
tmp_bins = np.concatenate(bin_chunks)
tomo_bin_hashtable = pd.Series(tmp_bins, index=tmp_cells)

#sompzh5 = h5py.File(sompzh5_file, mode='a') # read again with no WL selection applied

In [71]:
# Number of catalog rows; sizes the per-object bhat datasets created next.
nrows = len(coadd)

In [72]:
fluxtypes = ['unsheared', 'sheared_1m', 'sheared_1p', 'sheared_2m', 'sheared_2p']

# Add the tomographic bin of every object (bhat) to the catalog, once per
# shear variant. Objects whose wide cell belongs to no bin get -1.
for fluxtype in fluxtypes:
    print('add binning {}'.format(fluxtype))
    bhat_path = 'catalog/sompz/{}/bhat'.format(fluxtype)

    # Only an already-existing dataset (e.g. when re-running this cell) is
    # tolerated; any other creation failure now surfaces immediately instead
    # of being printed and then resurfacing as an unrelated error on the write.
    if bhat_path in f:
        print('{} already exists, overwriting its contents'.format(bhat_path))
    else:
        f.create_dataset(bhat_path, maxshape=(nrows,), shape=(nrows,), dtype='i8')
        print("that worked!")

    # Map each object's wide cell to its bin; unmapped cells come back as NaN
    # and are replaced by -1 before casting to the dataset's integer dtype.
    bhat = wide_data['cell_wide_{}'.format(fluxtype)].map(tomo_bin_hashtable)
    f[bhat_path][...] = bhat.fillna(-1).astype('i8')


add binning unsheared
that worked!
add binning sheared_1m
that worked!
add binning sheared_1p
that worked!
add binning sheared_2m
that worked!
add binning sheared_2p
that worked!


In [62]:
# Close the HDF5 catalog opened in the "Generate h5 file" section so that all
# datasets written above are flushed to disk.
f.close()