-
Notifications
You must be signed in to change notification settings - Fork 30
/
shear_test.py
394 lines (332 loc) · 16.8 KB
/
shear_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
from __future__ import unicode_literals, absolute_import, division
import os
import time
import numpy as np
from scipy.interpolate import interp1d
from scipy.integrate import quad
from sklearn.cluster import k_means
import treecorr
import pyccl as ccl
import camb
import camb.correlations
import astropy.units as u
import astropy.constants as const
from astropy.cosmology import WMAP7 # pylint: disable=no-name-in-module
from GCR import GCRQuery
from .base import BaseValidationTest, TestResult
from .plotting import plt
pars = camb.CAMBparams()
__all__ = ['ShearTest']
class ShearTest(BaseValidationTest):
"""
Validation test for shear and convergence quantities
"""
def __init__(self,
z='redshift_true',
ra='ra',
dec='dec',
e1='shear_1',
e2='shear_2_phosim',
mag='Mag_true_r_sdss_z0',
maglim=-19.0,
kappa='convergence',
nbins=20,
min_sep=2.5,
max_sep=250,
sep_units='arcmin',
bin_slop=0.1,
zlo=0.5,
zhi=0.6,
ntomo=2,
z_range=0.05,
do_jackknife=False,
N_clust=10,
**kwargs):
#pylint: disable=W0231
self.axsize = kwargs.get('axsize', 17)
self.title_size = kwargs.get('title_size', 18)
self.legend_size = kwargs.get('legend_size', 15)
self.truncate_cat_name = kwargs.get('truncate_cat_name', True)
self.z = z
#sep-bounds and binning
self.min_sep = min_sep
self.max_sep = max_sep
self.nbins = nbins
self.sep_bins = np.linspace(min_sep, max_sep, nbins + 1)
self.sep_units = sep_units
self.bin_slop = bin_slop
self.ra = ra
self.dec = dec
self.mag = mag
self.maglim = maglim
self.e1 = e1
self.e2 = e2
self.kappa = kappa
self.N_clust = N_clust
self.do_jackknife = do_jackknife
# cut in redshift
self.summary_fig, self.summary_ax = plt.subplots(nrows=2, ncols=ntomo, sharex=True, squeeze=False, figsize=(ntomo*5, 5))
self.ntomo = ntomo
self.z_range = z_range
self.zlo = zlo
self.zhi = zhi
self.zmeans = np.linspace(self.zlo, self.zhi, self.ntomo+2)[1:-1]
def compute_nz(self, n_z):
'''create interpolated n(z) distribution'''
z_bins = np.linspace(self.zlo, self.zhi, 301)
n = np.histogram(n_z, bins=z_bins)[0]
z = (z_bins[1:] - z_bins[:-1]) / 2. + z_bins[:-1]
n2 = interp1d(z, n, bounds_error=False, fill_value=0.0, kind='cubic')
n2_sum = quad(n2, self.zlo, self.zhi)[0]
n2 = interp1d(z, n / n2_sum, bounds_error=False, fill_value=0.0, kind='cubic')
return n2
def theory_corr(self, n_z2, xvals, lmax2, chi_max, zlo2, zhi2, cosmo_cat):
'''compute the correlation function from limber integration over the CAMB power spectrum'''
nz_int = self.compute_nz(n_z2)
z_vals = np.linspace(zlo2,zhi2,1000)
n_vals = nz_int(z_vals)
ns = getattr(cosmo_cat, 'n_s', 0.963)
s8 = getattr(cosmo_cat, 'sigma8', 0.8)
Omega_c = (cosmo_cat.Om0 - cosmo_cat.Ob0)
Omega_b = cosmo_cat.Ob0
h = cosmo_cat.H0.value/100.
cosmo_ccl = ccl.Cosmology(Omega_c=Omega_c, Omega_b=Omega_b, h=h, sigma8 = s8, n_s = ns)#, transfer_function='boltzmann_class', matter_power_spectrum='emu')
ll = np.arange(0, 15000)
lens1 = ccl.WeakLensingTracer(cosmo_ccl, dndz=(z_vals, n_vals))
pp = ccl.angular_cl(cosmo_ccl, lens1, lens1, ll)
pp3_2 = np.zeros((lmax2, 4))
pp3_2[:, 1] = pp[:] * (ll * (ll + 1.)) / (2. * np.pi)
cxvals = np.cos(xvals / (60.) / (180. / np.pi))
vals = camb.correlations.cl2corr(pp3_2, cxvals)
return xvals, vals[:, 1], vals[:, 2]
def get_score(self, measured, theory, cov, opt='diagonal'):
if opt == 'cov':
cov = np.matrix(cov).I
print("inverse covariance matrix")
print(cov)
chi2 = np.matrix(measured - theory) * cov * np.matrix(measured - theory).T
elif opt == 'diagonal':
chi2 = np.sum([(measured[i] - theory[i])**2 / cov[i][i] for i in range(len(measured))])
else:
chi2 = np.sum([(measured[i] - theory[i])**2 / theory[i]**2 for i in range(len(measured))])
diff = chi2 / float(len(measured))
return diff
def jackknife(self, catalog_data, xip, xim, mask):
" computing jack-knife covariance matrix using K-means clustering"
#k-means clustering to define areas
#NOTE: This is somewhat deprecated, the jack-knifing takes too much effort to find appropriately accurate covariance matrices.
# If you want to use this, do a quick convergence check and some timing tests on small N_clust values (~5 to start) first.
# note also that this is comparing against the (low) variance in the catalog which might not be a great comparison -no shape noise
N_clust = self.N_clust
nn = np.stack((catalog_data[self.ra][mask], catalog_data[self.dec][mask]), axis=1)
_, labs, _ = k_means(n_clusters=N_clust, random_state=0, X=nn) # check random state
print("computing jack-knife errors")
time_jack = time.time()
# jack-knife code
xip_jack = []
xim_jack = []
gg = treecorr.GGCorrelation(
nbins=self.nbins,
min_sep=self.min_sep,
max_sep=self.max_sep,
sep_units='arcmin',
bin_slop=self.bin_slop,
verbose=True)
for i in range(N_clust):
##### shear computation excluding each jack-knife region
mask_jack = (labs != i)
cat_s = treecorr.Catalog(
ra=catalog_data[self.ra][mask][mask_jack],
dec=catalog_data[self.dec][mask][mask_jack],
g1=catalog_data[self.e1][mask][mask_jack] - np.mean(catalog_data[self.e1][mask][mask_jack]),
g2=-(catalog_data[self.e2][mask][mask_jack] - np.mean(catalog_data[self.e2][mask][mask_jack])),
ra_units='deg',
dec_units='deg')
gg.process(cat_s)
xip_jack.append(gg.xip)
xim_jack.append(gg.xim)
## debugging outputs
print("xip_jack")
print(i)
print(gg.xip)
print("time = " + str(time.time() - time_jack))
### assign covariance matrix - loop is poor python syntax but compared to the time taken for the rest of the test doesn't really matter
cp_xip = np.zeros((self.nbins, self.nbins))
for i in range(self.nbins):
for j in range(self.nbins):
for k in range(N_clust):
cp_xip[i][j] += N_clust/(N_clust - 1.) * (xip[i] - xip_jack[k][i] * 1.e6) * (
xip[j] - xip_jack[k][j] * 1.e6)
cp_xim = np.zeros((self.nbins, self.nbins))
for i in range(self.nbins):
for j in range(self.nbins):
for k in range(N_clust):
cp_xim[i][j] += N_clust/(N_clust - 1.) * (xim[i] - xim_jack[k][i] * 1.e6) * (
xim[j] - xim_jack[k][j] * 1.e6)
return cp_xip, cp_xim
@staticmethod
def get_catalog_data(gc, quantities, filters=None):
'''
Get quantities from catalog
'''
data = {}
if not gc.has_quantities(quantities):
return TestResult(skipped=True, summary='Missing requested quantities')
data = gc.get_quantities(quantities, filters=filters)
#make sure data entries are all finite
data = GCRQuery(*((np.isfinite, col) for col in data)).filter(data)
return data
# define theory from within this class
def post_process_plot(self, ax, fig):
'''
Post-processing routines on plot
'''
# vmin and vmax are very rough DES-like limits (maximum and minimum scales)
for i in range(self.ntomo):
for ax_this, vmin, vmax in zip(ax[:, i], (2.5, 35), (200, 200)):
ax_this.set_xscale('log')
ax_this.axvline(vmin, ls='--', c='k')
ax_this.axvline(vmax, ls='--', c='k')
ax[-1][i].set_xlabel(r'$\theta \; {\rm (arcmin)}$', size=self.axsize)
ax[0][i].set_title('z = {:.2f}'.format(self.zmeans[i]), size=self.title_size)
ax[0][i].legend(fontsize=self.legend_size, frameon=True)
ax[-1][i].legend(fontsize=self.legend_size, frameon=True)
ax[0][0].set_ylabel(r'$\xi_{{{}}} \; (10^{{-6}})$'.format('+'), size=self.axsize)
ax[-1][0].set_ylabel(r'$\xi_{{{}}} \; (10^{{-6}})$'.format('-'), size=self.axsize)
fig.subplots_adjust(hspace=0)
def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir):
'''
run test on a single catalog
'''
# check if needed quantities exist
if not catalog_instance.has_quantities([self.z, self.ra, self.dec]):
return TestResult(skipped=True, summary='do not have needed location quantities')
if not catalog_instance.has_quantities([self.e1, self.e2, self.kappa]):
return TestResult(skipped=True, summary='do not have needed shear quantities')
if not catalog_instance.has_quantities([self.mag]):
return TestResult(skipped=True, summary='do not have required magnitude quantities for cuts')
cosmo = getattr(catalog_instance, 'cosmology', WMAP7)
ntomo = self.ntomo
fig, ax = plt.subplots(nrows=2, ncols=ntomo, sharex=True, squeeze=False, figsize=(ntomo*5, 5))
zmeans = np.linspace(self.zlo, self.zhi, ntomo+2)[1:-1]
#zmeans = np.linspace(self.zlo, zhi, ntomo+2)[1:-1]
if self.truncate_cat_name:
catalog_name = catalog_name.partition('_')[0]
for ii in range(ntomo):
z_mean = zmeans[ii]
zlo2 = z_mean - self.z_range
zhi2 = z_mean + self.z_range
print(zlo2, zhi2)
filter_tomo = [(lambda z: (z > zlo2) & (z < zhi2), self.z)]
catalog_data = self.get_catalog_data(
catalog_instance, [self.z, self.ra, self.dec, self.e1, self.e2, self.kappa, self.mag], filters=filter_tomo)
# before this made sense since it was for the full catalog but now it doesnt for each tomo bin.
#z_max = np.max(catalog_data[self.z])
#if self.zhi>z_max:
# print("updating zhi to "+ str(z_max)+ " from "+ str(self.zhi))
# self.zhi = z_max
# zhi = z_max
#else:
# zhi = self.zhi
chi_max = cosmo.comoving_distance(self.zhi+1.0).value
mask = (catalog_data[self.mag][:]<self.maglim)
# read in shear values and check limits
max_e1 = np.max(catalog_data[self.e1])
min_e1 = np.min(catalog_data[self.e1])
max_e2 = np.max(catalog_data[self.e2])
min_e2 = np.min(catalog_data[self.e2])
if ((min_e1 < (-1.)) or (max_e1 > 1.0)):
return TestResult(skipped=True, summary='e1 values out of range [-1,+1]')
if ((min_e2 < (-1.)) or (max_e2 > 1.0)):
return TestResult(skipped=True, summary='e2 values out of range [-1,+1]')
# compute shear auto-correlation
cat_s = treecorr.Catalog(
ra=catalog_data[self.ra][mask],
dec=catalog_data[self.dec][mask],
g1=catalog_data[self.e1][mask] - np.mean(catalog_data[self.e1][mask]),
g2=-(catalog_data[self.e2][mask] - np.mean(catalog_data[self.e2][mask])),
ra_units='deg',
dec_units='deg')
gg = treecorr.GGCorrelation(
nbins=self.nbins,
min_sep=self.min_sep,
max_sep=self.max_sep,
sep_units='arcmin',
bin_slop=self.bin_slop,
verbose=True)
gg.process(cat_s)
r = np.exp(gg.meanlogr)
#NOTE: We are computing 10^6 x correlation function for easier comparison
xip = gg.xip * 1.e6
xim = gg.xim * 1.e6
print("npairs = ")
print(gg.npairs)
do_jackknife = self.do_jackknife
# Diagonal covariances for error bars on the plots. Use full covariance matrix for chi2 testing.
if do_jackknife:
cp_xip, cp_xim = self.jackknife(catalog_data, xip, xim, mask)
print(cp_xip)
sig_jack = np.zeros((self.nbins))
sigm_jack = np.zeros((self.nbins))
for i in range(self.nbins):
sig_jack[i] = np.sqrt(cp_xip[i][i])
sigm_jack[i] = np.sqrt(cp_xim[i][i])
else:
sig_jack = np.zeros((self.nbins))
sigm_jack = np.zeros((self.nbins))
for i in range(self.nbins):
sig_jack[i] = np.sqrt(gg.varxip[i])*1.e6
sigm_jack[i] = np.sqrt(gg.varxip[i])*1.e6
n_z = catalog_data[self.z][mask]
cosmo_cat = getattr(catalog_instance, 'cosmology', WMAP7)
xvals, theory_plus, theory_minus = self.theory_corr(n_z, r, 15000, chi_max,zlo2,zhi2, cosmo_cat)
theory_plus = theory_plus * 1.e6
theory_minus = theory_minus * 1.e6
if do_jackknife:
chi2_dof_1 = self.get_score(xip, theory_plus, cp_xip, opt='diagonal') #NOTE: correct this to opt=cov if you want full covariance matrix
else:
chi2_dof_1 = self.get_score(xip, theory_plus, 0, opt='nojack') # correct this
print(theory_plus)
print(theory_minus)
print(xip)
print(xim)
print(r)
print(xvals)
#The following are further treecorr correlation functions that could be added in later to extend the test
#treecorr.NNCorrelation(nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin')
#treecorr.NGCorrelation(nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin') # count-shear (i.e. <gamma_t>(R))
#treecorr.NKCorrelation(nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin') # count-kappa (i.e. <kappa>(R))
#treecorr.KKCorrelation(nbins=20, min_sep=2.5, max_sep=250, sep_units='arcmin') # count-kappa (i.e. <kappa>(R))
for ax_this in (ax, self.summary_ax):
ax_this[0, ii].errorbar(r, xip, sig_jack, lw=0.6, marker='o', ls='', color="#3f9b0b", label=r'$\xi_{+}$ ' + catalog_name)
ax_this[0, ii].plot(xvals, theory_plus, 'o', color="#9a0eea", label=r'$\xi_{+}$' + " theory")
ax_this[1, ii].errorbar(r, xim, sigm_jack, lw=0.6, marker='o', ls='', color="#3f9b0b", label=r'$\xi_{-}$ ' + catalog_name)
ax_this[1, ii].plot(xvals, theory_minus, 'o', color="#9a0eea", label=r'$\xi_{-}$' + " theory")
results = {'theta':r, 'xip ':xip, 'xim ':xim, 'theta_theory':xvals, 'xip_theory':theory_plus, 'xim_theory':theory_minus, 'npairs':gg.npairs}
if do_jackknife:
results['xip_err'] = sig_jack
results['xim_err'] = sigm_jack
#save results for catalog and validation data in txt files
filelabel = 'z_{:.2f}'.format(self.zmeans[ii])
theory_keys = [k for k in results.keys() if 'theory' in k]
keys = ['theta'] + [k for k in results.keys() if 'xi' in k and 'theory' not in k] + theory_keys + ['npairs']
with open(os.path.join(output_dir, 'Shear_vs_theta_' + filelabel + '.txt'), 'ab') as f_handle: #open file in append binary mode
self.save_quantities(keys, results, f_handle, comment='z = {:.2f}'.format(self.zmeans[ii]))
self.post_process_plot(ax, fig)
fig.savefig(os.path.join(output_dir, 'plot.png'))
plt.close(fig)
score = chi2_dof_1 #calculate your summary statistics
#TODO: This criteria for the score is effectively a placeholder if jackknifing isn't used and assumes a diagonal covariance if it is
# Proper validation criteria need to be assigned to this test
if score < 2:
return TestResult(score, inspect_only=True)
else:
return TestResult(score, passed=False)
@staticmethod
def save_quantities(keys, results, filename, comment=''):
header = 'Data columns for {} are:\n {}'.format(comment, ' '.join(keys))
np.savetxt(filename, np.vstack((results[k] for k in keys)).T, fmt='%12.4e', header=header)
def conclude_test(self, output_dir):
self.post_process_plot(self.summary_ax, self.summary_fig)
self.summary_fig.savefig(os.path.join(output_dir, 'summary.png'))
plt.close(self.summary_fig)