-
Notifications
You must be signed in to change notification settings - Fork 0
/
weights.py
297 lines (227 loc) · 10.6 KB
/
weights.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import numpy as np
import pandas as pd
import healpy as hp
from scipy.interpolate import InterpolatedUnivariateSpline
# Ignore the warning that appears when we are creating a new column in a dataframe
import warnings
try :
warnings.filterwarnings('ignore', category=pd.errors.SettingWithCopyWarning) # Ignore the warning that appears when we are creating a new column in a dataframe
except AttributeError:
warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning) # Old version pf pandas
def sky_fraction(ra, dec, nside=256):
"""
Computes the fraction of the sky covered by the survey.
Parameters
----------
ra : array_like
Right ascension of the galaxies in degrees.
dec : array_like
Declination of the galaxies in degrees.
nside : int, optional
The nside parameter of the HEALPix map. Defaults to `256`.
Returns
-------
fsky : float
The fraction of the sky covered by the survey.
"""
npix = hp.nside2npix(nside)
phi = np.radians(ra)
theta = np.radians(90.0 - dec)
pixel_indices = hp.ang2pix(nside, theta, phi)
pixel_unique, counts = np.unique(pixel_indices, return_counts=True)
fsky = len(pixel_unique)/npix # fsky
return fsky
def comoving_volume(cosmo,
z_min,
z_max,
area: float = None,
fsky: float = None):
"""
Computes the comoving volume associated to the redshift bin [`z_min`, `z_max`].
Parameters
----------
cosmo
Cosmology object that can be used to compute the comoving distance.
It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
z_min: float or array_like
The minimum redshift of the redshift bin. (Can be an array, in which case the output is an array)
Must be of the same format as `z_max` and same length if it is an array.
z_max: float or array_like
The maximum redshift of the redshift bin. (Can be an array, in which case the output is an array)
Must be of the same format as `z_min` and same length if it is an array.
area: float, optional
The area of the survey in square degrees. Defaults to `None`.
fsky: float, optional
The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
Returns
-------
comov_vol: float or array_like
Comoving volume associated to the redshift bin [`z_min`, `z_max`].
The output has the same format as `z_min` and `z_max`.
"""
if fsky is None and area is None:
raise ValueError("Either `fsky` or `area` must be specified.")
elif fsky is not None and area is not None:
warnings.warn("Both `fsky` and `area` are specified. `fsky` will be used.")
area = fsky * 4*np.pi * (180.0/np.pi)**2
elif area is None:
area = fsky * 4*np.pi * (180.0/np.pi)**2
# Compute the comoving distance associated to the redshift bin
comov_dist_min = cosmo.comoving_radial_distance(z_min)
comov_dist_max = cosmo.comoving_radial_distance(z_max)
# Convert the area from square degrees to square radians
area = area * (np.pi/180)**2
# Compute the comoving volume associated to the redshift bin
comov_vol = (area/3) * (comov_dist_max**3 - comov_dist_min**3)
return comov_vol
def ScottsBinEdges(data) -> np.ndarray :
"""
Computes the bin edges for a histogram using Scott's rule.
Scott's rule is a rule of thumb for choosing the bin width of a histogram.
It is based on the standard deviation of the data and is a function of the
sample size. It is a good compromise when no other information is known
about the data.
Parameters
----------
data : array_like
The data we want to bin.
Returns
-------
edges : ndarray
The edges of the bins. Length `nbins + 1`.
Notes
-----
Scott's rule defines the bin width as `dx = 3.5 * sigma / n**(1/3)`, where
sigma is the standard deviation of the data and n is the sample size.
(The factor of `3.5` comes from a `24*np.sqrt(np.pi)` factor at the power of 1/3).
"""
# Convert the data to a numpy array
data = np.asarray(data)
# Extract the information needed from the data
n = len(data) # Number of data points
sigma = np.std(data) # Standard deviation of the data
xmin = np.min(data) # Minimum of the data
xmax = np.max(data) # Maximum of the data
# Compute the bin width
factor = (24 * np.sqrt(np.pi))**(1/3) # A factor that appears in Scott's rule, approximated by 3.5
dx = factor * sigma / n**(1/3) # The bin width according to Scott's rule
# Compute the bin edges
nbins = int(np.ceil((xmax - xmin) / dx)) # The number of bins
nbins = max(nbins, 1) # Make sure that there is at least one bin
edges = xmin + dx * np.arange(nbins + 1) # The edges of the bins
return edges
def n_z(z,
cosmo,
edges: list = None,
area: float = None,
fsky:float=None) -> InterpolatedUnivariateSpline:
"""
Computes the number density of galaxies in the data in the given redshift bin.
Parameters
----------
z: array_like
The redshift of the galaxies in the data.
cosmo
Cosmology object that can be used to compute the comoving distance.
It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
edges: list, optional
The edges of the bins used to compute the number density.
If set to `None`, the edges are computed using Scott's rule. Defaults to `None`.
area: float, optional
The area of the survey in square degrees. Defaults to `None`.
fsky: float, optional
The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
Returns
-------
n_func : InterpolatedUnivariateSpline
The number density as a function of redshift.
It can be called as `n_func(z)` to get the number density at redshift `z`.
Notes
-----
The number density is computed as `n(z) = N(z) / V(z)`,
where `N(z)` is the number of galaxies in the redshift bin [`z_min`, `z_max`]
and `V(z)` is the comoving volume associated to the redshift bin [`z_min`, `z_max`].
"""
# Convert the z to a numpy array
z = np.asarray(z)
# Compute the number of galaxies in the redshift bin
if edges is None:
edges = ScottsBinEdges(z)
bin_centers = 0.5*(edges[:-1] + edges[1:])
# Compute the comoving volume associated to the redshift bin
z_min = edges[:-1]
z_max = edges[1:]
V = comoving_volume(cosmo, z_min, z_max, area, fsky) # Comoving volume associated to the redshift bin
# Compute the number of galaxies in the bins
N, _ = np.histogram(z, bins=edges) # Number of galaxies in the redshift bin
nbar = N / V # Number density in the redshift bin
# Interpolate the number density as a function of redshift
n_func = InterpolatedUnivariateSpline(bin_centers, nbar, ext='zeros') # (ext='zeros' means that the number density is set to zero outside of the redshift range of the data)
return n_func
def w_fkp(z_data,
z_random,
cosmo,
edges: list = None,
area: float = None,
fsky: float = None,
P0: float = 7000):
"""
Computes the FKP weights for the data, and returns a column containing the FKP weights. (If cuts need to be applied, they should be applied before calling this function.)
Parameters
----------
z_data: array_like
The redshift of the galaxies in the data.
z_random: array_like
The redshift of the galaxies in the randoms.
cosmo
Cosmology object that can be used to compute the comoving distance.
It must have a method `comoving_radial_distance(z)` that returns the comoving distance at redshift `z`.
edges: list, optional
The edges of the bins used to compute the number density.
If set to `None`, the edges are computed using Scott's rule. Defaults to `None`.
area: float, optional
The area of the survey in square degrees. Defaults to `None`.
fsky: float, optional
The fraction of the sky covered by the survey. Can be provided instead of area. Defaults to `None`.
P0: float, optional
The power spectrum normalization (TODO : Check this definition). Defaults to `7000` for the BGS.
Returns
-------
weight_data: array_like
The FKP weights for the data for the respective galaxies in z_data.
weight_random: array_like
The FKP weights for the data for the respective galaxies in z_random.
"""
# Convert the z to a numpy array
z_data = np.asarray(z_data)
z_random = np.asarray(z_random)
# Compute the number density
n_func = n_z(z_random, cosmo, edges=edges, area=area, fsky=fsky) # Number density as a function of redshift, computed from the randoms (the data should follow the same distribution as the randoms)
# Re-normalize n(z) to the total size of the data catalog
alpha = 1.0 * len(z_data) / len(z_random)
n_data = n_func(z_data) * alpha # Number density at the redshift of the data
n_random = n_func(z_random) * alpha # Number density at the redshift of the randoms
# Compute the FKP weights
weight_data = 1.0 / (1 + n_data*P0)
weight_random = 1.0 / (1 + n_random*P0)
return weight_data, weight_random
from pandas import qcut
def get_quantiles_weight(density,
randoms_weights,
nquantiles=10):
"""
Gets the weights of the quantiles of the density.
Parameters
----------
density : array_like
The density of the data.
randoms_weights : array_like
The weights of the randoms.
nquantiles : int, optional
The number of quantiles to use. Defaults to `10`.
"""
quantiles_idx = qcut(density, nquantiles, labels=False)
quantiles_weights = []
for i in range(nquantiles):
quantiles_weights.append(randoms_weights[quantiles_idx == i])
return quantiles_weights