-
Notifications
You must be signed in to change notification settings - Fork 0
/
catalogs.py
250 lines (194 loc) · 8.99 KB
/
catalogs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
from astropy.io import fits
import pandas as pd
import numpy as np
from pathlib import Path
def read_fits(file:str,
apply_E_correction:bool=True,
z_cubic:float=0.2,
check_cutsky_format:bool=False,
check_cubic_format:bool=False,
print_column_names:bool=False) -> pd.DataFrame:
"""
Reads a fits file from the Abacus simulation and returns the data as a Pandas DataFrame.
A DESI cutsky file must be in the format of the DESI simulations. (i.e. contain the following columns :
* `RA` : Right ascension (in degrees)
* `DEC` : Declination (in degrees)
* `Z` : Redshift of the observed galaxy (in redshift space)
* `Z_COSMO` : Redshift of the galaxy in the comoving frame (in real space)
* `R_MAG_ABS` : Absolute magnitude
* `R_MAG_APP` : Apparent magnitude
* `G_R_REST` : Color (g-r) of the galaxy in the rest frame
* `HALO_MASS` : Halo mass of the galaxy
* `STATUS` : Status of the galaxy (if the galaxy is in the DESI footprint or not)
)
A DESI cubic box file must be in the format of the DESI simulations. (i.e. contain the following columns :
* `x` : x coordinate of the galaxy in the comoving frame (in real space)
* `y` : y coordinate of the galaxy in the comoving frame (in real space)
* `z` : z coordinate of the galaxy in the comoving frame (in real space)
* `vx` : x-component of velocity, in km/s (not comoving)
* `vy` : y-component of velocity, in km/s (not comoving)
* `vz` : z-component of velocity, in km/s (not comoving)
* `R_MAG_ABS` : Absolute magnitude
* `HALO_MASS` : Halo mass of the galaxy
)
Parameters
----------
file: str
Path to the file.
apply_E_correction: bool, optional
If True, corrects the evolution in the spectrum over the redshift interval on the absolute magnitude.
Defaults to `True`.
z_cubic: float, optional
Redshift of the cubic snapshot for E-correction. Defaults to `0.2`.
check_cutsky_format: bool, optional
If True, checks if the file is in the format of a DESI cutsky (see above). Defaults to `False`.
check_cubic_format: bool, optional
If True, checks if the file is in the format of a DESI cubic box (see above). Defaults to `False`.
print_column_names: bool, optional
If True, prints the names of the columns in the file. Defaults to `False`.
Returns
-------
data: Pandas DataFrame
Data contained in the file.
"""
# Read the fits file
with fits.open(Path(file)) as hdul:
data = hdul[1].data
# Checks if the columns we need are present in the file
cols = ['RA', 'DEC', 'Z', 'R_MAG_ABS', 'R_MAG_APP', 'STATUS', 'HALO_MASS']
is_cutsky = True
for col in cols:
if col not in data.columns.names:
is_cutsky = False
if check_cutsky_format:
raise KeyError(f'The column "{col}" is not present in the file.')
# Checks if the columns we need are present in the file
cols = ['x', 'y', 'z', 'R_MAG_ABS', 'HALO_MASS']
is_cubic = True
for col in cols:
if col not in data.columns.names:
is_cubic = False
if check_cubic_format:
raise KeyError(f'The column "{col}" is not present in the file.')
# Convert the data to a Pandas DataFrame
data = pd.DataFrame(data)
if apply_E_correction and is_cubic: # We apply a constant E-correction if the file is in the format of a DESI cubic box
z = np.full(len(data), z_cubic)
data['R_MAG_ABS'] = data['R_MAG_ABS'] - E_correction(z)
if apply_E_correction and is_cutsky: # We apply the E-correction only if the file is in the format of a DESI cutsky
# Check if the column 'Z' is present in the file
if 'Z' not in data.columns.values:
raise KeyError(f'The column "Z" is not present in the file. Impossible to apply the E-correction.')
z = data['Z']
data['R_MAG_ABS'] = data['R_MAG_ABS'] - E_correction(z)
if print_column_names:
print("The columns of the dataframe are :", data.columns.values)
return data
def E_correction(z, Q0=-0.97, z0=0.1):
"""
Corrects the evolution in the spectrum over the redshift interval on the absolute magnitude.
Parameters
----------
z: array_like
Redshift of the observed galaxy (in redshift space).
Q0: float, optional
Corrective factor.
z0: float, optional
Reference redshift
Returns
-------
E_correction: array_like
Correction to apply on the absolute magnitude.
"""
return Q0 * (z - z0)
def status_mask(main=0, nz=0, Y5=0, sv3=0):
"""
Returns the status value of the points to select in the catalog
"""
return main * (2**3) + sv3 * (2**2) + Y5 * (2**1) + nz * (2**0)
def catalog_cuts(data,
zmin:float=0.1,
zmax:float=2.0,
abs_mag:float=-21.5,
app_mag:float=19.5,
status_mask = status_mask(nz=1, Y5=1),
cap:str='NGC'):
"""
Applies a mask to the provided data to reduce it according to the parameters
Parameters
----------
data : pandas.DataFrame
Data on which the mask should be applied
zmin : float, optional
Minimum value of redshift to keep. Defaults to 0.1
zmax : float, optional
Maximum value of redshift to keep. Defaults to 0.1
abs_mag : float, optional
The cut in absolute magnitude. Only the brighter galaxies (smaller magnitudes) will be kept.
Only applied if the data has a comumn named 'R_MAG_ABS'.
Set to None to disable. Defaults to -21.5
app_mag : float, optional
The cut in apparent magnitude. Only the brighter galaxies (smaller magnitudes) will be kept.
Only applied if the data has a comumn named 'R_MAG_APP'.
Set to None to disable. Defaults to 19.5
status_mask : _type_, optional
The value of the STATUS column to keep.
Requires a bit value, as in https://desi.lbl.gov/trac/wiki/CosmoSimsWG/FirstGenerationMocks
The status_mask() function can also be used.
Defaults to status_mask(nz=1, Y5=1) (i.e. Y5 survey, with n(z) applied)
cap : str, optional
The cap to keep. Can be 'NGC' or 'SGC'. Only applied if the cap column exists.
Set to None to disable. Defaults to 'NGC'
Returns
-------
_type_
_description_
"""
# Get the redshift mask
z = data['Z']
zcut = (z > zmin) & (z < zmax)
# Get the status mask
status_cut = np.full(len(data), True, dtype=bool) # Mask that keeps all the galaxies
if 'STATUS' in data.columns:
status = data['STATUS']
status_cut = status & status_mask == status_mask
# Get the magnitude mask
mag_cut = np.full(len(data), True, dtype=bool) # Mask that keeps all the galaxies
if 'R_MAG_APP' in data.columns and app_mag is not None:
m_r = data['R_MAG_APP']
mag_cut = mag_cut & (m_r < app_mag)
if 'R_MAG_ABS' in data.columns and abs_mag is not None:
M_r = data['R_MAG_ABS']
mag_cut = mag_cut & (M_r < abs_mag)
# Get the cap mask
cap_cut = np.full(len(data), True, dtype=bool) # Mask that keeps all the galaxies
if cap in data.columns and cap is not None:
cap_cut = data[cap] == 1
#
mask = mag_cut & status_cut & zcut & cap_cut
# Apply the mask
data = pd.DataFrame(data.values[mask], columns=data.columns) # Get the values of the mask
return data
def create_random(path:str, multiplier:int=5) -> pd.DataFrame :
"""
Creates a random catalog with the x1 catalogs in the provided path. The number of randoms is equal to `multiplier` times the number of galaxies in the data catalog.
Parameters
----------
path: str
The path to the file containing the x1 random catalogs.
multiplier: int
The number of randoms in the random catalog is equal to `multiplier` times the number of galaxies in the data catalog.
Returns
-------
random: array_like
The random catalog.
"""
path = Path(path)
# Create a list of the name of the random catalogs
random_catalogs = [f"random_S{i}00_1X.fits" for i in range(1, multiplier+1)] # List of the random files
# Read the first random catalog
random = read_fits(path / random_catalogs[0]) # Returns a pandas dataframe
# Loop over the other random catalogs to concatenate them to the first one
for i in range(1, len(random_catalogs)):
random = pd.concat([random, read_fits(path / random_catalogs[i])], ignore_index=True)
return random