/
T2LSPhotoZTap.py
305 lines (252 loc) · 10.1 KB
/
T2LSPhotoZTap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File : Ampel-contrib-HU/ampel/contrib/hu/t2/T2LSPhotoZTap.py
# License : BSD-3-Clause
# Author : jnordin
# Date : 20.04.2021
# Last Modified Date: 21.04.2021
# Last Modified By : jnordin
from collections import OrderedDict
from collections.abc import Sequence
from functools import partial
from io import BytesIO
from math import acos, cos, pi, sin
from typing import Any
import backoff
import numpy as np
import requests
from astropy.io.votable import parse_single_table
from astropy.table import Table
# Datalab
from dl import authClient
from pandas import read_csv
from ampel.abstract.AbsPointT2Unit import AbsPointT2Unit
from ampel.content.DataPoint import DataPoint
from ampel.enum.DocumentCode import DocumentCode
from ampel.struct.UnitResult import UnitResult
from ampel.types import UBson
def convert(inp, outfmt="pandas", verbose=False, **kwargs):
    """
    Convert the query result `inp` to the data structure named by `outfmt`.

    *** Adapted from datalab dl/helpers/util/convert ***
    (copied here to avoid loading all of its dependencies)

    Parameters
    ----------
    inp : str or bytes
        String representation of the result of a query. Usually this
        is a CSV-formatted string, but can also be, e.g. an
        XML-formatted votable (as string). Bytes are decoded as UTF-8.
    outfmt : str
        The desired data structure for converting `inp` to. Default:
        'pandas', which returns a Pandas dataframe. Other available
        conversions are:

          string      - no conversion
          array       - Numpy array
          structarray - Numpy structured array (also called record array)
          table       - Astropy Table
          votable     - Astropy VOtable

        For outfmt='votable', the input string must be an
        XML-formatted string. For all other values, a CSV-formatted
        string.
    verbose : bool
        If True, print a status message after conversion. Default: False
    kwargs : optional params
        Passed on as **kwargs to the converter function.

    Returns
    -------
    The converted object; its type depends on `outfmt`.

    Raises
    ------
    TypeError
        If `inp` is neither `str` nor `bytes`.
    KeyError
        If `outfmt` is not one of the names listed above.

    Example
    -------
    Convert a CSV-formatted string to a Pandas dataframe

    .. code-block:: python

       arr = convert(inp, 'array')
       arr.shape  # arr is a Numpy array

       df = convert(inp, outfmt='pandas')
       df.head()  # df is a Pandas dataframe, with all its methods

       # na_values is a kwarg handed to pandas.read_csv: 'Infinity' is
       # then treated as a missing value
       df = convert(inp, 'pandas', na_values='Infinity')
    """
    # Validate and normalise the input first, so that bytes input is really
    # supported (the header handling below works on str).
    if isinstance(inp, bytes):
        text = inp.decode()
    elif isinstance(inp, str):
        text = inp
    else:
        raise TypeError("Input must be of bytes or str type.")

    # Duplicate column names break the conversion to Astropy Table and
    # VOtable, so every repeated name gets a '_n' suffix (n = number of
    # earlier occurrences). partition() also copes with a payload that
    # consists of a header line only.
    header, _, body = text.partition("\n")
    occurrences: dict[str, int] = {}
    columns = []
    for name in header.split(","):
        count = occurrences.get(name, 0)
        columns.append(f"{name}_{count}" if count else name)
        occurrences[name] = count + 1
    text = ",".join(columns) + "\n" + body

    # map outfmt container types to a tuple:
    # (:func:`queryClient.query()` fmt-value, descriptive title,
    # processing function for the result buffer)
    # The astropy readers are looked up when called, not when the table is
    # built; caller kwargs may still override the default format.
    mapping = OrderedDict(
        [
            (
                "string",
                ("csv", "CSV formatted table as a string", lambda buf: buf.getvalue()),
            ),
            (
                "array",
                (
                    "csv",
                    "Numpy array",
                    partial(np.loadtxt, unpack=False, skiprows=1, delimiter=","),
                ),
            ),
            (
                "structarray",
                (
                    "csv",
                    "Numpy structured / record array",
                    partial(np.genfromtxt, dtype=float, delimiter=",", names=True),
                ),
            ),
            ("pandas", ("csv", "Pandas dataframe", read_csv)),
            (
                "table",
                (
                    "csv",
                    "Astropy Table",
                    lambda buf, **kw: Table.read(buf, **{"format": "csv", **kw}),
                ),
            ),
            (
                "votable",
                (
                    "votable",
                    "Astropy VOtable",
                    lambda buf, **kw: parse_single_table(buf, **kw),
                ),
            ),
        ]
    )

    _, title, converter = mapping[outfmt]
    output = converter(BytesIO(text.encode()), **kwargs)

    # The 'string' converter hands back the raw buffer content.
    if isinstance(output, bytes):
        output = output.decode()

    if verbose:
        print(f"Returning {title}")

    return output
class T2LSPhotoZTap(AbsPointT2Unit):
    """
    Query the NOIRLab Astro Data Lab service for photometric redshifts from
    the Legacy Survey.

    Other queries can in principle be made as long as the string format
    parameters are the same (ra, dec, match_radius).
    """

    # Astro DataLab credentials
    datalab_user: str
    datalab_pwd: str
    ##datalab_str : Secret

    # Match parameters
    match_radius: float = 10  # in arcsec

    # Query. Candidate position (deg) and radius (deg) will be added
    query: str = "SELECT ra, dec, photo_z.z_phot_median, photo_z.z_phot_mean, photo_z.z_phot_l68, z_phot_u68, photo_z.z_spec, tractor.dered_mag_g, tractor.dered_mag_r, tractor.dered_mag_z, tractor.dered_mag_w1, tractor.dered_mag_w2 , tractor.dered_mag_w3, tractor.dered_mag_w4, tractor.snr_g, tractor.snr_r, tractor.snr_z, tractor.snr_w1, tractor.snr_w2, tractor.snr_w3, tractor.snr_w4 FROM ls_dr8.tractor as tractor JOIN ls_dr8.photo_z as photo_z on photo_z.ls_id = tractor.ls_id WHERE 't' = Q3C_RADIAL_QUERY(ra, dec,%.6f,%.6f,%.6f)"

    # run only on first datapoint by default
    # NB: this assumes that docs are created by DualPointT2Ingester
    ingest: dict = {"eligible": {"pps": "first"}}

    # Base URL of the Data Lab query service
    datalab_query_url: str = "https://datalab.noirlab.edu/query"

    def post_init(self) -> None:
        """Log in to Data Lab and keep the security token for later queries."""
        self.token = authClient.login(self.datalab_user, self.datalab_pwd)
        # self.token = authClient.login(self.datalab_user, self.datalab_str)

    # NOTE(review): requests.get() does not raise HTTPError on its own (only
    # Response.raise_for_status() does) and the body below returns [] on any
    # non-OK status, so the HTTPError retry policy looks like it never fires.
    # Left as is because enabling it would change which exceptions reach
    # callers - confirm the intended behaviour.
    @backoff.on_exception(
        backoff.expo,
        requests.ConnectionError,
        max_tries=5,
        factor=10,
    )
    @backoff.on_exception(
        backoff.expo,
        requests.HTTPError,
        giveup=lambda e: isinstance(e, requests.HTTPError)
        and e.response.status_code not in {503, 429},
        max_time=60,
    )
    def _astrolab_query(self, ra: float, dec: float) -> Sequence[dict[str, Any]]:
        """
        Run the unit query around the position (ra, dec), both in degrees.

        :returns: one dict per catalog row; an empty list if the service
          answered with a non-OK status or no row matched.
        """
        self.logger.debug(f"Querying {ra} {dec}")

        # The request is sent manually rather than through
        # dl.queryClient: the client used the wrong service path (and pulls
        # in a lot of dependencies), and pointing it at another URL with
        # set_svc_url() yielded a split error.
        headers = {"X-DL-AuthToken": (self.token)}
        # match_radius is configured in arcsec, the query expects degrees
        sql = self.query % (ra, dec, float(self.match_radius) / 3600)
        timeout = 300
        r = requests.get(
            f"{self.datalab_query_url}/query",
            params={"sql": sql, "qfmt": "csv", "async": "0"},
            headers=headers,
            timeout=timeout,
        )
        if not r.ok:
            # The f-string is already fully formatted; applying "%" to it as
            # well raised TypeError instead of logging the failure.
            self.logger.debug(f"DL query failed at {ra} {dec}")
            return []

        # First convert to string and then to a list of row dicts
        ret_dict = convert(r.content.decode(), "pandas").to_dict(orient="records")
        self.logger.debug(f"Got {len(ret_dict)} matches")
        return ret_dict

    def add_separation(
        self, match_dict: Sequence[dict[str, Any]], target_ra: float, target_dec: float
    ) -> Sequence[dict[str, Any]]:
        """
        Iterate through catalog entries (dict) and add separation to target.

        Each entry is modified in place: 'dist2transient' is set to the
        angular distance to (target_ra, target_dec) in arcsec, or to None if
        the entry lacks 'ra' or 'dec'. Coordinates are taken to be degrees.

        :returns: the same sequence that was passed in.
        """
        deg2rad = pi / 180
        rad2arcsec = 206264.8062
        sin_target = sin(target_dec * deg2rad)
        cos_target = cos(target_dec * deg2rad)

        for el in match_dict:
            if "dec" in el and "ra" in el:
                # spherical law of cosines
                cos_sep = sin_target * sin(el["dec"] * deg2rad) + (
                    cos_target
                    * cos(el["dec"] * deg2rad)
                    * cos((target_ra - el["ra"]) * deg2rad)
                )
                # Rounding can push the value just outside [-1, 1] for
                # (nearly) coincident positions, which makes acos raise
                # ValueError. Argument order keeps NaN as NaN.
                cos_sep = max(min(cos_sep, 1.0), -1.0)
                el["dist2transient"] = acos(cos_sep) * rad2arcsec  # to arcsecs
            else:
                el["dist2transient"] = None

        return match_dict

    def process(self, datapoint: DataPoint) -> UBson | UnitResult:
        """
        Query DataLab through the unit query string combined with
        transient position.

        :returns: a ``UnitResult`` with code ``T2_MISSING_INFO`` if the
          datapoint body has no 'ra' or 'dec'. Otherwise a dict:

          - no match: ``{'T2LSPhotoZTap': None}``
          - closest match only (default)::

              {'T2LSPhotoZTap': {
                  'ra': 247.033806830109,
                  'dec': 63.8236957439316,
                  'z_phot_median': 0.364699,
                  'z_phot_mean': 0.640077,
                  'z_phot_l68': 0.14177,
                  'z_phot_u68': 1.30389,
                  'z_spec': -99,
                  ...,
                  'dist2transient': 0.30846301868050724}}

          - all matches (``return_all``): one ``'T2LSPhotoZTap<k>'`` key per
            match, with k counting from 0.

        Note that, when a match is found, the distance of the lightcurve object
        to the match counterpart is also returned as the 'dist2transient' key.
        """
        # whether to return all matches or closest match (default)
        return_all: bool = False

        try:
            transient_ra = datapoint["body"]["ra"]
            transient_dec = datapoint["body"]["dec"]
        except KeyError:
            ##return T2RunState.MISSING_INFO
            return UnitResult(code=DocumentCode.T2_MISSING_INFO)

        # Query Datalab
        match_list = self._astrolab_query(transient_ra, transient_dec)
        if len(match_list) == 0:
            return {"T2LSPhotoZTap": None}

        # Add separation between target and query detection
        match_list = self.add_separation(match_list, transient_ra, transient_dec)

        # Return a T2 result (dict-like)
        if return_all:
            return {f"T2LSPhotoZTap{k}": item for k, item in enumerate(match_list)}

        # Entries without a distance (None) sort last instead of breaking
        # the comparison.
        closest = min(
            match_list,
            key=lambda m: float("inf")
            if m["dist2transient"] is None
            else m["dist2transient"],
        )
        return {"T2LSPhotoZTap": closest}