/
TransientTablePublisher.py
337 lines (286 loc) · 12.2 KB
/
TransientTablePublisher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: Ampel-contrib-HU/ampel/contrib/hu/t3/TransientTablePublisher.py
# License: BSD-3-Clause
# Author: jnordin@physik.hu-berlin.de
# Date: 06.05.2021
# Last Modified Date: 16.01.2024
# Last Modified By: ernstand@physik.hu-berlin.de
import io
import os
import re
from collections.abc import Generator
from typing import Any
import backoff
import pandas as pd
import requests
from ampel.abstract.AbsPhotoT3Unit import AbsPhotoT3Unit
from ampel.secret.NamedSecret import NamedSecret
from ampel.struct.T3Store import T3Store
from ampel.types import T3Send
from ampel.util.mappings import get_by_path
from ampel.view.TransientView import TransientView
class TransientTablePublisher(AbsPhotoT3Unit):
    """
    Construct a table based on selected T2 output values.

    The table is assembled as a pandas DataFrame and can optionally be saved
    to a local file and/or uploaded to slack.

    Config parameters:

    include_stock (bool)
        Add the AMPEL stock id of each transient as column "stock".
    include_channels (bool)
        Add the channel(s) of each transient as column "channels".
    include_pos (bool)
        Add the "brightest" position of the first lightcurve as "ra"/"dec".
    convert_stock_to (str | None)
        Convert the AMPEL stock id to an external id. Only 'ztf' is accepted
        (result stored as column "ztf_id").
    save_base_info (bool)
        Also add rows for transients lacking any T2 info.

    name_filter
        How to deal with names. Each value is a regular expression which is
        matched (``re.match``, i.e. anchored at the start) against every name
        of the transient; the matching names are stored under the key:
        name_filter = { 'ZTF name' : 'ZTF', 'TNS ID' : 'TNS' }

    table_schema / transient_table_schema
        Selection of fields to save. Matches the structure of the t2 document
        result dict, e.g.:
        table_schema = { 't2_unit' : {
                'table_label_1' : ['path','to','val'],
                'table_label_2' : ['other','path']
            },
        }
        transient_table_schema = { 'point_t2_unit' : {
                'table_label_1' : ['path','to','val'],
                'table_label_2' : ['other','path']
            },
        }

    sort_by_key / sort_ascending
        Column used to sort the table (None disables sorting) and the
        sort direction.

    fmt
        Output format (converted through pandas): 'csv', 'latex' or 'json'.

    Destinations, attempted if the appropriate parameters are set:
        file_name      base name of the output (without extension)
        slack:
            slack_channel
            slack_token
        local save:
            local_path (the table is written to <local_path>/<dir_name>/)

    move_files
        If the table has a "map_name" column, move the regular files found
        directly in local_path into a sibling directory named after the map.

    Todo:
    - save to desy web?
    - include format option for printing
    """

    # Two tables describing what information to save into the table.
    # Schema for state dependent T2s (one row for each)
    table_schema: dict[str, Any] = {}
    # Schema for transient dependent T2s (added to each row together with base info)
    transient_table_schema: dict[str, Any]

    name_filter: dict[str, str] = {"ZTF name": "ZTF", "TNS ID": "TNS"}
    include_stock: bool = False
    convert_stock_to: str | None = None
    sort_by_key: str | None = "kilonovaness"
    sort_ascending: bool = False
    include_pos: bool = True
    include_channels: bool = True
    # Add also transients lacking any T2 info
    save_base_info: bool = False

    fmt: str = "csv"
    write_mode: str = "a"
    rename_files: bool = False

    dir_name: str = "TransientTable"
    file_name: str = dir_name
    slack_channel: None | str = None
    slack_token: None | NamedSecret[str]
    local_path: None | str = None

    move_files: bool = False

    def process(
        self, gen: Generator[TransientView, T3Send, None], t3s: None | T3Store = None
    ) -> None:
        """
        Loop through provided TransientViews, extract data according to the
        configured schema and export the resulting table.

        :param gen: generator yielding the TransientViews to tabulate.
        :param t3s: not used by this unit.
        """
        # Compile the name filters once rather than once per transient.
        name_patterns = {
            label: re.compile(pattern) for label, pattern in self.name_filter.items()
        }

        table_rows: list[dict[str, Any]] = []
        for tran_view in gen:
            table_rows.extend(self._rows_from_view(tran_view, name_patterns))

        self.logger.info("", extra={"table_count": len(table_rows)})
        if not table_rows:
            return

        # Export assembled information
        df = pd.DataFrame(table_rows)

        # Resolved on the unsorted table (first collected row) and kept in a
        # local variable: mutating self.file_name would stack one more suffix
        # on every further call of process().
        file_name = self._output_file_name(df)

        # Sort dataframe by key (None means: keep collection order)
        if self.sort_by_key is not None:
            if self.sort_by_key in df.columns:
                df = df.sort_values(by=self.sort_by_key, ascending=self.sort_ascending)
            else:
                self.logger.warn(
                    f"Cannot sort table by {self.sort_by_key} - legal keys: {df.keys()}"
                )

        # Local save
        if self.local_path is not None:
            self._save_local(df, self.local_path, file_name)

        # Export to slack if requested
        self._slack_export(df, file_name)

        # Could potentially return a document to T3 collection detailing
        # what was done, as well as the table itself.

        # Take everything in local_path and put it into a new folder named
        # after the skymap. Nothing can be moved without a local_path.
        map_name_key = "map_name"
        if self.move_files and self.local_path is not None and map_name_key in df:
            # Label-based lookup: row 0 is the first *collected* row, also
            # after sorting. Needs to change if several maps ever get saved
            # in the same table.
            self._move_local_files(self.local_path, df[map_name_key].loc[0])

    def _rows_from_view(
        self, tran_view: TransientView, name_patterns: dict[str, re.Pattern[str]]
    ) -> list[dict[str, Any]]:
        """
        Build the table rows of a single transient.

        :param tran_view: view of the transient.
        :param name_patterns: compiled name filters, keyed by output label.
        :returns: one row per state with T2 info, a single row holding only
            transient-level info if there is no state info, or an empty list
            if the transient is to be skipped.
        """
        base_info: dict[str, Any] = {}

        # Assemble t2 information bound to the transient (e.g. Point T2s)
        for t2unit, table_entries in self.transient_table_schema.items():
            t2res = tran_view.get_latest_t2_body(unit=t2unit)
            if isinstance(t2res, dict):
                for label, path in table_entries.items():
                    base_info[label] = get_by_path(t2res, path)

        # Assemble info which could vary from state to state.
        # Should add config to labels if multiple exports
        # from same unit is requested.
        state_rows: list[dict[str, Any]] = []
        for t1_document in tran_view.t1 or []:
            t1_link = t1_document["link"]
            state_row: dict[str, Any] = {}
            for t2unit, table_entries in self.table_schema.items():
                t2res = tran_view.get_latest_t2_body(unit=t2unit, link=t1_link)
                if isinstance(t2res, dict):
                    for label, path in table_entries.items():
                        state_row[label] = get_by_path(t2res, path)
            if state_row:
                state_rows.append(state_row)

        # Nothing found and base info not explicitly requested: skip transient
        if not state_rows and not base_info and not self.save_base_info:
            return []

        # Collect base information applying to all states
        # If here, add stock info (name, channel etc.)
        names = (tran_view.stock or {}).get("name", [])
        if names:
            for label, pattern in name_patterns.items():
                # While names will mostly be unique, it might not always be the case.
                matches = list(filter(pattern.match, names))  # type: ignore[arg-type]
                # Avoid list when possible
                base_info[label] = matches[0] if len(matches) == 1 else matches

        if self.include_stock:
            base_info["stock"] = tran_view.id

        if self.convert_stock_to is not None:
            assert self.convert_stock_to in ["ztf"]
            if self.convert_stock_to == "ztf":
                # Imported lazily so the ZTF package is only needed on request
                from ampel.ztf.util.ZTFIdMapper import ZTFIdMapper

                base_info["ztf_id"] = ZTFIdMapper.to_ext_id(tran_view.id)

        if self.include_pos:
            lightcurves = tran_view.get_lightcurves()
            # Truthiness also covers an empty sequence, which would
            # otherwise raise IndexError on [0].
            if lightcurves:
                pos = lightcurves[0].get_pos(ret="brightest")
                if pos is not None:
                    base_info["ra"] = pos[0]
                    base_info["dec"] = pos[1]

        if self.include_channels and tran_view.stock:
            channels = tran_view.stock.get("channel")
            # Allow for both single (most common) and multiple channels.
            base_info["channels"] = (
                channels[0]
                if isinstance(channels, list | tuple) and len(channels) == 1
                else channels
            )

        # Only transient info
        if not state_rows:
            return [base_info]

        # One row per state, each completed with the transient info
        for state_row in state_rows:
            state_row.update(base_info)
        return state_rows

    def _output_file_name(self, df: pd.DataFrame) -> str:
        """
        Return the base file name for this table: self.file_name, extended by
        the "map_seed" value of the first row when that column is present.
        """
        if "map_seed" in df:
            seed = df["map_seed"].iloc[0]
            suffix = seed if isinstance(seed, str) else str(int(seed))
            return self.file_name + "_" + suffix
        if self.rename_files:
            # Previously this case raised KeyError on df["map_seed"].
            self.logger.warn(
                "rename_files requested but table has no 'map_seed' column"
                " - file name left unchanged"
            )
        return self.file_name

    def _save_local(self, df: pd.DataFrame, local_path: str, file_name: str) -> None:
        """
        Write the table to <local_path>/<dir_name>/<file_name>.<extension>
        in the configured format.
        """
        path_name = os.path.join(local_path, self.dir_name)
        os.makedirs(path_name, exist_ok=True)
        full_path = os.path.join(path_name, file_name)

        # Latex tables are written with a ".tex" extension; every other
        # format uses its own name.
        extension = "tex" if self.fmt == "latex" else self.fmt
        target = full_path + "." + extension

        # Start from an empty file.
        # NOTE(review): because of this truncation write_mode "a" never
        # appends to content from an earlier call - confirm this is intended.
        with open(target, "w"):
            pass

        if self.fmt == "csv":
            df.to_csv(target, sep=";", mode=self.write_mode)
        elif self.fmt == "latex":
            df.to_latex(target)
        elif self.fmt == "json":
            with open(target, self.write_mode) as json_file:
                json_file.write(df.to_json(indent=2))
        else:
            self.logger.warn(f"Unknown table format {self.fmt} - nothing written")
            return

        self.logger.info("Exported", extra={"path": full_path})

    def _move_local_files(self, local_path: str, skymap_name: str) -> None:
        """
        Move the regular files located directly in local_path (except those
        with ".fits.gz" in their name) to a sibling directory of local_path
        named after the skymap.
        """
        files_local_path = os.listdir(local_path)

        skymap_dir_name = skymap_name
        if skymap_name[-1] != "z":  # if non trivial rev version (hacky)
            # find "," and add rev version after that
            skymap_dir_name += "_rev_" + skymap_name[skymap_name.find(",") + 1 :]

        print("TransientTablePublisher: TMP FILES MOVED TO " + skymap_dir_name)

        skymap_directory = os.path.join(local_path, os.pardir, skymap_dir_name)
        os.makedirs(skymap_directory, exist_ok=True)

        for file in files_local_path:
            if ".fits.gz" in file:
                continue
            tmp_file_path = os.path.join(local_path, file)
            # Sub-directories (e.g. the table directory itself) stay in place
            if os.path.isfile(tmp_file_path):
                os.replace(tmp_file_path, os.path.join(skymap_directory, file))

    @backoff.on_exception(
        backoff.expo,
        requests.ConnectionError,
        max_tries=5,
        factor=10,
    )
    @backoff.on_exception(
        backoff.expo,
        requests.HTTPError,
        giveup=lambda e: not isinstance(e, requests.HTTPError)
        or e.response.status_code not in {503, 429},
        max_time=60,
    )
    def _slack_export(self, df: pd.DataFrame, file_name: str | None = None) -> None:
        """
        Export content of Pandas dataframe to slack.

        Does nothing unless both slack_channel and slack_token are set.

        :param df: table to upload, serialised according to self.fmt.
        :param file_name: name given to the uploaded file; defaults to
            self.file_name.
        :raises requests.HTTPError: if slack answers with an error status
            (after the retries configured in the decorators).
        """
        if self.slack_channel is None or self.slack_token is None:
            return

        if file_name is None:
            file_name = self.file_name

        # The buffer must start empty: an initial value is overwritten from
        # position 0 and would leave residue behind a shorter table.
        buffer = io.StringIO()
        if self.fmt == "csv":
            df.to_csv(buffer, sep=";")
        elif self.fmt == "latex":
            df.to_latex(buffer)
        elif self.fmt == "json":
            df.to_json(buffer, indent=2)
        else:
            self.logger.warn(f"Unknown table format {self.fmt} - nothing sent to slack")
            return

        param = {
            "token": self.slack_token.get(),
            "channels": self.slack_channel,
            "title": "From the Table Publisher",
            "username": "AMPEL-live",
            "as_user": "false",
            "filename": file_name,
        }

        # NOTE(review): Slack has announced the retirement of files.upload in
        # favour of files.getUploadURLExternal / files.completeUploadExternal
        # - confirm this endpoint is still served for the workspace.
        ret = requests.post(
            "https://slack.com/api/files.upload",
            params=param,
            files={"file": buffer.getvalue()},
        )
        ret.raise_for_status()

        self.logger.info(ret.text)