In [1]:
from collections import Counter
from datetime import datetime, timedelta
from glob import glob
import re
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy.time import Time
import astropy.units as u
import numpy as np

## First, explore the log files to determine how to distinguish lines that represent observations from all other lines.

In [2]:
# Exploratory pass: sort every log line into one of two text files
# (candidate observation lines vs. everything else) so they can be inspected by eye.
logs_dir = r"..\..\Files\DSSI Logs"
logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)

observation_lines = "DSSI observations.txt"
other_lines = "Non observation lines.txt"

with open(observation_lines, mode="w") as obs_out, open(other_lines, mode="w") as other_out:
    for log_path in logs_to_process:
        # The date is recovered from the file path: a 4-digit year, four skipped
        # characters, then a 3-character month plus 2-digit day ("%Y%b%d").
        date_parts = re.findall("([0-9]{4})....(...[0-9]{2})", log_path)[0]
        datestr = ''.join(date_parts)
        utc_date = datetime.strptime(datestr, "%Y%b%d")
        with open(log_path) as log_file:
            # Same banner goes into both outputs so lines can be traced back to their file.
            banner = f"===========\nFile {log_path} from {utc_date}\n===========" + "\n"
            obs_out.write(banner)
            other_out.write(banner)
            for line in log_file:
                looks_like_observation = line.count(":") >= 4
                if looks_like_observation:
                    obs_out.write(f"{datestr} {line}")
                else:
                    other_out.write(line)

# Conclusion: requiring 4 or more colon characters exactly splits the logs

## Now extract all observation lines from all files
* Use more than one regex to match the target lines
  * For speed, try first the regex that is empirically observed to match the most lines
* Keep track of lines that "fall through" and aren't matched by any regex pattern
* Standardize things for easier use in analysis
  * Output times both in ISO format and in JD
  * Convert sexagesimal coordinates to decimal degrees
* "Explode" each observation line into multiple lines, one for each wavelength involved (typically 2 or 3)
  * Currently, wavelength values are hard coded at the top of this code.
  * A scan of all log files for lines containing "Camera A =" (or "Camera B =") shows that, unsurprisingly, every file cites the same set of wavelengths
* Output a CSV of all parsed & standardized observation lines

In [147]:
# Parse every observation line of every DSSI log into one table row per
# (observation line, wavelength), report what could not be parsed, and write
# the result to "DSSI Observations.csv".

logs_dir = r"..\..\Files\DSSI Logs"
logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)

# Values stored in the "Wavelength" column for the blue, red and IR arms
# (presumably nm -- confirm against the "Camera A =" / "Camera B =" log lines).
filter_blue = 692
filter_red = 880
filter_ir = 1450

dssi_observations = Table(
    names=["Target Name", "TIC ID", "Wavelength", "Image Number", "UTC DateTime", "Time JD", "Gain 1", "Gain 2", "RA", "Dec", "PMRA", "PMDec", "Mag", "Notes", ],
    dtype=["str", "str", "int", "int", "str", "float", "int", "int", "float", "float", "float", "float", "float", "str", ],
)

# (label, compiled pattern, default fields) -- tried in this order, first match wins.
# The defaults fill in fields that a given line layout does not carry at all.
line_patterns = [
    (
        "Standard Pattern",
        re.compile(r"(?P<target_name>.{7,13})\s+(?P<image_num>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain_1>\d{1,3})\s+(?P<gain_2>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)"),
        {},
    ),
    (
        "No Image Num or Gains",
        re.compile(r"(?P<target_name>\"{0,1}.{7,13}\"{0,1})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)"),
        {"image_num": 0, "gain_1": 0, "gain_2": 0},
    ),
    (
        "Infrared Observations",
        re.compile(r"(?P<target_name>.{7})\s+(?P<image_beg>\d{1,3})-(?P<image_end>\d{1,3})\s+(?P<image_ir>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)"),
        {},
    ),
    (
        "Single Gain Value",
        re.compile(r"(?P<target_name>\".{7}\")\s+(?P<image_num>\d{1,3})\s+(?P<gain>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)"),
        {},
    ),
]

line_counts = Counter()
failed_lines = []
for file in logs_to_process:
    # The observing date comes from the file path, not from the log contents.
    datestr = ''.join(re.findall("([0-9]{4})....(...[0-9]{2})", file)[0])
    utc_date = datetime.strptime(datestr, "%Y%b%d")
    with open(file) as f:
        for line in f:
            if line.count(":") < 4:  # all observation lines contain at least 4 colons
                continue  # skip non-observation lines

            for label, pattern, defaults in line_patterns:
                match = pattern.match(line)
                if match:
                    break
            else:
                # Looked like an observation line but no layout matched: keep it for the report.
                failed_lines.append(line)
                continue

            line_counts[label] += 1
            fields = {**match.groupdict(), **defaults}

            # The proper-motion groups may match an empty string. A half-specified
            # proper motion is not usable, so treat either one missing as "no proper motion".
            if not fields.get("pmra") or not fields.get("pmdec"):
                fields["pmra"] = 0
                fields["pmdec"] = 0

            if "gain" in fields:  # assume gains are the same for both arms if only one gain specified
                fields["gain_1"] = fields["gain"]
                fields["gain_2"] = fields["gain"]

            if "hours" in fields:
                obs_time = Time(utc_date + timedelta(hours=int(fields["hours"]), minutes=int(fields["minutes"])))
                datetime_utc = str(obs_time.utc)
                datetime_jd = obs_time.jd
            else:
                # Layout carries no time of day; 0 marks "untimed" in the JD column.
                datetime_utc = ""
                datetime_jd = 0

            if "image_ir" in fields:
                # One IR image plus a range of image numbers shared by the red and blue arms.
                observations = [(fields["image_ir"], filter_ir)]
                observations += [(image_num, filter_x)
                                 for filter_x in [filter_red, filter_blue]
                                 for image_num in range(int(fields["image_beg"]), int(fields["image_end"]) + 1)
                                 ]
            else:
                image_num = fields["image_num"]
                observations = [(image_num, filter_blue), (image_num, filter_red)]

            coord = SkyCoord(ra=fields["ra"], dec=fields["dec"], unit=(u.hourangle, u.deg))

            tic_matches = re.findall('TIC ?(?:ID)? ?=? ?([0-9]+)', line)
            fields["tic_id"] = ("TIC " + tic_matches[0]) if tic_matches else ""

            for (image_num, wavelength) in observations:
                try:
                    # Convert explicitly so the numeric columns never depend on implicit
                    # string/Quantity coercion; RA and Dec are stored as decimal degrees.
                    dssi_observations.add_row([
                        fields["target_name"].replace('"', ''),
                        fields["tic_id"],
                        wavelength,
                        int(image_num),
                        datetime_utc,
                        datetime_jd,
                        int(fields["gain_1"]),
                        int(fields["gain_2"]),
                        coord.ra.deg,
                        coord.dec.deg,
                        float(fields["pmra"]),
                        float(fields["pmdec"]),
                        float(fields["mag"]),
                        fields["notes"],
                    ])
                except Exception as e:
                    # Best effort: report the offending line (and why) and carry on.
                    print("error:", fields["tic_id"], line.rstrip("\n"), f"({e})")

for (regex, count) in line_counts.items():
    print(f"{count:4d} {regex}")
print()
print(len(failed_lines), "failed matches")
for failed_line in failed_lines:
    print(failed_line.rstrip("\n"))
dssi_observations.sort("Time JD")
dssi_observations.write("DSSI Observations.csv", overwrite=True)


  81 Single Gain Value
 344 No Image Num or Gains
3846 Standard Pattern
 134 Infrared Observations

9 failed matches
HR 3366 053-056 014    0      08:32:42.5 +20:26:28.0      0.000     0.000  5.33  Slit mask
H900004 XXX-XXX 027 12:36 XXX  15:43:48.5 +25:52:38.3   -171.0     317.0   14.33  2MASS J15434848+2552376  -  Pokemon
H900004 XXX-XXX 028 12:43 XXX  15:43:48.5 +25:52:38.3   -171.0     317.0   14.33  2MASS J15434848+2552376  -  Pokemon
HR 0689 116 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  
HR 0689 117 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  -5" in Y (South)
HR 0689 118 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  +5" in Y (North)
HR 0689 119 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  -5" in X (West)
HR 0689 120 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  +5" in X (East)
HR 0689 121 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  -5" in X (West)


## Now collect observations into *sessions*
A session:
* is on the same target
* is one or more speckle observations taken back to back (no other objects targeted in between)
* contains measurements at one or more wavelengths, typically 2 but sometimes 3 if the IR arm is used

Each (target + session + wavelength) combination results in one line in the sessions table.

In [146]:
# Group the observations into speckle sessions and summarise each
# (session, target, wavelength, TIC ID) group as one row of "DSSI sessions.csv".
# Requires `dssi_observations` as built by the extraction cell.

# First, add a speckle session column whose value increments each time the
# target changes while walking the observations in time order.
prev_target = ""
speckle_session = 0
dssi_observations["Speckle Session"] = 0
# Tie-break on target name: rows with identical "Time JD" (in particular every
# untimed row, which carries JD 0) would otherwise be left in whatever order the
# sort produces, and interleaved targets would fragment into spurious sessions.
dssi_observations.sort(["Time JD", "Target Name"])
for observation in dssi_observations:
    target_name = observation["Target Name"]
    if target_name != prev_target:
        speckle_session += 1
        prev_target = target_name
    observation["Speckle Session"] = speckle_session

# Now, make a group summary of each wavelength in a speckle session.
dssi_sessions = Table(
    names=["Target Name", "TIC ID", "Speckle Session", "Wavelength", "StartTime JD", "MidTime JD", "EndTime JD", "MidTime UTC", "Num Sequences"],
    dtype=["str", "str", "str", "int", "float", "float", "float", "str", "int"],
)

obs_by_session = dssi_observations.group_by(["Speckle Session", "Target Name", "Wavelength", "TIC ID"])
for keys, observations in zip(obs_by_session.groups.keys, obs_by_session.groups):
    start_time = observations["Time JD"].min()
    end_time = observations["Time JD"].max()
    mid_time = (end_time + start_time) / 2
    # A mid time of 0 means the group has no recorded times, so leave the UTC string empty;
    # otherwise keep the ISO timestamp down to whole seconds (first 19 characters).
    mid_utc = str(Time(mid_time, format="jd").iso)[:19] if mid_time > 0 else ""
    dssi_sessions.add_row((keys["Target Name"], keys["TIC ID"], str(keys["Speckle Session"]), keys["Wavelength"], start_time, mid_time, end_time, mid_utc, len(observations)))

dssi_sessions.write("DSSI sessions.csv", overwrite=True)
