In [1]:
from collections import Counter
from datetime import datetime, timedelta
from glob import glob
import re
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy.time import Time
import astropy.units as u
import numpy as np

## First, explore around to determine how to identify lines representing observations vs other lines in the log files.

In [2]:
# figure out how to find log lines representing observations vs log lines describing procedures
logs_dir = r"..\..\Files\DSSI Logs"
logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)

observation_lines = "DSSI observations.txt"
other_lines = "Non observation lines.txt"

with open(observation_lines, mode="w") as obs, open(other_lines, mode="w") as non_obs:
    for file in logs_to_process:
        datestr = ''.join(re.findall("([0-9]{4})....(...[0-9]{2})", file)[0])
        utc_date = datetime.strptime(datestr, "%Y%b%d")
        with open(file) as f:
            title = f"===========\nFile {file} from {utc_date}\n==========="
            obs.write(title + "\n")
            non_obs.write(title + "\n")
            for line in f.readlines():
                if line.count(":") >= 4: # and line.count("TIC") > 0:
                    obs.write(f"{datestr} {line}")
                else:
                    non_obs.write(line)

# Conclusion: requiring 4 or more colon characters exactly splits the logs

## Now extract all observation lines from all files
* Use more than one regex to match the target lines
  * For speed, make first regex attempted be the one that is empirically observed to match the most lines
* Keep track of lines that "fall through" and aren't matched by any regex pattern
* Standardize things for easier use in analysis
  * Output times both in ISO format and in JD
  * Convert sexagesimal coordinates to decimal degrees
* Depending on if the pattern matching a line includes IR information or not, add a comma-separated list of wavelengths
  * Currently, wavelength values are hard coded at the top of this code.
  * Scanning all log files for lines with "Camera A =" (or B), all files unsurprisingly cite the same set of wavelengths
* Output a CSV of all parsed & standardized observation lines

In [15]:
# now extract useful information from the various lines

logs_dir = r"..\..\Files\DSSI Logs"
logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)
# logs_to_process = ['foo.olist']

wavelengths_optical = "692, 880"
wavelengths_ir = "1450"

dssi_observations = Table(
    names=["Target Name", "TIC ID", "Wavelengths", "Image Number", "UTC DateTime", "Time JD", "Gain 1", "Gain 2", "RA", "Dec", "PMRA", "PMDec", "Mag", "Notes", ],
    dtype=["str", "str", "str", "int", "str", "float", "int", "int", "float", "float", "float", "float", "float", "str", ],
)

line_counts = Counter()
failed_lines = []
for file in logs_to_process:
    with open(file) as f:
        datestr = ''.join(re.findall("([0-9]{4})....(...[0-9]{2})", file)[0])
        utc_date = datetime.strptime(datestr, "%Y%b%d")
        for line in f.readlines():
            fields = {}
            if line.count(":") < 4: # all observation lines follow this pattern
                continue # skip non-observation lines
            if match := re.match(r"(?P<target_name>.{7,13})\s+(?P<image_num>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain_1>\d{1,3})\s+(?P<gain_2>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)", line):
                line_counts["Standard Pattern"] += 1
            elif match := re.match(r"(?P<target_name>\"{0,1}.{7,13}\"{0,1})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)", line):
                fields["image_num"] = 0
                fields["gain_1"] = 0
                fields["gain_2"] = 0
                line_counts["No Image Num or Gains"] += 1
            elif match := re.match(r"(?P<target_name>.{7})\s+(?P<image_beg>\d{1,3})-(?P<image_end>\d{1,3})\s+(?P<image_ir>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)", line):
                line_counts["Infrared Observations"] += 1
            elif match := re.match(r"(?P<target_name>\".{7}\")\s+(?P<image_num>\d{1,3})\s+(?P<gain>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)", line): #
                line_counts["Single Gain Value"] += 1
            if match:
                fields = {**match.groupdict(), **fields}
                try:
                    if not "pmra" in fields or fields["pmra"] == "":
                        fields["pmra"] = 0
                        fields["pmdec"] = 0
                except Exception as e:
                    pass
                if "gain" in fields: # assume gains are the same for both arms if only one gain specified
                    fields["gain_1"] = fields["gain"]
                    fields["gain_2"] = fields["gain"]
                if "hours" in fields:
                    obs_time = Time(utc_date + timedelta(hours=int(fields["hours"]), minutes=int(fields["minutes"])))
                    datetime_utc = str(obs_time.utc)
                    datetime_jd = obs_time.jd
                else:
                    datetime_utc = str(utc_date)
                    datetime_jd = 0
                if "image_ir" in fields:
                    observations = [(image_num, wavelengths_optical) for image_num in range(int(fields["image_beg"]), int(fields["image_end"]) + 1)]
                    observations.append((fields["image_ir"], wavelengths_ir))
                else:
                    observations = [(fields["image_num"], wavelengths_optical)]
                coord = SkyCoord(ra=fields["ra"], dec=fields["dec"], unit=(u.hourangle, u.deg))
                if matches := re.findall('TIC ?(?:ID)? ?=? ?([0-9]+)', line):
                    fields["tic_id"] = "TIC " + matches[0]
                else:
                    fields["tic_id"] = ""
                for image_num, wavelengths in observations:
                    try:
                        dssi_observations.add_row([fields["target_name"].replace('"', ''), fields["tic_id"], wavelengths, image_num, datetime_utc, datetime_jd, fields["gain_1"], fields["gain_2"], coord.ra, coord.dec, float(fields["pmra"]), float(fields["pmdec"]), fields["mag"], fields["notes"]])
                    except Exception as e:
                        print("error:", fields["tic_id"], line[:-1])
            else:
                failed_lines.append(line)
total_observation_lines = 0
for (regex, count) in line_counts.items():
    print(f"{count:4d} {regex}")
    total_observation_lines += count
print(f"Total lines identified as observations: {total_observation_lines}")
print()
print(len(failed_lines), "failed matches")
for failed_line in failed_lines:
    print(failed_line[:-1])
dssi_observations.sort("Time JD")
dssi_observations.write("DSSI Observations.csv", overwrite=True)


  81 Single Gain Value
 344 No Image Num or Gains
3846 Standard Pattern
 134 Infrared Observations
Total lines identified as observations: 4405

9 failed matches
HR 3366 053-056 014    0      08:32:42.5 +20:26:28.0      0.000     0.000  5.33  Slit mask
H900004 XXX-XXX 027 12:36 XXX  15:43:48.5 +25:52:38.3   -171.0     317.0   14.33  2MASS J15434848+2552376  -  Pokemon
H900004 XXX-XXX 028 12:43 XXX  15:43:48.5 +25:52:38.3   -171.0     317.0   14.33  2MASS J15434848+2552376  -  Pokemon
HR 0689 116 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  
HR 0689 117 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  -5" in Y (South)
HR 0689 118 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  +5" in Y (North)
HR 0689 119 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  -5" in X (West)
HR 0689 120 08:            02:21:56.6 +00:23:45.0      0.000     0.000  5.00  +5" in X (East)
HR 0689 121 08:            02:21:56.6 +00:23:45.0     

The above count of lines of observations is smaller than the number of lines output to the `DSSI observations.csv` file.  This is because each line of IR observation results in *n* lines of optical observation and one line of IR observation.  Doing otherwise would require throwing away the image numbers.  Preserving them in the CSV file makes future projects easier, such as correlating the FITS files from the cameras to these records of observations.

## Now collect observations into *sessions*
A session:
* is on the same target
* is one or more speckle observations taken back to back (no other objects targeted in between)
* contains measurements at one or more wavelength, typically 2 but sometimes 3 if the IR arm is used

Each (target + session) combination results in one line in the sessions table.

In [30]:
# group the observations by (target, wavelength) sequences

# first, add a speckle session column that changes with each new target
prev_target = ""
speckle_session = 0
dssi_observations["Speckle Session"] = 0
dssi_observations.sort("Time JD")
for observation in dssi_observations:
    target_name = observation["Target Name"]
    if target_name != prev_target:
        speckle_session += 1
        prev_target = target_name
    observation["Speckle Session"] = speckle_session

dssi_observations

Target Name,TIC ID,Wavelengths,Image Number,UTC DateTime,Time JD,Gain 1,Gain 2,RA,Dec,PMRA,PMDec,Mag,Notes,Speckle Session
str13,str13,str8,int32,str19,float64,int32,int32,float64,float64,float64,float64,float64,str55,int32
HR 6935,,"692, 880",0,2023-05-13 00:00:00,0.0,0,0,277.4208333333333,-1.985277777777778,0.0,0.0,5.39,,1
H300056,TIC 97356407,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,106.5020833333333,-30.65572222222222,-11.613,5.244,6.3,TIC=97356407,2
HR 2756,,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,108.83749999999999,-30.68638888888889,-13.502,14.36,5.36,,3
H300057,TIC 317863971,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,110.56749999999998,3.0319166666666666,-1.845,-3.917,10.31,TIC=317863971,4
HR 2880,,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,113.02458333333331,1.9144444444444444,-2.129,-0.868,5.25,,5
H300058,TIC 125952257,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,115.42666666666665,-27.58261111111111,-2.476,3.789,11.54,TIC=125952257,6
HR 3043,,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,117.25708333333331,-24.91222222222222,-28.489,23.378,5.33,,7
H300059,TIC 130276377,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,119.82583333333332,-28.378972222222224,-2.153,2.57,12.1,TIC=130276377,8
HR 3170,,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,121.06749999999998,-32.675,-5.736,6.83,5.31,,9
H300102,TIC 266657256,"692, 880",0,2022-09-30 00:00:00,0.0,0,0,119.26666666666665,4.1867222222222225,-0.867,-1.952,13.17,TIC=266657256,10


In [31]:

# now, make a group summary of each wavelength in a speckle session
dssi_sessions = Table(
    names=["Target Name", "TIC ID", "Speckle Session", "StartTime JD", "MidTime JD", "EndTime JD", "MidTime UTC", "Num Sequences"],
    dtype=["str", "str", "str", "float", "float", "float", "str", "int"],
)

obs_by_session = dssi_observations.group_by(["Speckle Session", "Target Name", "TIC ID"])
for keys, observations in zip(obs_by_session.groups.keys, obs_by_session.groups):
    start_time = observations["Time JD"].min()
    end_time = observations["Time JD"].max()
    mid_time = (end_time + start_time) / 2
    mid_utc = str(Time(mid_time, format="jd").iso)[:19] if mid_time > 0 else ""
    dssi_sessions.add_row((keys["Target Name"], keys["TIC ID"], str(keys["Speckle Session"]), start_time, mid_time, end_time, mid_utc, len(observations)))

dssi_sessions.write("DSSI sessions.csv", overwrite=True)
dssi_sessions

Target Name,TIC ID,Speckle Session,StartTime JD,MidTime JD,EndTime JD,MidTime UTC,Num Sequences
str13,str13,str4,float64,float64,float64,str19,int32
HR 6935,,1,0.0,0.0,0.0,,1
H300056,TIC 97356407,2,0.0,0.0,0.0,,1
HR 2756,,3,0.0,0.0,0.0,,1
H300057,TIC 317863971,4,0.0,0.0,0.0,,1
HR 2880,,5,0.0,0.0,0.0,,1
H300058,TIC 125952257,6,0.0,0.0,0.0,,1
HR 3043,,7,0.0,0.0,0.0,,1
H300059,TIC 130276377,8,0.0,0.0,0.0,,1
HR 3170,,9,0.0,0.0,0.0,,1
H300102,TIC 266657256,10,0.0,0.0,0.0,,1


For observations of TIC objects, it looks like the average number of sequences is 6.24, a bit higher than I expected.