In [12]:
from collections import Counter
from datetime import datetime, timedelta
from glob import glob
import re
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy.time import Time
import astropy.units as u
import numpy as np

## First, explore around to determine how to identify lines representing observations vs other lines in the log files.

In [13]:
# Exploration: dump every log line into one of two text files so the rule that
# separates observation records from procedural notes can be checked by eye.
logs_dir = r"..\..\Files\DSSI Files\DSSI Logs"
logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)

observation_lines = "Observations lines.txt"
other_lines = "Non observation lines.txt"

with open(observation_lines, mode="w") as obs, open(other_lines, mode="w") as non_obs:
    for log_path in logs_to_process:
        # The path supplies the date: a 4-digit year, four skipped characters,
        # then a 3-character month followed by a 2-digit day.
        year_part, month_day_part = re.findall("([0-9]{4})....(...[0-9]{2})", log_path)[0]
        datestr = year_part + month_day_part
        utc_date = datetime.strptime(datestr, "%Y%b%d")
        with open(log_path) as log_file:
            # Same banner in both outputs so the two dumps can be read side by side.
            banner = f"===========\nFile {log_path} from {utc_date}\n===========" + "\n"
            for sink in (obs, non_obs):
                sink.write(banner)
            for line in log_file:
                looks_like_observation = line.count(":") >= 4  # and line.count("TIC") > 0
                if looks_like_observation:
                    obs.write(f"{datestr} {line}")
                else:
                    non_obs.write(line)

# Conclusion: requiring 4 or more colon characters exactly splits the logs

## Now extract all observation lines from all files
* Use more than one regex to match the target lines
  * For speed, make first regex attempted be the one that is empirically observed to match the most lines
* Keep track of lines that "fall through" and aren't matched by any regex pattern
* Standardize things for easier use in analysis
  * Output times both in ISO format and in JD
  * Convert sexagesimal coordinates to decimal degrees
* Depending on if the pattern matching a line includes IR information or not, add a comma-separated list of wavelengths
  * Currently, wavelength values are hard coded at the top of this code.
  * A scan of all log files for lines containing "Camera A =" (or B) shows that, unsurprisingly, every file cites the same set of wavelengths
* Output a CSV of all parsed & standardized observation lines

In [17]:
# Parse every observation line of every log file into the `dssi_observations`
# table, then write the table to "DSSI Observations.csv".
#
# NOTE: this cell relies on `logs_to_process` built by the exploration cell above.
# logs_dir = r"..\..\Files\DSSI Files\DSSI Logs"
# logs_to_process = glob(f"{logs_dir}/**/*.olist", recursive=True)

wavelengths_optical = "692, 880"
wavelengths_ir = "1450"

dssi_observations = Table(
    names=["Target Name", "TIC ID", "Wavelengths", "Image Number", "UTC DateTime", "Time JD", "Gain 1", "Gain 2", "RA", "Dec", "PMRA", "PMDec", "Mag", "Notes", ],
    dtype=["str", "str", "str", "int", "str", "float", "int", "int", "float", "float", "float", "float", "float", "str", ],
)

# Known line layouts as (label, compiled regex, is_observation), tried in this
# order; the first one that matches wins.  The pattern text is unchanged from
# the original if/elif chain.
line_patterns = [
    (
        "Standard Pattern",
        re.compile(r"(?P<target_name>.{7,13})\s+(?P<image_num>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain_1>\d{1,3})\s+(?P<gain_2>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)"),
        True,
    ),
    (
        # per Jimmy, these were possible targets that never resulted in an actual
        # observation: they are counted but produce no table rows
        "No Image Num, Gains, or Time",
        re.compile(r"(?P<target_name>\"{0,1}.{7,13}\"{0,1})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9.+-]*)\s+(?P<pmdec>[0-9.+-]*)\s+(?P<mag>[0-9.-]+)\s*(?P<notes>.*)"),
        False,
    ),
    (
        "Infrared Observations",
        re.compile(r"(?P<target_name>.{7})\s+(?P<image_beg>\d{1,3})-(?P<image_end>\d{1,3})\s+(?P<image_ir>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<gain>\d{1,3})\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)"),
        True,
    ),
    (
        # example: TIC 204698586      025  300 04:52 12:22:12.4 -24:13:21.7     0.115     0.532  11.209
        "Single Gain Value",
        re.compile(r"(?P<target_name>.{7,15})\s+(?P<image_num>\d{1,4})\s+(?P<gain>\d{1,3})\s+(?P<hours>\d\d):(?P<minutes>\d\d)\s+(?P<ra>\d\d:\d\d:\d\d\.\d+)\s+(?P<dec>[+|-]{0,1}\d\d:\d\d:\d\d\.\d+)\s+(?P<pmra>[0-9\.+-]*)\s+(?P<pmdec>[0-9\.+-]*)\s+(?P<mag>[0-9\.]+)\s*(?P<notes>.*)"),
        True,
    ),
]

line_counts = Counter()

failed_lines = []
for file in logs_to_process:
    with open(file) as f:
        file_matches = 0
        # The date comes from the path: 4-digit year, four skipped characters,
        # then a 3-character month and 2-digit day (e.g. ...\2022-Q2\may10raw.olist).
        datestr = ''.join(re.findall("([0-9]{4})....(...[0-9]{2})", file)[0])
        utc_date = datetime.strptime(datestr, "%Y%b%d")
        for line in f:
            if line.count(":") < 4: # all observation lines follow this pattern
                continue # skip non-observation lines

            for label, pattern, is_observation in line_patterns:
                match = pattern.match(line)
                if match:
                    break
            else:
                # no known layout fits: keep the line for the report at the end
                failed_lines.append(line)
                continue
            line_counts[label] += 1
            if not is_observation:
                continue

            file_matches += 1
            fields = match.groupdict()

            # A blank proper-motion field means "not given": store both components as 0.
            if fields.get("pmra", "") == "":
                fields["pmra"] = 0
                fields["pmdec"] = 0
            if "gain" in fields: # assume gains are the same for both arms if only one gain specified
                fields["gain_1"] = fields["gain"]
                fields["gain_2"] = fields["gain"]

            # NOTE(review): every time is attached to the date parsed from the file
            # path; if a night's log can run past 00:00 UT the later entries would
            # need the following date - confirm against the logs.
            if "hours" in fields:
                obs_time = Time(utc_date + timedelta(hours=int(fields["hours"]), minutes=int(fields["minutes"])))
                datetime_utc = str(obs_time.utc)
                datetime_jd = obs_time.jd
            else:
                datetime_utc = str(utc_date)
                datetime_jd = 0

            # An IR line stands for one optical row per image number in its range
            # plus one row for the IR image.
            if "image_ir" in fields:
                observations = [(image_num, wavelengths_optical) for image_num in range(int(fields["image_beg"]), int(fields["image_end"]) + 1)]
                observations.append((int(fields["image_ir"]), wavelengths_ir))
            else:
                observations = [(int(fields["image_num"]), wavelengths_optical)]

            # Prefer an explicit TIC number anywhere on the line (e.g. in the notes);
            # otherwise fall back to a target name that itself starts with "TIC".
            if matches := re.findall('TIC ?(?:ID)? ?=? ?([0-9]+)', line):
                fields["tic_id"] = "TIC " + matches[0]
            elif fields["target_name"][:3] == "TIC":
                fields["tic_id"] = "TIC " + fields["target_name"][3:].strip()
            else:
                fields["tic_id"] = ""

            # The greedy name groups also swallow column padding, so trim it (and
            # any quotes) to keep names comparable across lines and files.
            target_name = fields["target_name"].replace('"', '').strip()

            try:
                coord = SkyCoord(ra=fields["ra"], dec=fields["dec"], unit=(u.hourangle, u.deg))
                # Convert everything up front so the numeric columns receive plain
                # numbers rather than strings or Angle objects.
                ra_deg = float(coord.ra.deg)
                dec_deg = float(coord.dec.deg)
                gain_1 = int(fields["gain_1"])
                gain_2 = int(fields["gain_2"])
                pmra = float(fields["pmra"])
                pmdec = float(fields["pmdec"])
                mag = float(fields["mag"])
                for image_num, wavelengths in observations:
                    dssi_observations.add_row([target_name, fields["tic_id"], wavelengths, image_num, datetime_utc, datetime_jd, gain_1, gain_2, ra_deg, dec_deg, pmra, pmdec, mag, fields["notes"]])
            except Exception as e:
                # Best effort: report the line together with the reason and carry on.
                print("error:", fields["tic_id"], line.rstrip("\n"), f"[{type(e).__name__}: {e}]")
        print(f"{file_matches:4d} {f.name}")

total_observation_lines = 0
for (regex, count) in line_counts.items():
    print(f"{count:4d} {regex}")
    total_observation_lines += count
print(f"Total lines identified as observations: {total_observation_lines}")
print()
print(len(failed_lines), "failed matches")
for failed_line in failed_lines:
    print(failed_line.rstrip("\n"))
dssi_observations.sort("Time JD")
dssi_observations.write("DSSI Observations.csv", overwrite=True)


  self.insert_row(len(self), vals, mask)


  50 ..\..\Files\DSSI Files\DSSI Logs\2022-Q2\may10raw.olist
error:  "HR 5635"           106   20 08:49 15:06:16.7 +54:33:22.7  6  1.2061
  56 ..\..\Files\DSSI Files\DSSI Logs\2022-Q2\may11raw.olist
  63 ..\..\Files\DSSI Files\DSSI Logs\2022-Q2\may12raw.olist
  49 ..\..\Files\DSSI Files\DSSI Logs\2022-Q2\may15raw.olist
  81 ..\..\Files\DSSI Files\DSSI Logs\2022-Q3\sep27raw.olist
 115 ..\..\Files\DSSI Files\DSSI Logs\2022-Q3\sep28raw.olist
 281 ..\..\Files\DSSI Files\DSSI Logs\2022-Q3\sep29raw.olist
 136 ..\..\Files\DSSI Files\DSSI Logs\2022-Q3\sep30raw.olist
 137 ..\..\Files\DSSI Files\DSSI Logs\2022-Q4\nov12raw.olist
 116 ..\..\Files\DSSI Files\DSSI Logs\2022-Q4\nov13raw.olist
   0 ..\..\Files\DSSI Files\DSSI Logs\2022-Q4\nov14raw.olist
 279 ..\..\Files\DSSI Files\DSSI Logs\2022-Q4\nov15raw.olist
 109 ..\..\Files\DSSI Files\DSSI Logs\2022-Q4\nov16raw.olist
  81 ..\..\Files\DSSI Files\DSSI Logs\2023-Q1\mar07raw.olist
 203 ..\..\Files\DSSI Files\DSSI Logs\2023-Q1\mar08raw.olist
 175 ..\

The count of observation lines printed above is smaller than the number of rows written to the `DSSI Observations.csv` file.  This is because each IR observation line expands into *n* rows of optical observations (one per image number in its range) plus one row for the IR observation.  Doing otherwise would require throwing away the image numbers.  Preserving them in the CSV file makes future projects easier, such as correlating the FITS files from the cameras to these records of observations.

## Now collect observations into *sessions*
A session:
* is on the same target
* is one or more speckle observations taken back to back (no other objects targeted in between)
* contains measurements at one or more wavelengths, typically 2 but sometimes 3 if the IR arm is used

Each (target + session) combination results in one line in the sessions table.

In [15]:
# Number the observations into speckle sessions: runs of consecutive
# (time-ordered) observations of the same target.

# Largest gap, in days, allowed between two consecutive observations of one
# session.  Without it, the last target of one night and the first target of
# the next night would be merged into a single session whenever they share a
# name, even though they were not taken back to back.
MAX_SESSION_GAP_DAYS = 0.25

prev_target = ""
prev_time_jd = None
speckle_session = 0
dssi_observations["Speckle Session"] = 0
dssi_observations.sort("Time JD")
for observation in dssi_observations:
    target_name = observation["Target Name"]
    time_jd = observation["Time JD"]
    long_gap = prev_time_jd is not None and (time_jd - prev_time_jd) > MAX_SESSION_GAP_DAYS
    # a new session starts when the target changes or after a long pause
    if target_name != prev_target or long_gap:
        speckle_session += 1
        prev_target = target_name
    prev_time_jd = time_jd
    observation["Speckle Session"] = speckle_session

dssi_observations

Target Name,TIC ID,Wavelengths,Image Number,UTC DateTime,Time JD,Gain 1,Gain 2,RA,Dec,PMRA,PMDec,Mag,Notes,Speckle Session
str15,str13,str8,int64,str19,float64,int64,int64,float64,float64,float64,float64,float64,str55,int64
HR 4717,,"692, 880",1,2022-05-10 03:24:00,2459709.6416666666,50,50,186.07708333333332,26.09861111111111,-26.158,-9.385,5.18,,1
RKS1227+2701,,"692, 880",2,2022-05-10 03:28:00,2459709.644444444,200,200,186.80708333333328,27.024666666666665,94.057,-248.007,8.47,pri2 36.467 0.060,2
RKS1230+2640,,"692, 880",3,2022-05-10 03:32:00,2459709.6472222223,200,200,187.51041666666666,26.673472222222223,-5.957,10.917,11.06,pri3 20.236 0.414,3
HIP058056,,"692, 880",4,2022-05-10 03:39:00,2459709.652083333,300,300,178.60708333333332,15.253388888888889,33.19,-164.66,10.45,Horch/OrbCand/Xavier,4
RKS1232+1045,,"692, 880",5,2022-05-10 03:42:00,2459709.654166667,300,300,188.13874999999996,10.765805555555556,13.609,-9.69,10.77,pri3 24.798 0.045,5
HR 4851,,"692, 880",6,2022-05-10 03:47:00,2459709.657638889,20,20,191.66125,16.5775,14.359,15.903,5.12,,6
HR 4781,,"692, 880",7,2022-05-10 03:51:00,2459709.660416667,20,20,188.445,-9.451944444444443,-83.46,0.22,5.48,,7
RKS1240-0332,,"692, 880",8,2022-05-10 03:53:00,2459709.6618055557,300,300,190.00124999999997,-3.533888888888889,36.339,-40.108,10.71,pri3 20.336 0.069,8
RKS1243-0242,,"692, 880",9,2022-05-10 03:55:00,2459709.6631944445,250,250,190.84124999999997,-2.7148611111111114,-118.216,-89.762,9.21,pri3 20.615 0.050,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [16]:

# Summarise each speckle session as one row: who was observed, when the
# session started / ended / was centred, and how many observation rows it holds.
session_schema = {
    "Target Name": "str",
    "TIC ID": "str",
    "Speckle Session": "str",
    "StartTime JD": "float",
    "MidTime JD": "float",
    "EndTime JD": "float",
    "MidTime UTC": "str",
    "Num Sequences": "int",
}
dssi_sessions = Table(names=list(session_schema), dtype=list(session_schema.values()))

obs_by_session = dssi_observations.group_by(["Speckle Session", "Target Name", "TIC ID"])
for group_key, group_rows in zip(obs_by_session.groups.keys, obs_by_session.groups):
    jd_values = group_rows["Time JD"]
    first_jd = jd_values.min()
    last_jd = jd_values.max()
    middle_jd = (last_jd + first_jd) / 2
    # a mid time of 0 or less is shown as blank rather than as a date
    if middle_jd > 0:
        middle_utc = str(Time(middle_jd, format="jd").iso)[:19]
    else:
        middle_utc = ""
    session_row = (
        group_key["Target Name"],
        group_key["TIC ID"],
        str(group_key["Speckle Session"]),
        first_jd,
        middle_jd,
        last_jd,
        middle_utc,
        len(group_rows),
    )
    dssi_sessions.add_row(session_row)

dssi_sessions.write("DSSI sessions.csv", overwrite=True)
dssi_sessions

Target Name,TIC ID,Speckle Session,StartTime JD,MidTime JD,EndTime JD,MidTime UTC,Num Sequences
str15,str13,str4,float64,float64,float64,str19,int64
HR 4717,,1,2459709.6416666666,2459709.6416666666,2459709.6416666666,2022-05-10 03:24:00,1
RKS1227+2701,,2,2459709.644444444,2459709.644444444,2459709.644444444,2022-05-10 03:28:00,1
RKS1230+2640,,3,2459709.6472222223,2459709.6472222223,2459709.6472222223,2022-05-10 03:32:00,1
HIP058056,,4,2459709.652083333,2459709.652083333,2459709.652083333,2022-05-10 03:39:00,1
RKS1232+1045,,5,2459709.654166667,2459709.654166667,2459709.654166667,2022-05-10 03:42:00,1
HR 4851,,6,2459709.657638889,2459709.657638889,2459709.657638889,2022-05-10 03:47:00,1
HR 4781,,7,2459709.660416667,2459709.660416667,2459709.660416667,2022-05-10 03:51:00,1
RKS1240-0332,,8,2459709.6618055557,2459709.6618055557,2459709.6618055557,2022-05-10 03:53:00,1
RKS1243-0242,,9,2459709.6631944445,2459709.6631944445,2459709.6631944445,2022-05-10 03:55:00,1
...,...,...,...,...,...,...,...


For observations of TIC objects, it looks like the average number of sequences is 6.24, a bit higher than I expected.