Skip to content

Commit

Permalink
caksearch.py, several upgrades
Browse files Browse the repository at this point in the history
Still not there but getting that way
  • Loading branch information
trmrsh committed Mar 28, 2022
1 parent 00bec41 commit 5e2eafb
Showing 1 changed file with 92 additions and 26 deletions.
118 changes: 92 additions & 26 deletions hipercam/scripts/calsearch.py
Expand Up @@ -30,8 +30,12 @@ def calsearch(args=None):
"""``calsearch runs output``
Given a csv file from |logsearch| (possibly with rows edited, but
all the same columns), this searches for matching calibration
files. This is to aid data export.
with the same columns), this searches for matching calibration
files. This is to aid data export. It first searches for flat
fields, then for the combined list of data files and flats, it
searches for bias frames. The flat-field search does not try to
match the filter in the case of ultracam, because the recorded
filters can be unreliable.
It works by searching through the database .db files which are
accessed by password in the same way via access to your keyring
Expand All @@ -42,31 +46,44 @@ def calsearch(args=None):
runs : str
csv input file
output : str
Name of CSV file to store the results.
diff : int
maximum time difference in days to allow between a frame and
a matching calibration file. Mainly here to prevent
excessive numbers of matches. The "night" column is
used. Thus diff=0 only allows calibrations from the same
night to be considered, but diff=1 allows any preceding or
following nights too.
"""
output : str
Name of CSV file to store the results. The results include
the original runs along with matching flats and biases and
biases for the flats as well. This is readable by oocalc for
instance (UTF-8, semi-colon separators disabled). """

command, args = utils.script_args(args)

with Cline("HIPERCAM_ENV", ".hipercam", command, args) as cl:

# register parameters
cl.register("runs", Cline.LOCAL, Cline.PROMPT)
cl.register("diff", Cline.LOCAL, Cline.PROMPT)
cl.register("output", Cline.LOCAL, Cline.PROMPT)

runs = cl.get_value(
"runs", "input csv file of runs",
cline.Fname('results', '.csv')
)
diff = cl.get_value(
"diff", "maximum time difference for matching calibrations (days)", 10, 0
)
output = cl.get_value(
"output", "name of spreadsheet of results ['none' to ignore]",
cline.Fname('results', '.csv', cline.Fname.NEW), ignore="none"
)

# Read the runs into pandas
runs_df = pd.read_csv(runs)

# Get database files.

# First create directory for them if need be
Expand Down Expand Up @@ -139,13 +156,14 @@ def calsearch(args=None):
print()

# write runs to be checked to junk file. this is because
# i can't get in memory option to work
# I can't get the in-memory option to work for some reason
dbname = 'zzz_junk.db'
cnx = sqlite3.connect(dbname)
runs_df.to_sql(name='tab', con=cnx, if_exists='replace')
cnx.commit()
cnx.close()

# Search for flat field frames
results = []
for dbase, dtable in dbases:

Expand All @@ -160,42 +178,90 @@ def calsearch(args=None):
# Build query string to locate matching flat-field frames.
#
# Designed to:
#
# 1) only return entries from big table
# 2) only from matching observing runs
# 3) should not be the same run
# 4) should have 1x1 binning and more than 10 frames
# 5) have some indication by name, type or comment that it is a flat.
# 6) be within "diff" days of the run and taken at twilight sun altitudes.
# 7) main table referred to as m, runs we are looking for flats for as t.

query = f'SELECT DISTINCT m.* FROM main.{dtable} AS m\n'
query += f"""INNER JOIN runs.tab AS t
ON (m.obs_run = t.obs_run AND m.instrument = t.instrument)
WHERE (m.night != t.night OR m.run_no != t.run_no)
AND m.binning = '1x1' AND m.nframe > 10
AND (m.target LIKE '%flat%' OR m.run_type = 'flat' OR m.comment LIKE '%flat%')
AND ABS(JULIANDAY(m.night)-JULIANDAY(t.night)) <= {diff}
AND ((m.sun_alt_start IS NULL OR (m.sun_alt_start > -15 AND m.sun_alt_start < 0))
OR (m.sun_alt_end IS NULL OR (m.sun_alt_end > -15 AND m.sun_alt_end < 0)))
"""
if dtable == 'ultraspec':
query += f

print(f'Searching for flats in "{dbase}" with SQL code:\n\n{query}\n')

res = pd.read_sql_query(query, conn)
if len(res):
print(res)
results.append(res)

# close connection
conn.close()

# Add the "flats" to the selected runs.
dbname = 'zzz_junk.db'
cnx = sqlite3.connect(dbname)
for res in results:
res.to_sql(name='tab', con=cnx, if_exists='append')
cnx.commit()
cnx.close()

# Now search the whole lot for bias frames
results = [runs_df,]
for dbase, dtable in dbases:

# connect to big database
conn = sqlite3.connect(f"file:{dbase}?mode=ro", uri=True)

# Add database / table runs.tab representing the runs we wish
# to search over
cursor = conn.cursor()
cursor.execute(f'ATTACH "{dbname}" as runs')

# Build query string to locate matching bias frames.
#
# Designed to:
#
# 1) only return entries from big table
# 2) only from matching observing runs
# 3) should not be the same run
# 4) should match read speed and binning
# 5) have more than 10 frames
# 6) have some indication by name, type or comment that it is a bias.
# 7) be within "diff" days of the run.
# 8) main table referred to as m, runs we are looking for biases for as t.

query = f'SELECT DISTINCT m.* FROM main.{dtable} AS m\n'
query += """INNER JOIN runs.tab AS t
query += f"""INNER JOIN runs.tab AS t
ON (m.obs_run = t.obs_run AND m.instrument = t.instrument)
WHERE (m.night != t.night OR m.run_no != t.run_no)
AND m.read_speed = t.read_speed AND m.binning = t.binning AND m.nframe > 10
AND (m.target LIKE '%bias%' OR m.run_type = 'bias' OR m.comment LIKE '%bias%')"""
AND (m.target LIKE '%bias%' OR m.run_type = 'bias' OR m.comment LIKE '%bias%')
AND ABS(JULIANDAY(m.night)-JULIANDAY(t.night)) <= {diff}"""

# query += 'WHERE obs_run = temp.t.obs_run AND (night != temp.t.night OR run_no != temp.t.run_no)\n'
print(f'Searching for biases in "{dbase}" with SQL code:\n\n{query}\n')

res = pd.read_sql_query(query, conn)
if len(res):
print(res)
# query = f'SELECT * FROM main.{dtable}\n'
# query += (
# f"WHERE (obs_run = '{obs_run}') AND (night != '{night}' OR run_no != '{run_no}')\n"
# f"AND (read_speed = '{read_speed}' AND binning = '{binning}' AND nframe > 5\n"
# f"AND (target LIKE '%bias%' OR run_type = 'bias' OR comment LIKE '%bias%') )\n"
# )
# query += (
# f"WHERE (obs_run = '{obs_run}') AND (night != '{night}' OR run_no != '{run_no}')\n"
# f"AND (read_speed = '{read_speed}' AND binning = '{binning}' AND nframe > 5\n"
# f"AND (target LIKE '%bias%' OR run_type = 'bias' OR comment LIKE '%bias%') )\n"
# )

#conn.create_function("REGEXP", 2, regexp)
#query += f'WHERE (REGEXP("{regex}",target) AND total > {tmin})'
results.append(res)

# close connection
conn.close()

#total = pd.concat(results,sort=False)
#total.to_csv(output)
# Save the results.
biglist = pd.concat(results,sort=False)
biglist.to_csv(output)

0 comments on commit 5e2eafb

Please sign in to comment.