logsearch, automated the database download
No longer need to specify locations of the databases; it will attempt to download them from the Warwick server
trmrsh committed Dec 20, 2021
1 parent 72c55e2 commit 6beafa0
Showing 1 changed file with 100 additions and 71 deletions.
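For context, the databases that logsearch now fetches are ordinary sqlite3 files stored under $HOME/.hipercam/dbases (or $HIPERCAM_ENV/dbases if that variable is set), so once downloaded they can also be queried directly. A minimal sketch, not taken from the commit, assuming the hipercam database is already present and, as the (file, table) pairing in the new code suggests, holds its runs in a table named 'hipercam':

import os
import sqlite3
import pandas as pd

# default download directory used by logsearch
dbases_dir = os.path.join(
    os.environ.get('HIPERCAM_ENV', os.path.join(os.environ['HOME'], '.hipercam')),
    'dbases'
)

# open the downloaded hipercam database and pull the whole table into pandas
conn = sqlite3.connect(os.path.join(dbases_dir, 'hipercam.db'))
runs = pd.read_sql_query('SELECT * FROM hipercam', conn)  # table name assumed
conn.close()

print(f'{len(runs)} runs listed in hipercam.db')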
171 changes: 100 additions & 71 deletions hipercam/scripts/logsearch.py
@@ -1,6 +1,10 @@
import sys
import re
import sqlite3
import os
import keyring
import getpass
import subprocess

import numpy as np
import pandas as pd
@@ -22,70 +26,68 @@

def logsearch(args=None):
description = \
"""``logsearch target (dmax) regex (nocase) tmin [hcamdb ucamdb
uspecdb] output``
"""``logsearch target (dmax) regex (nocase) tmin output``
Searches for targets in the |hiper| and |ucam| logs. It can carry
out a coordinate lookup given a name and/or carry out a regular
expression search. It uses the sqlite3 databases generated by
|hlogger| which can be downloaded from the (password protected)
log webpages hosted at Warwick.
|hlogger|; it will try to download these from the log webpages
hosted at Warwick. You will need passwords to access them.
If a target name is entered, it will first be searched for in
SIMBAD. If that fails, it will be searched for coordinates in the
form "J123456.7-123456" or similar, so the latter is always the
fallback for objects that don't appear in SIMBAD. It can also search
by regular expression matching.
fallback for objects that don't appear in SIMBAD. It can also
search by regular expression matching.
Arguments::
target : str
Target name. On the command line, must be enclosed in quotes if it
contains spaces. This will be used first to carry out a lookup in
SIMBAD to find the RA and Dec. Failing this it tries to identify
SIMBAD to find the RA and Dec. Failing this, it tries to identify
coordinates from a final string of the form JHHMMSS.S[+/-]DDMMSS.
Enter "none" to ignore.
tmin : float
Minimum exposure duration seconds to cut down on chaff.
Minimum exposure duration seconds to cut out short runs.
dmax : float
Maximum distance from lookup position, arcminutes
regex : str
Regular expression to use to try to match target names in addition to
the coordinate matching. "none" to ignore.
Regular expression to use to try to match target names in
addition to the coordinate matching. "none" to ignore.
nocase : bool [if regex is not "none"]
True for case-insensitive matching, else case-sensitive used with regex
hcamdb : database [hidden]
Path to |hiper| database which will probably be called hipercam.db if
downloaded from the Warwick logs. 'none' to ignore. Best to specify
the full path when setting this to allow searches to be undertaken from
any directory.
ucamdb : database [hidden]
Path to ULTRACAM database which will probably be called ultracam.db if
downloaded from the Warwick logs. 'none' to ignore. Best to specify
the full path when setting this to allow searches to be undertaken from
any directory.
uspecdb : database [hidden]
Path to ULTRASPEC database which will probably be called ultraspec.db if
downloaded from the Warwick logs. 'none' to ignore. Best to specify
the full path when setting this to allow searches to be undertaken from
any directory.
True for case-insensitive matching with the regex, else the
matching is case-sensitive
output : str
Name of CSV file to store the results. 'none' to ignore. The
results are best viewed in an excel-type programme or
topcat, or they can be read programatically into a pandas
Dataframe using pd.read_csv('results.csv'). Results from all
instruments are concatenated which for instance means that a
column appropriate for hipercam, might be blank for ULTRACAM
and vice versa. An extra "Instrument" column is added to
make the origin clear. """
Name of CSV file to store the results. 'none' to
ignore. Usually you will want to specify this since the
results are too wide to print to screen. Assuming you do
save the results, they are best viewed in an excel-type
programme or topcat, or they can be read programmatically
into a pandas DataFrame using pd.read_csv('results.csv').
Column names from all instruments are concatenated, which
means, for instance, that a column appropriate for hipercam
might be blank for ULTRACAM and vice versa. An extra "Instrument"
column is added to make the origin clear.
.. Note::
The program will attempt to download the databases from the
Warwick server. Since they are often updated, it will always
check, but only download if the server files are newer than the
local versions. The downloads are stored in
$HOME/.hipercam/dbases [or $HIPERCAM_ENV/dbases if you have set
HIPERCAM_ENV]. You will need to know the passwords. They will be stored
in your keyring in a folder called "Data logs". If the
passwords change, you will have to delete those stored in your
keyring.
"""

command, args = utils.script_args(args)

@@ -97,9 +99,6 @@ def logsearch(args=None):
cl.register("regex", Cline.LOCAL, Cline.PROMPT)
cl.register("nocase", Cline.LOCAL, Cline.PROMPT)
cl.register("tmin", Cline.LOCAL, Cline.PROMPT)
cl.register("hcamdb", Cline.LOCAL, Cline.HIDE)
cl.register("ucamdb", Cline.LOCAL, Cline.HIDE)
cl.register("uspecdb", Cline.LOCAL, Cline.HIDE)
cl.register("output", Cline.LOCAL, Cline.PROMPT)

# get inputs
@@ -134,38 +133,74 @@ def logsearch(args=None):
"tmin", "minimum exposure duration for a run to be included [seconds]", -1.
)

hcamdb = cl.get_value(
"hcamdb", "path to hipercam sqlite3 database ['none' to ignore]",
cline.Fname("hipercam.db", ".db"), ignore="none"
)

ucamdb = cl.get_value(
"ucamdb", "path to ultracam sqlite3 database ['none' to ignore]",
cline.Fname("ultracam.db", ".db"), ignore="none"
)

uspecdb = cl.get_value(
"uspecdb", "path to ultraspec sqlite3 database ['none' to ignore]",
cline.Fname("ultraspec.db", ".db"), ignore="none"
)

output = cl.get_value(
"output", "name of spreadsheet of results ['none' to ignore]",
cline.Fname('results', '.csv', cline.Fname.NEW), ignore="none"
)

# check that at least one database is defined
dbs = []
if hcamdb is not None: dbs.append('HiPERCAM')
if ucamdb is not None: dbs.append('ULTRACAM')
if uspecdb is not None: dbs.append('ULTRASPEC')
# Get database files.

# First create directory for them if need be
dbases_dir = os.path.join(
os.environ.get(
'HIPERCAM_ENV',
os.path.join(os.environ["HOME"],'.hipercam')
),
'dbases'
)
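# mode 0o700 keeps the databases private; the third argument is exist_ok=True, so reruns don't fail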
os.makedirs(dbases_dir, 0o700, True)

if len(dbs):
print(f"Will search the following instruments database files: {', '.join(dbs)}")
else:
print(f"No databases defined; please run 'logsearch' with 'prompt' to set them")
exit(1)
# Then download them. Passwords will be prompted and, if the
# subsequent download is successful, will be stored in the
# system keyring

server = 'https://cygnus.astro.warwick.ac.uk/phsaap/'

dbases = []
for dbase in ('ultracam', 'ultraspec', 'hipercam'):

pword = keyring.get_password("Data logs", dbase)
prompted = False
if pword is None:
pword = getpass.getpass(f'{dbase} logs password: ')
prompted = True

# use 'curl' to download
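# -u supplies the username:password, -o names the local file, and -z
# means only download if the server copy is newer than the local one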
fname = os.path.join(dbases_dir, f'{dbase}.db')
args = [
'curl','-u', f'{dbase}:{pword}','-o',fname,
'-z',fname,f'{server}/{dbase}/logs/{dbase}.db'
]
result = subprocess.run(
args, capture_output=True, universal_newlines=True
)
if result.returncode and not os.path.exists(fname):
raise hcam.HipercamError(
f'Failed to download {dbase}.db. Return from curl: '
f'stdout={result.stdout}, stderr={result.stderr}'
)
elif result.returncode:
print(
f'Failed to download {dbase}.db. Will use old '
'local copy although it may be out of date'
)
elif prompted:
# successful, will store password in the keyring
keyring.set_password("Data logs", dbase, pword)
print(f' stored password for {dbase} in keyring')

# check return from curl
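# curl prints its progress meter to stderr; the stats line (4th line)
# collapses to zeros (plus '--:--:--' time fields) when nothing was
# transferred, i.e. the local copy was already up to date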
res = result.stderr.split('\n')
diff = set(res[3].split())
if '0' in diff and len(diff) == 2:
print(f' {dbase}.db unchanged on server')
else:
print(f' {dbase}.db updated from server')

# accumulate list of files and equivalent table names
dbases.append((fname, dbase))

print()
if target is not None:
name, ra, dec = target_lookup(target)
if name == 'UNDEF':
Expand All @@ -184,12 +219,6 @@ def logsearch(args=None):
declo = dec - field
dechi = dec + field

# assemble pairs of databases files and tables
dbases = []
if hcamdb is not None: dbases.append((hcamdb,'hipercam'))
if ucamdb is not None: dbases.append((ucamdb,'ultracam'))
if uspecdb is not None: dbases.append((uspecdb,'ultraspec'))

results = []
for dbase, dtable in dbases:

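The Note in the docstring above says that if the log-page passwords change, the stored copies have to be deleted from the keyring by hand. A minimal sketch of doing that with the keyring package, using the "Data logs" service name from the new code; PasswordDeleteError is raised when no entry is stored:

import keyring
from keyring.errors import PasswordDeleteError

# forget any stored log passwords so that logsearch prompts afresh
for instrument in ('ultracam', 'ultraspec', 'hipercam'):
    try:
        keyring.delete_password("Data logs", instrument)
        print(f'deleted stored password for {instrument}')
    except PasswordDeleteError:
        # nothing stored for this instrument; ignore
        pass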

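Since the docstring recommends reading the output CSV back into pandas, a short usage sketch; 'results.csv' is whatever name was given for the output argument, and only the "Instrument" column is guaranteed by the docstring:

import pandas as pd

# load the spreadsheet written by logsearch
results = pd.read_csv('results.csv')

# the extra "Instrument" column shows which log each matched run came from
print(results['Instrument'].value_counts())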