Merge pull request #342 from BCDA-APS/341-databroker-metrics

summarize runs in databroker by plan_name and frequency
BCDA-APS · Jul 10, 2020 · 1f6fc83 · 1f6fc83
2 parents b2a2034 + e13e955
commit 1f6fc83
Showing 1 changed file with 62 additions and 1 deletion.
diff --git a/apstools/utils.py b/apstools/utils.py
@@ -30,6 +30,7 @@
    ~safe_ophyd_name
    ~show_ophyd_symbols
    ~split_quoted_line
+   ~summarize_runs
    ~text_encode
    ~to_unicode_or_bust
    ~trim_string_for_EPICS
@@ -49,8 +50,9 @@
 
 from bluesky.callbacks.best_effort import BestEffortCallback
 from bluesky import plan_stubs as bps
-from collections import OrderedDict
+from collections import defaultdict, OrderedDict
 import databroker
+import databroker.queries
 import datetime
 from email.mime.text import MIMEText
 from event_model import NumpyEncoder
@@ -727,6 +729,65 @@ def split_quoted_line(line):
     return parts
 
 
+def summarize_runs(since=None, db=None):
+    """
+    Report bluesky run metrics from the databroker.
+
+    * How many different plans?
+    * How many runs?
+    * How many times each run was used?
+    * How frequently?  (TODO:)
+
+    PARAMETERS
+
+    since (str) :
+        Report all runs since this ISO8601 date & time (default: ``1995``)
+    db (object) :
+        Instance of ``databroker.Broker()``
+        (default: ``db`` from the IPython shell)
+    """
+    db = db or ipython_shell_namespace()["db"]
+    since = since or "1995"     # no APS X-ray experiment data before 1995!
+    cat = db.v2.search(databroker.queries.TimeRange(since=since))
+    plans = defaultdict(list)
+    t0 = time.time()
+    for n, uid in enumerate(cat):
+        t1 = time.time()
+        run = cat[uid]      # this step is very slow (0.01 - 0.5 seconds each!)
+        t2 = time.time()
+        plan_name = run.metadata["start"].get("plan_name", "unknown")
+        dt = datetime.datetime.fromtimestamp(run.metadata["start"]["time"]).isoformat()
+        scan_id = run.metadata["start"].get("scan_id", "unknown")
+        plans[plan_name].append(
+            dict(
+                plan_name=plan_name,
+                dt=dt,
+                time_start=dt,
+                uid=uid,
+                scan_id=scan_id,
+            )
+        )
+        logger.debug(
+            "%s %s dt1=%4.01fus dt2=%5.01fms %s",
+            scan_id,
+            dt,
+            (t1-t0)*1e6,
+            (t2-t1)*1e3,
+            plan_name,
+            )
+        t0 = time.time()
+
+    def sorter(plan_name):
+        return len(plans[plan_name])
+
+    table = pyRestTable.Table()
+    table.labels = "plan quantity".split()
+    for k in sorted(plans.keys(), key=sorter, reverse=True):
+        table.addRow((k, sorter(k)))
+    table.addRow(("TOTAL", n+1))
+    print(table)
+
+
 def text_encode(source):
     """Encode ``source`` using the default codepoint."""
     return source.encode(errors='ignore')