Skip to content

Commit

Permalink
Check data availability for single run (#416)
Browse files Browse the repository at this point in the history
* add available_for_run

* update docstring
  • Loading branch information
JoranAngevaare committed Apr 7, 2021
1 parent 2710156 commit 85e1610
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 2 deletions.
63 changes: 62 additions & 1 deletion strax/run_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import fnmatch
import re
import typing as ty

from collections import defaultdict
import numpy as np
import pandas as pd
from tqdm import tqdm
Expand Down Expand Up @@ -283,6 +283,67 @@ def define_run(self: strax.Context,
" run definition")


@strax.Context.add_method
def available_for_run(self: strax.Context,
                      run_id: str,
                      include_targets: ty.Union[None, list, tuple, str] = None,
                      exclude_targets: ty.Union[None, list, tuple, str] = None,
                      pattern_type: str = 'fnmatch') -> pd.DataFrame:
    """
    For a given single run, check for every target whether it is stored.
    Targets whose plugin is never saved are left out of the result.

    :param run_id: requested run
    :param include_targets: targets to include e.g. raw_records,
        raw_records* or *_nv. If multiple targets (e.g. a list) are
        provided, the target should match any of the arguments! If
        None (or empty), all targets are included.
    :param exclude_targets: targets to exclude e.g. raw_records,
        raw_records* or *_nv. If multiple targets (e.g. a list) are
        provided, the target should match none of the arguments!
    :param pattern_type: either 'fnmatch' (Unix filename pattern
        matching) or 're' (Regular expression operations).
    :return: Table of available data per target, with one row per
        selected target and the columns 'target' and 'is_stored'.
        Both columns are present even if no target was selected.
    :raises ValueError: if run_id is not a str.
    """
    if not isinstance(run_id, str):
        raise ValueError(f'Only single run_id is allowed (str),'
                         f' got {run_id} ({type(run_id)})')

    if exclude_targets is None:
        exclude_targets = []
    if include_targets is None:
        include_targets = []

    # The patterns are identical for every target, so convert them only
    # once instead of once per registered target.
    exclude_patterns = strax.to_str_tuple(exclude_targets)
    include_patterns = strax.to_str_tuple(include_targets)

    selected_targets = []
    stored_flags = []
    for target, plugin_class in self._plugin_class_registry.items():
        # Skip targets that are not stored
        if not plugin_class.save_when > strax.SaveWhen.NEVER:
            continue

        # Simple logic: if we match any excluded pattern, we should
        # not continue with this target.
        if any(_tag_match(target, excl, pattern_type, False)
               for excl in exclude_patterns):
            continue

        # If no include_targets is specified, all targets are fine,
        # otherwise at least one of the patterns has to match.
        if include_patterns and not any(
                _tag_match(target, incl, pattern_type, False)
                for incl in include_patterns):
            continue

        selected_targets.append(target)
        stored_flags.append(self.is_stored(run_id, target))

    # Always provide both columns so that e.g. df['is_stored'] also
    # works when the selection turned out to be empty.
    return pd.DataFrame({'target': selected_targets,
                         'is_stored': stored_flags})


def _tags_match(dsets, patterns, pattern_type, ignore_underscore):
result = np.zeros(len(dsets), dtype=np.bool)

Expand Down
18 changes: 17 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,6 @@ def test_run_selection():
with tempfile.TemporaryDirectory() as temp_dir:
sf = strax.DataDirectory(path=temp_dir,
deep_scan=True, provide_run_metadata=True)

# Write mock runs db
for d in mock_rundb:
sf.write_run_metadata(d['name'], d)
Expand Down Expand Up @@ -448,3 +447,20 @@ def test_allow_multiple_inverted():
# actually depending on the second. In that case, we should
# subscribe the first target as the endpoint of the processing
test_allow_multiple(targets=('records', 'peaks',))


def test_available_for_run():
    """Smoke test of Context.available_for_run on an empty storage"""
    with tempfile.TemporaryDirectory() as temp_dir:
        storage = strax.DataDirectory(temp_dir, deep_scan=True)
        mystrax = strax.Context(storage=storage,
                                register=[Records, Peaks])
        targets = list(mystrax._plugin_class_registry.keys())
        n_targets = len(targets)

        # Try growing prefixes of the target list both as exclude and
        # as include selection.
        for n_excluded in range(n_targets):
            excluded = targets[:n_excluded]
            for n_included in range(n_targets):
                included = targets[:n_included]
                df = mystrax.available_for_run(run_id,
                                               include_targets=included,
                                               exclude_targets=excluded)
                if len(df) == 0:
                    continue
                # Nothing was ever processed in this temporary
                # directory, so no target may be reported as stored.
                assert not sum(df['is_stored'])

0 comments on commit 85e1610

Please sign in to comment.