Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify defaults via run metadata / run doc #223

Merged
merged 7 commits into from
Dec 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/advanced/plugin_dev.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ You can specify defaults in several ways:
- ``default``: Use the given value as default.
- ``default_factory``: Call the given function (with no arguments) to produce a default. Use for mutable values such as lists.
- ``default_by_run``: Specify a list of 2-tuples: ``(start_run, default)``. Here ``start_run`` is a numerized run name (e.g. 170118_1327; note that underscores are valid in integer literals since Python 3.6) and ``default`` is the value that applies from that run onwards.
- The ``strax_defaults`` dictionary in the run metadata. This overrides any defaults specified in the plugin code, but take care -- if you change a value here, there will be no record anywhere of what value was used previously, so you cannot reproduce your results anymore!

5 changes: 1 addition & 4 deletions strax/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@
from .mailbox import *
from .processor import *
from .context import *

# Just run this file, it will add new methods to Context
from . import run_selection
del run_selection
from .run_selection import *

from .io import *

Expand Down
40 changes: 27 additions & 13 deletions strax/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import builtins
import typing as ty
import warnings

import strax
export, __all__ = strax.exporter()
Expand Down Expand Up @@ -80,6 +81,11 @@ def __init__(self,
self.track = track
self.help = help

# if self.default_by_run is not OMITTED:
# warnings.warn(f"The {self.name} option uses default_by_run,"
# f" which will soon stop working!",
# DeprecationWarning)

type = builtins.type
if sum([self.default is not OMITTED,
self.default_factory is not OMITTED,
Expand All @@ -90,23 +96,28 @@ def __init__(self,
if type is OMITTED and default is not OMITTED:
self.type = type(default)

def get_default(self, run_id=None):
def get_default(self, run_id, run_defaults: dict = None):
"""Return default value for the option"""
if run_id is None:
run_id = 0 # TODO: think if this makes sense

if isinstance(run_id, str):
is_superrun = run_id.startswith('_')
if not is_superrun:
run_id = int(run_id.replace('_', ''))
else:
is_superrun = False

if run_defaults is not None and self.name in run_defaults:
return run_defaults[self.name]
if self.default is not OMITTED:
return self.default
if self.default_factory is not OMITTED:
return self.default_factory()

if self.default_by_run is not OMITTED:
# TODO: This legacy code for handling default_per_run will soon
# be removed!
if run_id is None:
run_id = 0 # TODO: think if this makes sense

if isinstance(run_id, str):
is_superrun = run_id.startswith('_')
if not is_superrun:
run_id = int(run_id.replace('_', ''))
else:
is_superrun = False

if callable(self.default_by_run):
raise RuntimeError(
"Using functions to specify per-run defaults is no longer"
Expand All @@ -128,10 +139,13 @@ def get_default(self, run_id=None):
"lowest run id {start_run} for which the default "
"of the option {self.name} is known.")
return use_value

raise InvalidConfiguration(f"Missing option {self.name} "
f"required by {self.taken_by}")

def validate(self, config, run_id=None, set_defaults=True):
def validate(self, config,
run_id=None, # TODO: will soon be removed
run_defaults=None, set_defaults=True):
"""Checks if the option is in config and sets defaults if needed.
"""
if self.name in config:
Expand All @@ -142,7 +156,7 @@ def validate(self, config, run_id=None, set_defaults=True):
f"Invalid type for option {self.name}. "
f"Excepted a {self.type}, got a {type(value)}")
elif set_defaults:
config[self.name] = self.get_default(run_id)
config[self.name] = self.get_default(run_id, run_defaults)


@export
Expand Down
40 changes: 34 additions & 6 deletions strax/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@

import strax
export, __all__ = strax.exporter()
__all__ += ['RUN_DEFAULTS_KEY']

RUN_DEFAULTS_KEY = 'strax_defaults'


@strax.takes_config(
strax.Option(name='storage_converter', default=False,
Expand Down Expand Up @@ -59,6 +63,7 @@ class Context:
context_config: dict

runs: ty.Union[pd.DataFrame, type(None)] = None
_run_defaults_cache: dict = None

def __init__(self,
storage=None,
Expand Down Expand Up @@ -90,6 +95,7 @@ def __init__(self,
for s in storage]

self._plugin_class_registry = dict()
self._run_defaults_cache = dict()

self.set_config(config, mode='replace')
self.set_context_config(kwargs, mode='replace')
Expand Down Expand Up @@ -171,7 +177,7 @@ def set_context_config(self, context_config=None, mode='update'):
mode=mode)

for opt in self.takes_config.values():
opt.validate(new_config, set_defaults=True)
opt.validate(new_config)

for k in new_config:
if k not in self.takes_config:
Expand Down Expand Up @@ -253,7 +259,7 @@ def show_config(self, data_type=None, pattern='*', run_id='9' * 20):
if not fnmatch.fnmatch(opt.name, pattern):
continue
try:
default = opt.get_default(run_id)
default = opt.get_default(self.run_defaults(run_id))
except strax.InvalidConfiguration:
default = strax.OMITTED
c = self.context_config if data_type is None else self.config
Expand Down Expand Up @@ -319,7 +325,9 @@ def _set_plugin_config(self, p, run_id, tolerant=True):
config = self.config.copy()
for opt in p.takes_config.values():
try:
opt.validate(config, run_id)
opt.validate(config,
run_id=run_id,
run_defaults=self.run_defaults(run_id))
except strax.InvalidConfiguration:
if not tolerant:
raise
Expand Down Expand Up @@ -464,6 +472,7 @@ def get_components(self, run_id: str,
"""Return components for setting up a processor
{get_docs}
"""

save = strax.to_str_tuple(save)
targets = strax.to_str_tuple(targets)

Expand Down Expand Up @@ -774,7 +783,9 @@ def get_iter(self, run_id: str,
else:
raise RuntimeError("Cannot automerge different data kinds!")

components = self.get_components(run_id, targets=targets, save=save,
components = self.get_components(run_id,
targets=targets,
save=save,
time_range=time_range)

# Cleanup the temp plugins
Expand Down Expand Up @@ -935,8 +946,25 @@ def run_metadata(self, run_id, projection=None) -> dict:
except (strax.DataNotAvailable, NotImplementedError):
self.log.debug(f"Frontend {sf} does not have "
f"run metadata for {run_id}")
raise strax.DataNotAvailable(f"No run-level metadata available "
f"for {run_id}")
raise strax.RunMetadataNotAvailable(f"No run-level metadata available "
f"for {run_id}")

def run_defaults(self, run_id):
    """Return the configuration defaults stored in the metadata of a run.

    The result is memoized per run_id in self._run_defaults_cache, so the
    run metadata is queried at most once per run for the lifetime of this
    context. If no run metadata is available, or it has no defaults
    entry, an empty dict is returned (and cached).

    :param run_id: run to look up the defaults for
    :return: dict of defaults from the run metadata
    """
    cache = self._run_defaults_cache
    if run_id not in cache:
        try:
            metadata = self.run_metadata(run_id,
                                         projection=RUN_DEFAULTS_KEY)
            found = metadata.get(RUN_DEFAULTS_KEY, dict())
        except strax.RunMetadataNotAvailable:
            # No frontend knows this run: fall back to "no defaults"
            found = dict()
        cache[run_id] = found
    return cache[run_id]

def is_stored(self, run_id, target, **kwargs):
"""Return whether data type target has been saved for run_id
Expand Down
38 changes: 31 additions & 7 deletions strax/run_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from tqdm import tqdm

import strax
export, __all__ = strax.exporter()


@strax.Context.add_method
Expand Down Expand Up @@ -36,7 +37,7 @@ def list_available(self, target, **kwargs):


@strax.Context.add_method
def scan_runs(self,
def scan_runs(self: strax.Context,
check_available=tuple(),
store_fields=tuple()):
"""Update and return self.runs with runs currently available
Expand All @@ -52,7 +53,8 @@ def scan_runs(self,
"""
store_fields = tuple(set(
list(strax.to_str_tuple(store_fields))
+ ['name', 'number', 'tags', 'mode', 'sub_run_spec']
+ ['name', 'number', 'tags', 'mode',
strax.RUN_DEFAULTS_KEY]
+ list(self.context_config['store_run_fields'])))
check_available = tuple(set(
list(strax.to_str_tuple(check_available))
Expand All @@ -74,13 +76,25 @@ def scan_runs(self,

doc.setdefault('mode', '')

# Flatten the tags field, if it exists
# Convert tags list to a ,separated string
doc['tags'] = ','.join([t['name']
for t in doc.get('tags', [])])
for t in doc.get('tags', [])])

# Flatten the rest of the doc (mainly in case the mode field
# is something deeply nested)
doc = strax.flatten_dict(doc, separator='.', keep='sub_run_spec')
# Set a default livetime if we have start and stop
if ('start' in store_fields
and 'end' in store_fields
and 'livetime' in store_fields
and 'start' in doc
and 'end' in doc):
doc.setdefault('livetime', doc['end'] - doc['start'])

# Put the strax defaults stuff into a different cache
if strax.RUN_DEFAULTS_KEY in doc:
self._run_defaults_cache[doc['name']] = \
doc[strax.RUN_DEFAULTS_KEY]
del doc[strax.RUN_DEFAULTS_KEY]

doc = flatten_run_metadata(doc)

_temp_docs.append(doc)

Expand Down Expand Up @@ -277,3 +291,13 @@ def _tag_match(tag, pattern, pattern_type, ignore_underscore):
elif pattern_type == 're':
return bool(re.match(pattern, tag))
raise NotImplementedError


@export
def flatten_run_metadata(md):
    """Return run metadata md passed through strax.flatten_dict.

    Nested levels are joined with '.' as separator. The run defaults,
    'sub_run_spec' and 'tags' entries are passed as ``keep``; presumably
    this leaves them un-flattened (confirm against strax.flatten_dict).

    :param md: run metadata dictionary
    """
    keep_keys = [strax.RUN_DEFAULTS_KEY, 'sub_run_spec', 'tags']
    return strax.flatten_dict(md, separator='.', keep=keep_keys)
5 changes: 2 additions & 3 deletions strax/storage/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,13 @@ def run_metadata(self, run_id, projection=None):
with open(path, mode='r') as f:
md = json.loads(f.read(),
object_hook=json_util.object_hook)
md = strax.flatten_dict(md, separator='.', keep='sub_run_spec')
md = strax.flatten_run_metadata(md)
if projection is not None:
md = {k: v
for k, v in md.items()
if k in projection}
return md


def write_run_metadata(self, run_id, metadata):
with open(self._run_meta_path(run_id), mode='w') as f:
f.write(json.dumps(metadata, default=json_util.default))
Expand Down Expand Up @@ -320,4 +319,4 @@ def _close(self):

@export
class InvalidFolderNameFormat(Exception):
pass
pass
21 changes: 21 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from strax.testutils import *


def test_core():
for allow_multiprocess in (False, True):
for max_workers in [1, 2]:
Expand Down Expand Up @@ -268,6 +269,7 @@ def test_run_selection():
st = strax.Context(storage=sf)
assert len(st.scan_runs()) == len(mock_rundb)
assert st.run_metadata('0') == mock_rundb[0]
assert st.run_metadata('0', projection='name') == {'name': '0'}

assert len(st.select_runs(run_mode='nice')) == 2
assert len(st.select_runs(include_tags='interesting')) == 2
Expand All @@ -280,6 +282,25 @@ def test_run_selection():
assert len(st.select_runs(run_id='*',
exclude_tags='bad')) == 1


def test_run_defaults():
    """Defaults in the run metadata are used, but can still be overridden."""
    run_docs = [{'name': '0', strax.RUN_DEFAULTS_KEY: dict(base_area=43)}]

    with tempfile.TemporaryDirectory() as temp_dir:
        storage = strax.DataDirectory(path=temp_dir)
        for run_doc in run_docs:
            storage.write_run_metadata(run_doc['name'], run_doc)
        context = strax.Context(storage=storage, register=[Records, Peaks])

        # Without explicit config, base_area is taken from the run metadata
        from_run_doc = context.get_array('0', 'peaks')
        assert np.all(from_run_doc['area'] == 43)

        # A config value passed by the user wins over the run metadata
        overridden = context.get_array('0', 'peaks',
                                       config=dict(base_area=44))
        assert np.all(overridden['area'] == 44)


def test_dtype_mismatch():
mystrax = strax.Context(storage=[],
register=[Records, Peaks],
Expand Down