Skip to content

Commit

Permalink
Merge branch 'master' into add_chunk_yielding_for_fuse
Browse files Browse the repository at this point in the history
  • Loading branch information
WenzDaniel committed Sep 21, 2023
2 parents addc2c5 + 5aba752 commit bc4e4eb
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.5.2
current_version = 1.5.4
files = setup.py strax/__init__.py docs/source/conf.py
commit = True
tag = True
23 changes: 23 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
1.5.4 / 2023-09-19
---------------------
* Split compare_metadata into utils.compare_meta by @dachengx in https://github.com/AxFoundation/strax/pull/754
* Change endtime - time >= 0 to endtime >= time by @JYangQi00 in https://github.com/AxFoundation/strax/pull/756
* Mandatorily wrap `_read_chunk` in a `check_chunk_n` decorator by @dachengx in https://github.com/AxFoundation/strax/pull/758

New Contributors
* @JYangQi00 made their first contribution in https://github.com/AxFoundation/strax/pull/756

**Full Changelog**: https://github.com/AxFoundation/strax/compare/v1.5.3...v1.5.4


1.5.3 / 2023-08-29
---------------------
* Add small selection functions by @WenzDaniel in https://github.com/AxFoundation/strax/pull/746
* Patch plugin cache by @WenzDaniel in https://github.com/AxFoundation/strax/pull/748
* Update version of urllib3, remove version control of deepdiff by @dachengx in https://github.com/AxFoundation/strax/pull/749
* Check chunk size right after loading chunk by @dachengx in https://github.com/AxFoundation/strax/pull/752


**Full Changelog**: https://github.com/AxFoundation/strax/compare/v1.5.2...v1.5.3


1.5.2 / 2023-07-06
---------------------
* Use warning also in `abs_time_to_prev_next_interval` by @dachengx in https://github.com/AxFoundation/strax/pull/738
Expand Down
4 changes: 2 additions & 2 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@
# built documents.
#
# The short X.Y version.
version = '1.5.2'
version = '1.5.4'
# The full version, including alpha/beta/rc tags.
release = '1.5.2'
release = '1.5.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def open_requirements(path):
history = file.read()

setuptools.setup(name='strax',
version='1.5.2',
version='1.5.4',
description='Streaming analysis for xenon TPCs',
author='Jelle Aalbers',
url='https://github.com/AxFoundation/strax',
Expand Down
2 changes: 1 addition & 1 deletion strax/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# flake8: noqa
__version__ = '1.5.2'
__version__ = '1.5.4'

# Glue the package together
# See https://www.youtube.com/watch?v=0oTh1CXRaQ0 if this confuses you
Expand Down
22 changes: 22 additions & 0 deletions strax/chunk.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import typing as ty
from functools import wraps

import numpy as np
import numba
Expand Down Expand Up @@ -425,3 +426,24 @@ def _update_subruns_in_chunk(chunks):
else:
subruns[subrun_id] = subrun_start_end
return subruns


@export
def check_chunk_n(f):
@wraps(f)
def wrapper(self, *args, **kwargs):
# assume chunk_info is the second argument
if 'chunk_info' not in kwargs:
raise ValueError(
"chunk_info not passed to function, check_chunk_n ",
"can only be used with functions that take chunk_info as an argument, ",
"usually it is the strax.StorageBackend._read_chunk method."
)
chunk_info = kwargs['chunk_info']
chunk = f(self, *args, **kwargs)
if len(chunk) != chunk_info['n']:
raise strax.DataCorrupted(
f"Chunk {chunk_info['filename']} of {chunk_info['run_id']} has {len(chunk)} items, "
f"but chunk_info {chunk_info} says {chunk_info['n']}")
return chunk
return wrapper
39 changes: 1 addition & 38 deletions strax/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import json
import numpy as np
import pandas as pd
import click
import deepdiff
import strax
import inspect
import types
Expand Down Expand Up @@ -1690,9 +1688,6 @@ def compare_metadata(self, run_id, target, old_metadata):
:param old_metadata: path to metadata to compare, or a dictionary, or a tuple with
another run_id, target to compare against the metadata of the first id-target pair
"""
color_values = lambda oldval, newval: (
click.style(oldval, fg='red', bold=True), click.style(newval, fg='green', bold=True))
underline = lambda text, bold=True: click.style(text, bold=bold, underline=True)

# new metadata for the given runid + target; fetch from context
new_metadata = self.get_metadata(run_id, target)
Expand All @@ -1707,39 +1702,7 @@ def compare_metadata(self, run_id, target, old_metadata):
else:
raise ValueError(f"Expected old_metadata as `str` or `dict` got {type(old_metadata)}")

differences = deepdiff.DeepDiff(old_metadata, new_metadata)
for key, value in differences.items():
if key in ['values_changed', 'iterable_item_added', 'iterable_item_removed']:
print(underline(f"\n> {key}"))
for kk, vv in value.items():
if key == "values_changed":
old_values = vv['old_value']
new_values = vv['new_value']
elif key == "iterable_item_added":
old_values = "-"
new_values = vv
else: # if key == "iterable_item_removed":
old_values = vv
new_values = "-"
old, new = color_values(old_values, new_values)
click.secho(f"\t in {kk[4:]}", bold=False)
print(f"\t\t{old} -> {new}")
elif key in ['dictionary_item_added', 'dictionary_item_removed']:
color = "red" if "removed" in key else "green"
print(underline(f"\n> {key:25s}"), end="->")
click.secho(f"\t{', '.join(value)}", fg=color)
elif key in ['type_changes']:
print(underline(f"\n> {key}"))
for kk, vv in value.items():
click.secho(f"\t{kk}")
oldtype = vv['old_type']
newtype = vv['new_type']
keyold, keynew = color_values('old_type', 'new_type')
valueold, valuenew = color_values(vv['old_value'], vv['new_value'])
print(f"\t\t{keyold:10s} : {oldtype} ({valueold})")
print(f"\t\t{keynew:10s} : {newtype} ({valuenew})")
else:
raise KeyError(f"Unkown key in comparison {key}")
strax.utils.compare_dict(old_metadata, new_metadata)

def run_metadata(self, run_id, projection=None) -> dict:
"""
Expand Down
2 changes: 1 addition & 1 deletion strax/processing/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ def _check_time_is_sorted(time):
@numba.jit(nopython=True, nogil=True, cache=True)
def _check_objects_non_negative_length(objects):
"""Checks if objects have non-negative length"""
mask = np.all(strax.endtime(objects) - objects['time'] >= 0)
mask = np.all(strax.endtime(objects) >= objects['time'])
assert mask


Expand Down
3 changes: 2 additions & 1 deletion strax/run_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def scan_runs(self: strax.Context,
# Add available data types,
# this is kept for the case users directly call list_available
for d in tqdm(check_available,
desc='Checking data availability'):
desc='Checking data availability scan_runs',
disable=not len(check_available)):
self.runs[d + '_available'] = np.in1d(
self.runs.name.values,
self.list_available(d))
Expand Down
7 changes: 7 additions & 0 deletions strax/storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,13 @@ class StorageBackend:
these have to be hardcoded (or made part of the key).
"""

def __new__(cls, *args, **kwargs):
"""Mandatorily wrap _read_chunk in a check_chunk_n decorator"""
if '_read_chunk' in cls.__dict__:
method = getattr(cls, '_read_chunk')
setattr(cls, '_read_chunk', strax.check_chunk_n(method))
return super(StorageBackend, cls).__new__(cls)

def loader(self,
backend_key,
time_range=None,
Expand Down
1 change: 0 additions & 1 deletion strax/storage/zipfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ def zip_dir(input_dir, output_zipfile, delete=False):

@export
class ZipFileBackend(strax.StorageBackend):

def _read_chunk(self, zipn_and_dirn, chunk_info, dtype, compressor):
zipn, dirn = zipn_and_dirn
with zipfile.ZipFile(zipn) as zp:
Expand Down
43 changes: 43 additions & 0 deletions strax/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from collections.abc import Mapping
from warnings import warn
import os
import click
import deepdiff


# Change numba's caching backend from pickle to dill
Expand Down Expand Up @@ -311,6 +313,47 @@ def deterministic_hash(thing, length=10):
return b32encode(digest)[:length].decode('ascii').lower()


@export
def compare_dict(old: dict, new: dict):
"""Compare two dictionaries and print the differences"""
differences = deepdiff.DeepDiff(old, new)
color_values = lambda oldval, newval: (
click.style(oldval, fg='red', bold=True), click.style(newval, fg='green', bold=True))
underline = lambda text, bold=True: click.style(text, bold=bold, underline=True)
for key, value in differences.items():
if key in ['values_changed', 'iterable_item_added', 'iterable_item_removed']:
print(underline(f"\n> {key}"))
for kk, vv in value.items():
if key == "values_changed":
old_values = vv['old_value']
new_values = vv['new_value']
elif key == "iterable_item_added":
old_values = "-"
new_values = vv
else: # if key == "iterable_item_removed":
old_values = vv
new_values = "-"
old, new = color_values(old_values, new_values)
click.secho(f"\t in {kk[4:]}", bold=False)
print(f"\t\t{old} -> {new}")
elif key in ['dictionary_item_added', 'dictionary_item_removed']:
color = "red" if "removed" in key else "green"
print(underline(f"\n> {key:25s}"), end="->")
click.secho(f"\t{', '.join(value)}", fg=color)
elif key in ['type_changes']:
print(underline(f"\n> {key}"))
for kk, vv in value.items():
click.secho(f"\t{kk}")
oldtype = vv['old_type']
newtype = vv['new_type']
keyold, keynew = color_values('old_type', 'new_type')
valueold, valuenew = color_values(vv['old_value'], vv['new_value'])
print(f"\t\t{keyold:10s} : {oldtype} ({valueold})")
print(f"\t\t{keynew:10s} : {newtype} ({valuenew})")
else:
raise KeyError(f"Unkown key in comparison {key}")


@export
def formatted_exception():
"""Return human-readable multiline string with info
Expand Down
23 changes: 23 additions & 0 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import strax
from strax.testutils import Records
import os
import json
import tempfile
import numpy as np
import typing as ty
Expand Down Expand Up @@ -168,6 +169,28 @@ def test_close_goes_first_on_loading(self):
# else:
# self.assertNotEqual(len_from_compare, len_from_main_st)

def test_check_chunk_n(self):
"""
Check that check_chunk_n can detect when metadata is lying
"""
st, frontend_setup = self.get_st_and_fill_frontends()

sf = st.storage[0]
st_new = st.new_context()
st_new.storage = [sf]
key = st_new.key_for(self.run_id, self.target)
backend, backend_key = sf.find(key, **st_new._find_options)
prefix = strax.storage.files.dirname_to_prefix(backend_key)
md = st_new.get_metadata(self.run_id, self.target)
md['chunks'][0]['n'] += 1
md_path = os.path.join(backend_key, f'{prefix}-metadata.json')
with open(md_path, "w") as file:
json.dump(md, file, indent=4)

with self.assertRaises(strax.DataCorrupted):
assert st_new.is_stored(self.run_id, self.target)
st_new.get_array(self.run_id, self.target)

def test_float_remoteness_allowed(self):
"""
It can happen that the pre-defined remoteness identifiers in
Expand Down

0 comments on commit bc4e4eb

Please sign in to comment.