Reduce codefactor #487

Merged
merged 6 commits on Jul 15, 2021
2 changes: 1 addition & 1 deletion strax/chunk.py
@@ -298,10 +298,10 @@ def continuity_check(chunk_iter):
"""Check continuity of chunks yielded by chunk_iter as they are yielded"""
last_end = None
last_runid = None

last_subrun = {'run_id': None}
for chunk in chunk_iter:
if chunk.run_id != last_runid:
# TODO: can we do better?
last_end = None
last_subrun = {'run_id': None}

5 changes: 2 additions & 3 deletions strax/context.py
@@ -149,7 +149,6 @@ def new_context(self,
:param replace: If True, replaces settings rather than adding them.
See Context.__init__ for documentation on other parameters.
"""
# TODO: Clone rather than pass on storage front-ends ??
if not isinstance(storage, (list, tuple)):
storage = [storage]
if config is None:
@@ -751,7 +750,7 @@ def check_cache(d):
def concat_loader(*args, **kwargs):
for x in ldrs:
yield from x(*args, **kwargs)

# pylint: disable=unnecessary-lambda
ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs)

if ldr:
@@ -1349,6 +1348,7 @@ def function(arr):
+ result.get('result', 0))

n_chunks += 1
n_chunks += 1

result['n_chunks'] = n_chunks
return result
@@ -1463,7 +1463,6 @@ def is_stored(self, run_id, target, **kwargs):

# If any new options given, replace the current context
# with a temporary one
# TODO duplicated code with with get_iter
if len(kwargs):
# Comment below disables pycharm from inspecting the line below it
# noinspection PyMethodFirstArgAssignment
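Side note on the `unnecessary-lambda` suppression in the `check_cache` hunk above: below is a minimal, self-contained sketch of the pattern being silenced, with made-up `_loader_a`/`_loader_b` generators standing in for the real loaders. It is not the Context code itself.

```python
import typing as ty

def _loader_a() -> ty.Iterator[int]:
    yield from (1, 2)

def _loader_b() -> ty.Iterator[int]:
    yield from (3, 4)

ldrs = [_loader_a, _loader_b]

def concat_loader(*args, **kwargs) -> ty.Iterator[int]:
    """Chain several generator-style loaders into a single iterator."""
    for x in ldrs:
        yield from x(*args, **kwargs)

# Pylint flags a lambda that only forwards its arguments to another callable;
# the directive keeps the lambda while silencing that warning.
# pylint: disable=unnecessary-lambda
ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs)

assert list(ldr()) == [1, 2, 3, 4]
```

An alternative that avoids the warning entirely would be `ldr = concat_loader`; the diff keeps the lambda and disables the check instead.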
8 changes: 4 additions & 4 deletions strax/io.py
@@ -39,8 +39,8 @@ def load_file(f, compressor, dtype):
:param dtype: numpy dtype of data to load
"""
if isinstance(f, str):
with open(f, mode='rb') as f:
return _load_file(f, compressor, dtype)
with open(f, mode='rb') as write_file:
return _load_file(write_file, compressor, dtype)
else:
return _load_file(f, compressor, dtype)

@@ -74,8 +74,8 @@ def save_file(f, data, compressor='zstd'):
if isinstance(f, str):
final_fn = f
temp_fn = f + '_temp'
with open(temp_fn, mode='wb') as f:
result = _save_file(f, data, compressor)
with open(temp_fn, mode='wb') as write_file:
result = _save_file(write_file, data, compressor)
os.rename(temp_fn, final_fn)
return result
else:
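For context on the io.py change: renaming the `with` target avoids rebinding the path argument `f` to the open file handle. A small illustrative sketch of the before/after pattern, using `read()` as a stand-in for strax's `_load_file`:

```python
# Before: the parameter f (a path string) is rebound to a file object,
# so one name means two different things within the same function.
def load_shadowed(f):
    if isinstance(f, str):
        with open(f, mode='rb') as f:   # f is now the handle, not the path
            return f.read()
    return f.read()

# After: a distinct name keeps path and handle apart, which is what the
# diff does with write_file above.
def load_clean(f):
    if isinstance(f, str):
        with open(f, mode='rb') as read_file:
            return read_file.read()
    return f.read()
```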
2 changes: 1 addition & 1 deletion strax/mailbox.py
@@ -1,3 +1,4 @@
# pylint: disable=redefined-builtin
from concurrent.futures import Future, TimeoutError
import heapq
import sys
@@ -319,7 +320,6 @@ def can_write():
if self.killed:
self.log.debug(f"Sender found {self.name} killed while waiting"
" for room for new messages.")
# TODO: this is duplicated from above...
if self.force_killed:
raise MailboxKilled(self.killed_because)
return
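On the new module-level pylint directive: `concurrent.futures.TimeoutError` shadows the built-in of the same name, which pylint reports as `redefined-builtin`. A tiny sketch of the pattern, separate from the mailbox code itself:

```python
# pylint: disable=redefined-builtin
from concurrent.futures import TimeoutError

try:
    raise TimeoutError("no room for new messages")
except TimeoutError:
    pass  # without the directive, pylint warns about the shadowed built-in name
```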
2 changes: 1 addition & 1 deletion strax/plugin.py
@@ -387,7 +387,7 @@ class IterDone(Exception):
# If any of the inputs were trimmed due to early splits,
# trim the others too.
# In very hairy cases this can take multiple passes.
# TODO: can we optimize this, or code it more elegantly?
# can we optimize this, or code it more elegantly?
max_passes_left = 10
while max_passes_left > 0:
this_chunk_end = min([x.end for x in inputs.values()]
2 changes: 1 addition & 1 deletion strax/processing/data_reduction.py
@@ -29,7 +29,7 @@ class ReductionLevel(IntEnum):
def cut_baseline(records, n_before=48, n_after=30):
""""Replace first n_before and last n_after samples of pulses by 0
"""
# TODO: records.data.shape[1] gives a numba error (file issue?)
# records.data.shape[1] gives a numba error (file issue?)
if not len(records):
return
samples_per_record = len(records[0]['data'])
2 changes: 0 additions & 2 deletions strax/processing/hitlets.py
@@ -353,7 +353,6 @@ def get_fwxm(hitlet, fraction=0.5):
lbi = (index_maximum - 1) - lbi # start sample minus samples we went to the left
m = data[lbi + 1] - lbs # divided by 1 sample
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
left_edge = lbi + (max_val - lbs) / m + 0.5
else:
@@ -366,7 +365,6 @@
rbi += 1 + index_maximum # sample to the right plus start
m = data[rbi - 1] - rbs
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
right_edge = rbi - (max_val - rbs) / m + 0.5
else:
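On the `get_fwxm` hunks: the edge positions come from linear interpolation between the two samples that straddle the fraction-of-maximum threshold, and the `m == 0` guard now returns NaN without carrying a TODO. A minimal sketch of that interpolation step; the 0.5 sample-centre offset and the hitlet bookkeeping are left out, and `threshold` stands in for `max_val`:

```python
import numpy as np

def interpolate_crossing(index_below, value_below, value_above, threshold):
    """Fractional sample index where a waveform crosses `threshold` between
    sample `index_below` (value_below, under threshold) and the next sample
    (value_above)."""
    m = value_above - value_below      # slope over one sample
    if m == 0:
        return np.nan                  # flat segment: the crossing is undefined
    return index_below + (threshold - value_below) / m

# A rise from 2 to 6 crosses a half-maximum threshold of 5 three quarters
# of the way past the lower sample:
assert interpolate_crossing(10, 2.0, 6.0, 5.0) == 10.75
```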
2 changes: 1 addition & 1 deletion strax/processing/peak_building.py
@@ -272,7 +272,7 @@ def find_peak_groups(peaks, gap_threshold,
"""
# Mock up a "hits" array so we can just use the existing peakfinder
# It doesn't work on raw peaks, since they might have different dts
# TODO: is there no cleaner way?
# Maybe there is a cleaner way?
fake_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
fake_hits['dt'] = 1
fake_hits['area'] = 1
4 changes: 2 additions & 2 deletions strax/processing/pulse_processing.py
@@ -125,7 +125,6 @@ def record_links(records):
"""Return (prev_r, next_r), each arrays of indices of previous/next
record in the same pulse, or -1 if this is not applicable
"""
# TODO: needs tests
if not len(records):
return (
np.ones(0, dtype=np.int32) * NO_RECORD_LINK,
@@ -176,7 +175,8 @@ def find_hits(records,
min_amplitude: ty.Union[int, np.ndarray] = 15,
min_height_over_noise: ty.Union[int, np.ndarray] = 0):
"""Return hits (intervals >= threshold) found in records.
Hits that straddle record boundaries are split (TODO: fix this?)
Hits that straddle record boundaries are split (perhaps we should
fix this?)

NB: returned hits are NOT sorted yet!
"""
1 change: 0 additions & 1 deletion strax/run_selection.py
@@ -17,7 +17,6 @@
def list_available(self, target, **kwargs):
"""Return sorted list of run_id's for which target is available
"""
# TODO duplicated code with with get_iter
if len(kwargs):
# noinspection PyMethodFirstArgAssignment
self = self.new_context(**kwargs)
8 changes: 5 additions & 3 deletions strax/storage/files.py
@@ -263,6 +263,8 @@ def _saver(self, dirname, metadata, **kwargs):
class FileSaver(strax.Saver):
"""Saves data to compressed binary files"""
json_options = dict(sort_keys=True, indent=4)
# When writing chunks, rewrite the json file every time we write a chunk
_flush_md_for_every_chunk = True

def __init__(self, dirname, metadata, **kwargs):
super().__init__(metadata, **kwargs)
@@ -327,11 +329,11 @@ def _save_chunk_metadata(self, chunk_info):

if not self.is_forked or is_first:
# Just append and flush the metadata
# (maybe not super-efficient to write the json everytime...
# (maybe not super-efficient to write the json every time...
# just don't use thousands of chunks)
# TODO: maybe make option to turn this off?
self.md['chunks'].append(chunk_info)
self._flush_metadata()
if self._flush_md_for_every_chunk:
self._flush_metadata()

def _close(self):
if not os.path.exists(self.tempdirname):
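The new `_flush_md_for_every_chunk` class attribute replaces the old TODO: metadata is still flushed per chunk by default, but a subclass can opt out. A hypothetical sketch of such an override; the subclass name is made up, and writing the metadata once at the end remains the job of FileSaver's own `_close`:

```python
from strax.storage.files import FileSaver

class LazyMetadataFileSaver(FileSaver):
    """Hypothetical saver that skips rewriting the metadata json after
    every chunk, e.g. for runs with thousands of chunks."""
    _flush_md_for_every_chunk = False
```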
9 changes: 4 additions & 5 deletions strax/storage/mongo.py
@@ -69,11 +69,10 @@ def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
raise ValueError(
f'Metadata claims chunk{chunk_i} exists but it is unknown to '
f'the chunks_registry')
else:
chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(
f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

# Convert JSON to numpy
chunk_len = len(chunk_doc)
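The mongo.py hunk is a no-else-after-raise cleanup: once the first branch raises, the remaining code does not need to live in an `else` block. A standalone sketch of the same shape, outside the backend class:

```python
def get_chunk_doc(doc, chunk_i):
    """Sketch of the dedented control flow above."""
    if doc is None:
        raise ValueError(
            f'Metadata claims chunk{chunk_i} exists but it is unknown to '
            f'the chunks_registry')
    chunk_doc = doc.get('data', None)
    if chunk_doc is None:
        raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')
    return chunk_doc
```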
1 change: 1 addition & 0 deletions strax/storage/rucio.py
@@ -42,6 +42,7 @@ def rucio_path(root_dir, filename, dirname):
"""Convert target to path according to rucio convention"""
scope = "xnt_"+dirname.split('-')[0]
rucio_did = "{0}:{1}".format(scope, filename)
# disable bandit
rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest()
t1 = rucio_md5[0:2]
t2 = rucio_md5[2:4]
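For reference, the rucio path logic hashes the Rucio DID and uses the first two byte-pairs of the hex digest as directory levels; the md5 is a naming convention, not a security measure, which is why a bandit suppression is wanted here. A minimal sketch of just the derivation visible in the hunk, using bandit's usual `# nosec` marker (the full path assembly sits outside the shown diff):

```python
import hashlib

def rucio_prefix(filename, dirname):
    """Return the two 2-character directory levels derived from the DID."""
    scope = "xnt_" + dirname.split('-')[0]
    rucio_did = "{0}:{1}".format(scope, filename)
    # md5 is used only as a path convention here, not for security
    rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest()  # nosec
    t1 = rucio_md5[0:2]
    t2 = rucio_md5[2:4]
    return t1, t2
```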
2 changes: 1 addition & 1 deletion strax/storage/zipfiles.py
@@ -75,7 +75,7 @@ def _backend_key(self, key):
def _zipname(self, key):
zipname = osp.join(self.path, key.run_id + '.zip')
# Since we're never writing, this check can be here
# TODO: sounds like a bad idea?
# is this a bad idea?
if not osp.exists(zipname):
raise strax.DataNotAvailable
return zipname
1 change: 0 additions & 1 deletion strax/testutils.py
@@ -52,7 +52,6 @@ def sorted_bounds(disjoint=False,
# Fake intervals
##

# TODO: isn't this duplicated with bounds_to_records??

def bounds_to_intervals(bs, dtype=strax.interval_dtype):
x = np.zeros(len(bs), dtype=dtype)
4 changes: 1 addition & 3 deletions strax/utils.py
@@ -296,6 +296,7 @@ def deterministic_hash(thing, length=10):
"""
hashable = hashablize(thing)
jsonned = json.dumps(hashable, cls=NumpyJSONEncoder)
# disable bandit
digest = sha1(jsonned.encode('ascii')).digest()
return b32encode(digest)[:length].decode('ascii').lower()

@@ -311,9 +312,6 @@ def formatted_exception():
For MailboxKilled exceptions, we return the original
exception instead.
"""
# Can't do this at the top level, utils is one of the
# first files of the strax package
import strax
exc_info = sys.exc_info()
if exc_info[0] == strax.MailboxKilled:
# Get the original exception back out
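On the `deterministic_hash` hunk: the sha1 here only produces stable, short identifiers, not a security digest, hence the bandit note. A simplified sketch of the scheme, assuming the input is already plain JSON-serializable (strax itself first runs it through `hashablize` and a numpy-aware encoder):

```python
from base64 import b32encode
from hashlib import sha1
import json

def deterministic_hash_sketch(thing, length=10):
    jsonned = json.dumps(thing, sort_keys=True)
    # identifier generation only, not a security use of sha1
    digest = sha1(jsonned.encode('ascii')).digest()  # nosec
    return b32encode(digest)[:length].decode('ascii').lower()

# Equal inputs always map to the same short key:
assert deterministic_hash_sketch({'a': 1}) == deterministic_hash_sketch({'a': 1})
```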
35 changes: 15 additions & 20 deletions tests/test_loop_plugins.py
@@ -103,6 +103,19 @@ def compute(self, big_kinda_data):
# Drop some of the data in big_kinda_data
return drop_random(big_kinda_data)

def _compute_loop_inner(res, k, big_kinda_data, small_kinda_data):
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]


class AddBigToSmall(strax.LoopPlugin):
"""
Test loop plugin by looping big_thing and adding whatever is in small_thing
@@ -121,16 +134,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype.names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return res

class AddBigToSmallMultiOutput(strax.LoopPlugin):
@@ -146,16 +150,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype['some_combined_things'].names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return {k: res for k in self.provides}

with tempfile.TemporaryDirectory() as temp_dir:
2 changes: 1 addition & 1 deletion tests/test_mailbox.py
@@ -47,7 +47,7 @@ def mailbox_tester(messages,
reader_sleeps=reader_sleeps)
for _ in range(n_readers)]

for i in range(len(messages)):
for i, _ in enumerate(messages):
mb.send(messages[i], msg_number=numbers[i])
print(f"Sent message {i}. Now {len(mb._mailbox)} ms in mailbox.")

2 changes: 1 addition & 1 deletion tests/test_pulse_processing.py
@@ -9,7 +9,7 @@

def _find_hits(r):
# Test pulses have dt=1 and time=0
# TODO: hm, maybe this doesn't test everything
# hm, maybe this doesn't test everything?

hits = strax.find_hits(r, min_amplitude=1)

3 changes: 0 additions & 3 deletions tests/test_utils.py
@@ -21,9 +21,6 @@ def bla(_result_buffer=None, result_dtype=None):

result = np.array([0, 1, 2, 3, 4], dtype=np.int64)
np.testing.assert_equal(bla(), result)
# TODO: re-enable chunk size spec?
# np.testing.assert_equal(bla(chunk_size=1), result)
# np.testing.assert_equal(bla(chunk_size=7), result)
should_get = result.astype(np.float64)
got = bla(result_dtype=np.float64)
np.testing.assert_equal(got, should_get)