Reduce codefactor #487

Merged
merged 6 commits on Jul 15, 2021
2 changes: 1 addition & 1 deletion strax/chunk.py
@@ -298,10 +298,10 @@ def continuity_check(chunk_iter):
"""Check continuity of chunks yielded by chunk_iter as they are yielded"""
last_end = None
last_runid = None

last_subrun = {'run_id': None}
for chunk in chunk_iter:
if chunk.run_id != last_runid:
# TODO: can we do better?
last_end = None
last_subrun = {'run_id': None}

5 changes: 2 additions & 3 deletions strax/context.py
@@ -149,7 +149,6 @@ def new_context(self,
:param replace: If True, replaces settings rather than adding them.
See Context.__init__ for documentation on other parameters.
"""
# TODO: Clone rather than pass on storage front-ends ??
if not isinstance(storage, (list, tuple)):
storage = [storage]
if config is None:
@@ -751,7 +750,7 @@ def check_cache(d):
def concat_loader(*args, **kwargs):
for x in ldrs:
yield from x(*args, **kwargs)

# pylint: disable=unnecessary-lambda
ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs)

if ldr:
@@ -1349,6 +1348,7 @@ def function(arr):
+ result.get('result', 0))

n_chunks += 1
n_chunks += 1

result['n_chunks'] = n_chunks
return result
@@ -1463,7 +1463,6 @@ def is_stored(self, run_id, target, **kwargs):

# If any new options given, replace the current context
# with a temporary one
# TODO duplicated code with with get_iter
if len(kwargs):
# Comment below disables pycharm from inspecting the line below it
# noinspection PyMethodFirstArgAssignment
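Side note on the `unnecessary-lambda` suppression in the `check_cache` hunk above: below is a minimal, self-contained sketch of the pattern being silenced, with made-up `_loader_a`/`_loader_b` generators standing in for the real loaders. It is not the Context code itself.

```python
import typing as ty

def _loader_a() -> ty.Iterator[int]:
    yield from (1, 2)

def _loader_b() -> ty.Iterator[int]:
    yield from (3, 4)

ldrs = [_loader_a, _loader_b]

def concat_loader(*args, **kwargs) -> ty.Iterator[int]:
    """Chain several generator-style loaders into a single iterator."""
    for x in ldrs:
        yield from x(*args, **kwargs)

# Pylint flags a lambda that only forwards its arguments to another callable;
# the directive keeps the lambda while silencing that warning.
# pylint: disable=unnecessary-lambda
ldr = lambda *args, **kwargs: concat_loader(*args, **kwargs)

assert list(ldr()) == [1, 2, 3, 4]
```

An alternative that avoids the warning entirely would be `ldr = concat_loader`; the diff keeps the lambda and disables the check instead.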
8 changes: 4 additions & 4 deletions strax/io.py
@@ -39,8 +39,8 @@ def load_file(f, compressor, dtype):
:param dtype: numpy dtype of data to load
"""
if isinstance(f, str):
with open(f, mode='rb') as f:
return _load_file(f, compressor, dtype)
with open(f, mode='rb') as write_file:
return _load_file(write_file, compressor, dtype)
else:
return _load_file(f, compressor, dtype)

@@ -74,8 +74,8 @@ def save_file(f, data, compressor='zstd'):
if isinstance(f, str):
final_fn = f
temp_fn = f + '_temp'
with open(temp_fn, mode='wb') as f:
result = _save_file(f, data, compressor)
with open(temp_fn, mode='wb') as write_file:
result = _save_file(write_file, data, compressor)
os.rename(temp_fn, final_fn)
return result
else:
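For context on the io.py change: renaming the `with` target avoids rebinding the path argument `f` to the open file handle. A small illustrative sketch of the before/after pattern, using `read()` as a stand-in for strax's `_load_file`:

```python
# Before: the parameter f (a path string) is rebound to a file object,
# so one name means two different things within the same function.
def load_shadowed(f):
    if isinstance(f, str):
        with open(f, mode='rb') as f:   # f is now the handle, not the path
            return f.read()
    return f.read()

# After: a distinct name keeps path and handle apart, which is what the
# diff does with write_file above.
def load_clean(f):
    if isinstance(f, str):
        with open(f, mode='rb') as read_file:
            return read_file.read()
    return f.read()
```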
2 changes: 1 addition & 1 deletion strax/mailbox.py
@@ -1,3 +1,4 @@
# pylint: disable=redefined-builtin
from concurrent.futures import Future, TimeoutError
import heapq
import sys
@@ -319,7 +320,6 @@ def can_write():
if self.killed:
self.log.debug(f"Sender found {self.name} killed while waiting"
" for room for new messages.")
# TODO: this is duplicated from above...
if self.force_killed:
raise MailboxKilled(self.killed_because)
return
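On the new module-level pylint directive: `concurrent.futures.TimeoutError` shadows the built-in of the same name, which pylint reports as `redefined-builtin`. A tiny sketch of the pattern, separate from the mailbox code itself:

```python
# pylint: disable=redefined-builtin
from concurrent.futures import TimeoutError

try:
    raise TimeoutError("no room for new messages")
except TimeoutError:
    pass  # without the directive, pylint warns about the shadowed built-in name
```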
2 changes: 1 addition & 1 deletion strax/plugin.py
@@ -387,7 +387,7 @@ class IterDone(Exception):
# If any of the inputs were trimmed due to early splits,
# trim the others too.
# In very hairy cases this can take multiple passes.
# TODO: can we optimize this, or code it more elegantly?
# can we optimize this, or code it more elegantly?
max_passes_left = 10
while max_passes_left > 0:
this_chunk_end = min([x.end for x in inputs.values()]
2 changes: 1 addition & 1 deletion strax/processing/data_reduction.py
@@ -29,7 +29,7 @@ class ReductionLevel(IntEnum):
def cut_baseline(records, n_before=48, n_after=30):
""""Replace first n_before and last n_after samples of pulses by 0
"""
# TODO: records.data.shape[1] gives a numba error (file issue?)
# records.data.shape[1] gives a numba error (file issue?)
if not len(records):
return
samples_per_record = len(records[0]['data'])
2 changes: 0 additions & 2 deletions strax/processing/hitlets.py
@@ -353,7 +353,6 @@ def get_fwxm(hitlet, fraction=0.5):
lbi = (index_maximum - 1) - lbi # start sample minus samples we went to the left
m = data[lbi + 1] - lbs # divided by 1 sample
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
left_edge = lbi + (max_val - lbs) / m + 0.5
else:
@@ -366,7 +365,6 @@
rbi += 1 + index_maximum # sample to the right plus start
m = data[rbi - 1] - rbs
if m == 0:
# TODO No idea how this happened, we need to fix this
return np.nan, np.nan
right_edge = rbi - (max_val - rbs) / m + 0.5
else:
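On the `get_fwxm` hunks: the edge positions come from linear interpolation between the two samples that straddle the fraction-of-maximum threshold, and the `m == 0` guard now returns NaN without carrying a TODO. A minimal sketch of that interpolation step; the 0.5 sample-centre offset and the hitlet bookkeeping are left out, and `threshold` stands in for `max_val`:

```python
import numpy as np

def interpolate_crossing(index_below, value_below, value_above, threshold):
    """Fractional sample index where a waveform crosses `threshold` between
    sample `index_below` (value_below, under threshold) and the next sample
    (value_above)."""
    m = value_above - value_below      # slope over one sample
    if m == 0:
        return np.nan                  # flat segment: the crossing is undefined
    return index_below + (threshold - value_below) / m

# A rise from 2 to 6 crosses a half-maximum threshold of 5 three quarters
# of the way past the lower sample:
assert interpolate_crossing(10, 2.0, 6.0, 5.0) == 10.75
```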
2 changes: 1 addition & 1 deletion strax/processing/peak_building.py
@@ -272,7 +272,7 @@ def find_peak_groups(peaks, gap_threshold,
"""
# Mock up a "hits" array so we can just use the existing peakfinder
# It doesn't work on raw peaks, since they might have different dts
# TODO: is there no cleaner way?
# Maybe there is a cleaner way?
fake_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
fake_hits['dt'] = 1
fake_hits['area'] = 1
4 changes: 2 additions & 2 deletions strax/processing/pulse_processing.py
@@ -125,7 +125,6 @@ def record_links(records):
"""Return (prev_r, next_r), each arrays of indices of previous/next
record in the same pulse, or -1 if this is not applicable
"""
# TODO: needs tests
if not len(records):
return (
np.ones(0, dtype=np.int32) * NO_RECORD_LINK,
@@ -176,7 +175,8 @@ def find_hits(records,
min_amplitude: ty.Union[int, np.ndarray] = 15,
min_height_over_noise: ty.Union[int, np.ndarray] = 0):
"""Return hits (intervals >= threshold) found in records.
Hits that straddle record boundaries are split (TODO: fix this?)
Hits that straddle record boundaries are split (perhaps we should
fix this?)

NB: returned hits are NOT sorted yet!
"""
1 change: 0 additions & 1 deletion strax/run_selection.py
@@ -17,7 +17,6 @@
def list_available(self, target, **kwargs):
"""Return sorted list of run_id's for which target is available
"""
# TODO duplicated code with with get_iter
if len(kwargs):
# noinspection PyMethodFirstArgAssignment
self = self.new_context(**kwargs)
8 changes: 5 additions & 3 deletions strax/storage/files.py
@@ -263,6 +263,8 @@ def _saver(self, dirname, metadata, **kwargs):
class FileSaver(strax.Saver):
"""Saves data to compressed binary files"""
json_options = dict(sort_keys=True, indent=4)
# When writing chunks, rewrite the json file every time we write a chunk
_flush_md_for_every_chunk = True

def __init__(self, dirname, metadata, **kwargs):
super().__init__(metadata, **kwargs)
@@ -327,11 +329,11 @@ def _save_chunk_metadata(self, chunk_info):

if not self.is_forked or is_first:
# Just append and flush the metadata
# (maybe not super-efficient to write the json everytime...
# (maybe not super-efficient to write the json every time...
# just don't use thousands of chunks)
# TODO: maybe make option to turn this off?
self.md['chunks'].append(chunk_info)
self._flush_metadata()
if self._flush_md_for_every_chunk:
self._flush_metadata()

def _close(self):
if not os.path.exists(self.tempdirname):
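The new `_flush_md_for_every_chunk` class attribute replaces the old TODO: metadata is still flushed per chunk by default, but a subclass can opt out. A hypothetical sketch of such an override; the subclass name is made up, and writing the metadata once at the end remains the job of FileSaver's own `_close`:

```python
from strax.storage.files import FileSaver

class LazyMetadataFileSaver(FileSaver):
    """Hypothetical saver that skips rewriting the metadata json after
    every chunk, e.g. for runs with thousands of chunks."""
    _flush_md_for_every_chunk = False
```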
9 changes: 4 additions & 5 deletions strax/storage/mongo.py
@@ -69,11 +69,10 @@ def _read_chunk(self, backend_key, chunk_info, dtype, compressor):
raise ValueError(
f'Metadata claims chunk{chunk_i} exists but it is unknown to '
f'the chunks_registry')
else:
chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(
f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

chunk_doc = doc.get('data', None)
if chunk_doc is None:
raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')

# Convert JSON to numpy
chunk_len = len(chunk_doc)
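The mongo.py hunk is a no-else-after-raise cleanup: once the first branch raises, the remaining code does not need to live in an `else` block. A standalone sketch of the same shape, outside the backend class:

```python
def get_chunk_doc(doc, chunk_i):
    """Sketch of the dedented control flow above."""
    if doc is None:
        raise ValueError(
            f'Metadata claims chunk{chunk_i} exists but it is unknown to '
            f'the chunks_registry')
    chunk_doc = doc.get('data', None)
    if chunk_doc is None:
        raise ValueError(f'Doc for chunk_{chunk_i} in wrong format:\n{doc}')
    return chunk_doc
```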
1 change: 1 addition & 0 deletions strax/storage/rucio.py
@@ -42,6 +42,7 @@ def rucio_path(root_dir, filename, dirname):
"""Convert target to path according to rucio convention"""
scope = "xnt_"+dirname.split('-')[0]
rucio_did = "{0}:{1}".format(scope, filename)
# disable bandit
rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest()
t1 = rucio_md5[0:2]
t2 = rucio_md5[2:4]
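For reference, the rucio path logic hashes the Rucio DID and uses the first two byte-pairs of the hex digest as directory levels; the md5 is a naming convention, not a security measure, which is why a bandit suppression is wanted here. A minimal sketch of just the derivation visible in the hunk, using bandit's usual `# nosec` marker (the full path assembly sits outside the shown diff):

```python
import hashlib

def rucio_prefix(filename, dirname):
    """Return the two 2-character directory levels derived from the DID."""
    scope = "xnt_" + dirname.split('-')[0]
    rucio_did = "{0}:{1}".format(scope, filename)
    # md5 is used only as a path convention here, not for security
    rucio_md5 = hashlib.md5(rucio_did.encode('utf-8')).hexdigest()  # nosec
    t1 = rucio_md5[0:2]
    t2 = rucio_md5[2:4]
    return t1, t2
```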
2 changes: 1 addition & 1 deletion strax/storage/zipfiles.py
@@ -75,7 +75,7 @@ def _backend_key(self, key):
def _zipname(self, key):
zipname = osp.join(self.path, key.run_id + '.zip')
# Since we're never writing, this check can be here
# TODO: sounds like a bad idea?
# is this a bad idea?
if not osp.exists(zipname):
raise strax.DataNotAvailable
return zipname
1 change: 0 additions & 1 deletion strax/testutils.py
@@ -52,7 +52,6 @@ def sorted_bounds(disjoint=False,
# Fake intervals
##

# TODO: isn't this duplicated with bounds_to_records??

def bounds_to_intervals(bs, dtype=strax.interval_dtype):
x = np.zeros(len(bs), dtype=dtype)
4 changes: 1 addition & 3 deletions strax/utils.py
@@ -296,6 +296,7 @@ def deterministic_hash(thing, length=10):
"""
hashable = hashablize(thing)
jsonned = json.dumps(hashable, cls=NumpyJSONEncoder)
# disable bandit
digest = sha1(jsonned.encode('ascii')).digest()
return b32encode(digest)[:length].decode('ascii').lower()

@@ -311,9 +312,6 @@ def formatted_exception():
For MailboxKilled exceptions, we return the original
exception instead.
"""
# Can't do this at the top level, utils is one of the
# first files of the strax package
import strax
exc_info = sys.exc_info()
if exc_info[0] == strax.MailboxKilled:
# Get the original exception back out
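On the `deterministic_hash` hunk: the sha1 here only produces stable, short identifiers, not a security digest, hence the bandit note. A simplified sketch of the scheme, assuming the input is already plain JSON-serializable (strax itself first runs it through `hashablize` and a numpy-aware encoder):

```python
from base64 import b32encode
from hashlib import sha1
import json

def deterministic_hash_sketch(thing, length=10):
    jsonned = json.dumps(thing, sort_keys=True)
    # identifier generation only, not a security use of sha1
    digest = sha1(jsonned.encode('ascii')).digest()  # nosec
    return b32encode(digest)[:length].decode('ascii').lower()

# Equal inputs always map to the same short key:
assert deterministic_hash_sketch({'a': 1}) == deterministic_hash_sketch({'a': 1})
```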
35 changes: 15 additions & 20 deletions tests/test_loop_plugins.py
@@ -103,6 +103,19 @@ def compute(self, big_kinda_data):
# Drop some of the data in big_kinda_data
return drop_random(big_kinda_data)

def _compute_loop_inner(res, k, big_kinda_data, small_kinda_data):
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]


class AddBigToSmall(strax.LoopPlugin):
"""
Test loop plugin by looping big_thing and adding whatever is in small_thing
@@ -121,16 +134,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype.names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return res

class AddBigToSmallMultiOutput(strax.LoopPlugin):
@@ -146,16 +150,7 @@ def infer_dtype(self):
def compute_loop(self, big_kinda_data, small_kinda_data):
res = {}
for k in self.dtype['some_combined_things'].names:
if k == _dtype_name:
res[k] = big_kinda_data[k]
for small_bit in small_kinda_data[k]:
if np.iterable(res[k]):
for i in range(len(res[k])):
res[k][i] += small_bit
else:
res[k] += small_bit
else:
res[k] = big_kinda_data[k]
_compute_loop_inner(res, k, big_kinda_data, small_kinda_data)
return {k: res for k in self.provides}

with tempfile.TemporaryDirectory() as temp_dir:
2 changes: 1 addition & 1 deletion tests/test_mailbox.py
@@ -47,7 +47,7 @@ def mailbox_tester(messages,
reader_sleeps=reader_sleeps)
for _ in range(n_readers)]

for i in range(len(messages)):
for i, _ in enumerate(messages):
mb.send(messages[i], msg_number=numbers[i])
print(f"Sent message {i}. Now {len(mb._mailbox)} ms in mailbox.")

2 changes: 1 addition & 1 deletion tests/test_pulse_processing.py
@@ -9,7 +9,7 @@

def _find_hits(r):
# Test pulses have dt=1 and time=0
# TODO: hm, maybe this doesn't test everything
# hm, maybe this doesn't test everything?

hits = strax.find_hits(r, min_amplitude=1)

3 changes: 0 additions & 3 deletions tests/test_utils.py
@@ -21,9 +21,6 @@ def bla(_result_buffer=None, result_dtype=None):

result = np.array([0, 1, 2, 3, 4], dtype=np.int64)
np.testing.assert_equal(bla(), result)
# TODO: re-enable chunk size spec?
# np.testing.assert_equal(bla(chunk_size=1), result)
# np.testing.assert_equal(bla(chunk_size=7), result)
should_get = result.astype(np.float64)
got = bla(result_dtype=np.float64)
np.testing.assert_equal(got, should_get)