Skip to content

Commit

Permalink
Fix hitlet splitting (#549)
Browse files Browse the repository at this point in the history
* Fix zero-length hitlets

* Fix small bug in get_hitlets_data

* Refactored comments

* Refactored hitlet testing

* Add test for empty-overlap

* Additional information

Co-authored-by: Joran Angevaare <jorana@nikhef.nl>
  • Loading branch information
WenzDaniel and JoranAngevaare committed Oct 12, 2021
1 parent 9269ca0 commit 796fa13
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 29 deletions.
26 changes: 19 additions & 7 deletions strax/processing/hitlets.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ def get_hitlets_data(hitlets, records, to_pe, min_hitlet_sample=200):
'_copy_hitlets_to_hitlets_width_data')

_get_hitlets_data(hitlets_with_data_field, records, to_pe)
if np.any(hitlets_with_data_field['length'] == 0):
raise ValueError('Try to create zero length hitlets which is forbidden!')

return hitlets_with_data_field


Expand All @@ -228,16 +231,25 @@ def _get_hitlets_data(hitlets, records, to_pe):
r['length'],
h['time'] // h['dt'],
h['length'])


if (r_end - r_start) == 0 and (h_end - h_start) == 0:
# _touching_windows returns the range of records overlapping with a hitlet
# independent of channel. Hence, in rare cases a record of channel A may touch
# a hitlet of channel B which starts before the previous record of channel B.
# In that case we get one non-overlapping record in channel B.
continue

if is_first_record:
# We need recorded_samples_offset because hits may extend beyond the boundaries of our recorded data.
# As the data is not defined in those regions we have to chop and realign our data. See the following
# Example: (fragment 0, 1) [2, 2, 2, 2] [2, 2, 2] with a hitfinder threshold of 1 and left/right
# extension of 3. In the first fragment our hitlet would range from 3 to 8 in the second from 8
# to 11. Hence we have to subtract from every h_start and h_end the offset of 3 to realign our data.
# Time and length of the hitlet are updated accordingly.
# We need recorded_samples_offset because hits may extend beyond the boundaries
# of our recorded data. As the data is not defined in those regions we have to
# chop and realign our data. See the following Example: (fragment 0, 1) [2, 2, 2,
# 2] [2, 2, 2] with a hitfinder threshold of 1 and left/right extension of 3. In
# the first fragment our hitlet would range from 3 to 8 in the second from 8 to
# 11. Hence we have to subtract from every h_start and h_end the offset of 3 to
# realign our data. Time and length of the hitlet are updated accordingly.
is_first_record = False
recorded_samples_offset = h_start

h_start -= recorded_samples_offset
h_end -= recorded_samples_offset

Expand Down
64 changes: 42 additions & 22 deletions tests/test_hitlet.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class TestGetHitletData(unittest.TestCase):
def setUp(self):
    """Create the waveform, its expected truth and the shared fixtures.

    The last two samples of the waveform are dropped for the truth. The
    records and hitlets are built once from the waveform through
    ``make_records_and_hitlets`` and stored on the test case.
    """
    waveform = [1, 3, 2, 1, 0, 0]
    self.test_data = waveform
    self.test_data_truth = waveform[:-2]
    fixtures = self.make_records_and_hitlets([[waveform]])
    self.records, self.hitlets = fixtures

def make_records_and_hitlets(self, dummy_records):
records = self._make_fake_records(dummy_records)
Expand All @@ -84,50 +85,69 @@ def make_records_and_hitlets(self, dummy_records):
return records, hitlets

def test_inputs_are_empty(self):
records, hitlets = self.make_records_and_hitlets([[self.test_data]])
hitlets_empty = np.zeros(0, dtype=strax.hitlet_with_data_dtype(2))
records_empty = np.zeros(0, dtype=strax.record_dtype(10))

hitlets_result = strax.get_hitlets_data(hitlets_empty, records, np.ones(3000))
hitlets_result = strax.get_hitlets_data(hitlets_empty, self.records, np.ones(3000))
assert len(hitlets_result) == 0, 'get_hitlet_data returned result for empty hitlets'

hitlets_result = strax.get_hitlets_data(hitlets_empty, records_empty, np.ones(3000))
assert len(hitlets_result) == 0, 'get_hitlet_data returned result for empty hitlets'

self.assertRaises(ValueError, strax.get_hitlets_data, hitlets, records_empty, np.ones(3000))

with self.assertRaises(ValueError):
strax.get_hitlets_data(self.hitlets, records_empty, np.ones(3000))

def test_to_pe_wrong_shape(self):
records, hitlets = self.make_records_and_hitlets([[self.test_data]])
hitlets['channel'] = 2000
self.assertRaises(ValueError, strax.get_hitlets_data, hitlets, records, np.ones(10))
self.hitlets['channel'] = 2000
with self.assertRaises(ValueError):
strax.get_hitlets_data(self.hitlets, self.records, np.ones(10))

def test_get_hitlets_data_for_single_hitlet(self):
records, hitlets = self.make_records_and_hitlets([[self.test_data]])

hitlets = strax.get_hitlets_data(hitlets[0], records, np.ones(3000))
hitlets = strax.get_hitlets_data(self.hitlets[0], self.records, np.ones(3000))
self._test_data_is_identical(hitlets, [self.test_data_truth])

def test_data_field_is_empty(self):
records, hitlets = self.make_records_and_hitlets([[self.test_data]])

hitlets = strax.get_hitlets_data(hitlets, records, np.ones(3000))
self.assertRaises(ValueError, strax.get_hitlets_data, hitlets, records, np.ones(3000))
hitlets = strax.get_hitlets_data(self.hitlets, self.records, np.ones(3000))
with self.assertRaises(ValueError):
strax.get_hitlets_data(hitlets, self.records, np.ones(3000))
self._test_data_is_identical(hitlets, [self.test_data_truth])

def test_get_hitlets_data_without_data_field(self):
records, hitlets_with_data = self.make_records_and_hitlets([[self.test_data]])
hitlets = np.zeros(len(hitlets_with_data), strax.hitlet_dtype())
strax.copy_to_buffer(hitlets_with_data, hitlets, '_copy_hitlets_to_hitlets_without_data')
hitlets_empty = np.zeros(len(self.hitlets), strax.hitlet_dtype())
strax.copy_to_buffer(self.hitlets, hitlets_empty, '_copy_hitlets_to_hitlets_without_data')

hitlets = strax.get_hitlets_data(hitlets, records, np.ones(3000))
hitlets = strax.get_hitlets_data(hitlets_empty, self.records, np.ones(3000))
self._test_data_is_identical(hitlets, [self.test_data_truth])

def test_to_short_data_field(self):
records, hitlets = self.make_records_and_hitlets([[self.test_data]])
hitlets_to_short = np.zeros(len(hitlets), dtype=strax.hitlet_with_data_dtype(2))
strax.copy_to_buffer(hitlets, hitlets_to_short, '_refresh_hit_to_hitlet')
self.assertRaises(ValueError, strax.get_hitlets_data, hitlets_to_short, records, np.ones(3000))
hitlets_to_short = np.zeros(len(self.hitlets), dtype=strax.hitlet_with_data_dtype(2))
strax.copy_to_buffer(self.hitlets, hitlets_to_short, '_refresh_hit_to_hitlet')
with self.assertRaises(ValueError):
strax.get_hitlets_data(hitlets_to_short, self.records, np.ones(3000))

def test_empty_overlap(self):
    """Exercise a hitlet that overlaps in time with a record of another channel.

    Three fake records are built, one in channel 0 and two in channel 1.
    The hitlet is in channel 1 but overlaps with the channel 0 record,
    see also github.com/AxFoundation/strax/pull/549. The returned hitlet
    is expected to start at 5 with a length of 10 and data summing to 10.
    """
    fake_records = np.zeros(3, strax.record_dtype(10))
    record_fields = {
        'channel': (0, 1, 1),
        'length': (10, 3, 10),
        'time': (0, 0, 5),
        'dt': 1,
    }
    for field_name, field_values in record_fields.items():
        fake_records[field_name] = field_values
    # Only the last record carries non-zero samples.
    fake_records['data'][-1] = np.ones(10)

    # The hits are assumed to be extended by 1 sample, hence the hitlet
    # starts at 4.
    hitlet_in = np.zeros(1, strax.hitlet_with_data_dtype(11))
    hitlet_fields = {'time': 4, 'dt': 1, 'length': 11, 'channel': 1}
    for field_name, field_value in hitlet_fields.items():
        hitlet_in[field_name] = field_value

    hitlet_out = strax.get_hitlets_data(hitlet_in, fake_records, np.ones(10))
    assert hitlet_out['time'] == 5
    assert hitlet_out['length'] == 10
    assert np.sum(hitlet_out['data']) == 10
    assert hitlet_out['data'][0, 0] == 1

def test_get_hitlets_data(self):
dummy_records = [ # Contains Hitlet #:
Expand Down

0 comments on commit 796fa13

Please sign in to comment.