Use int32 for peak dt, fix #397 (#403)

Merged 12 commits on Apr 12, 2021
12 changes: 8 additions & 4 deletions strax/dtypes.py
@@ -12,7 +12,7 @@
__all__ = ('interval_dtype raw_record_dtype record_dtype hit_dtype peak_dtype '
'DIGITAL_SUM_WAVEFORM_CHANNEL DEFAULT_RECORD_LENGTH '
'time_fields time_dt_fields hitlet_dtype hitlet_with_data_dtype '
'copy_to_buffer').split()
'copy_to_buffer peak_interval_dtype').split()

DIGITAL_SUM_WAVEFORM_CHANNEL = -1
DEFAULT_RECORD_LENGTH = 110
@@ -33,11 +33,15 @@
(('Width of one sample [ns]',
'dt'), np.int16)]

# Base dtype for interval-like objects (pulse, peak, hit)
# Base dtype for interval-like objects (pulse, hit)
interval_dtype = time_dt_fields + [
(('Channel/PMT number',
'channel'), np.int16)]

# Base dtype for long interval-like objects (peaks)
peak_interval_dtype = interval_dtype.copy()
# Allow peaks to have very long dts
peak_interval_dtype[2] = (('Width of one sample [ns]', 'dt'), np.int32)

def raw_record_dtype(samples_per_record=DEFAULT_RECORD_LENGTH):
"""Data type for a waveform raw_record.
@@ -178,7 +182,7 @@ def peak_dtype(n_channels=100, n_sum_wv_samples=200, n_widths=11):
if n_channels == 1:
raise ValueError("Must have more than one channel")
# Otherwise array changes shape?? badness ensues
return interval_dtype + [
return peak_interval_dtype + [
# For peaklets this is likely to be overwritten:
(('Classification of the peak(let)',
'type'), np.int8),
@@ -217,7 +221,7 @@ def copy_to_buffer(source: np.ndarray,
with the name 'func_name' (should start with "_").

:param source: array of input
:param destination: array of buffer to fill with values from input
:param buffer: array of buffer to fill with values from input
:param func_name: how to store the dynamically created function.
Should start with an _underscore
:param field_names: dtype names to copy (if none, use all in the
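A quick way to see what the new `peak_interval_dtype` buys: with the default `peak_dtype()` the sum waveform holds 200 samples, so the longest peak a single `dt` can describe goes from roughly 6.6 ms with int16 to several minutes with int32. A minimal sketch, assuming only `numpy` and an installed `strax` exposing the `peak_dtype()` signature shown above:

```python
import numpy as np
import strax

# Default peak dtype: 200 sum-waveform samples; 'dt' is now int32 (was int16).
peak_dtype = np.zeros(0, strax.peak_dtype()).dtype
assert peak_dtype['dt'] == np.int32

n_samples = peak_dtype['data'].shape[0]          # 200 by default

# Longest single-peak duration (ns) that 'dt' * n_samples can describe:
old_limit = np.iinfo(np.int16).max * n_samples   # 32767 * 200 ~ 6.6e6 ns, about 6.6 ms
new_limit = np.iinfo(np.int32).max * n_samples   # ~ 4.3e11 ns, about 7 minutes
print(f'old limit {old_limit / 1e6:.1f} ms, new limit {new_limit / 1e9:.0f} s')
```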
13 changes: 11 additions & 2 deletions tests/test_peak_processing.py
@@ -100,9 +100,9 @@ def test_sum_waveform(records, peak_left, peak_length):
(1, 1, 1, 1, 0, 0, 0, 0, 0., 0., 0, [1, 0])],
dtype=strax.record_dtype(2)),
gap_factor=108,
max_duration=1000,
right_extension=5000,
gap_threshold=18000,
max_duration=int(7_000_000),
)
def test_peak_overflow(records,
gap_factor,
@@ -132,8 +132,11 @@ def test_peak_overflow(records,

# Set this here, no need to test left and right independently
left_extension = 0
# Make a single big peak to contain all the records
peak_dtype = np.zeros(0, strax.peak_dtype()).dtype
magic_overflow_time = np.iinfo(peak_dtype['dt']).max * peak_dtype['data'].shape[0]
# NB! Use the pre-#403 int16 limit here: peak dt is now int32, and
# actually overflowing int32 would make this test take forever.
magic_overflow_time = np.iinfo(np.int16).max * peak_dtype['data'].shape[0]

def retrun_1(x):
"""
@@ -154,6 +157,7 @@ def retrun_1(x):
# Copy the pulse train of the records. We are going to copy the same
# set of records many times now.
t_max = strax.endtime(r).max()
print('make buffer')
n_repeat = int(1.5 * magic_overflow_time + t_max * gap_factor) // int(t_max * gap_factor) + 1
time_offset = np.linspace(0,
1.5 * magic_overflow_time + t_max * gap_factor,
@@ -165,8 +169,10 @@
assert strax.endtime(r_buffer[-1]) - r_buffer['time'].min() > magic_overflow_time
r = r_buffer.copy()
del r_buffer
print(f'Array is {r.nbytes/1e6} MB, good luck')

# Do peak finding!
print(f'Find hits')
hits = strax.find_hits(r, min_amplitude=0)
assert len(hits)
hits = strax.sort_by_time(hits)
@@ -175,6 +181,7 @@ def retrun_1(x):
to_pe = np.ones(max(r['channel'])+1)

try:
print('Find peaks')
# Find peaks, we might end up with negative dt here!
p = strax.find_peaks(hits, to_pe,
gap_threshold=gap_threshold,
@@ -200,6 +207,7 @@ def retrun_1(x):
# The error is caused by something else, we need to re-raise
raise e

print(f'Peaklet array is {p.nbytes / 1e6} MB, good luck')
if len(p) == 0:
print(f'rec length {len(r)}')
assert len(p)
@@ -215,6 +223,7 @@ def retrun_1(x):
strax.compute_widths(p)

try:
print('Split peaks')
peaklets = strax.split_peaks(
p, r, to_pe,
algorithm='natural_breaks',
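For reference, the `magic_overflow_time` used in the test is just the old int16 `dt` ceiling times the number of sum-waveform samples, which is why `max_duration` is raised to 7 ms in the parametrization: `find_peaks` needs room to build peaks that cross the old overflow point. A back-of-the-envelope check, assuming the default of 200 sum-waveform samples:

```python
import numpy as np

n_sum_wv_samples = 200                   # strax.peak_dtype() default, see dtypes.py above
int16_max = np.iinfo(np.int16).max       # 32767

# Pre-#403 overflow point: the largest peak duration (in ns) an int16 'dt'
# can describe once the sum waveform is downsampled to 200 samples.
magic_overflow_time = int16_max * n_sum_wv_samples
print(magic_overflow_time)               # 6_553_400 ns, i.e. about 6.6 ms

# max_duration=int(7_000_000) in the parametrization allows peaks longer than
# this, so the test exercises durations beyond the old int16 limit.
assert 7_000_000 > magic_overflow_time
```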