Merge pull request #128 from DUNE/feature_backtrack
mc_truth/interaction + true_pos in prompt_hit + backtrack
YifanC committed Jun 7, 2024
2 parents f58217e + ad12385 commit 4f4f7f0
Showing 8 changed files with 209 additions and 147 deletions.
35 changes: 13 additions & 22 deletions src/proto_nd_flow/reco/charge/calib_hit_merger.py
@@ -68,7 +68,7 @@ def init(self, source_name):
 
         self.hit_frac_dtype = np.dtype([
             ('fraction', f'({self.max_contrib_segments},)f8'),
-            ('segment_id', f'({self.max_contrib_segments},)u8')
+            ('segment_ids', f'({self.max_contrib_segments},)i8')
         ])
 
         self.data_manager.create_dset(self.merged_name, dtype=self.merged_dtype)
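The dtype change above is the crux of the backtracking rework: `segment_ids` is now signed 64-bit (`i8`) so that `-1` can flag empty slots, which the old unsigned `u8` field could not represent. A minimal standalone sketch (array width and sample values invented):

```python
import numpy as np

MAX_CONTRIB_SEGMENTS = 8  # stand-in for self.max_contrib_segments

# Fixed-width record: one fraction and one segment id per contributing segment.
hit_frac_dtype = np.dtype([
    ('fraction', f'({MAX_CONTRIB_SEGMENTS},)f8'),
    ('segment_ids', f'({MAX_CONTRIB_SEGMENTS},)i8'),
])

hit = np.zeros(1, dtype=hit_frac_dtype)
hit['segment_ids'] = -1                   # -1 marks "no segment in this slot"
hit['fraction'][0, :2] = [0.75, 0.25]     # two true segments share this hit's charge
hit['segment_ids'][0, :2] = [1042, 1043]  # hypothetical segment ids
```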
@@ -106,31 +106,21 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
         '''
 
-        has_mc_truth = seg_fracs[0] is not None
+        has_mc_truth = seg_fracs is not None
         iteration_count = 0
         mask = hits.mask['id'].copy()
         new_hits = hits.data.copy()
         weights = weights.data.copy()
         old_ids = hits.data['id'].copy()[...,np.newaxis]
         old_id_mask = hits.mask['id'].copy()[...,np.newaxis]
         if has_mc_truth:
-            new_seg_bt = np.array(seg_fracs[0])
-            new_frac_bt = np.array(seg_fracs[1])
             hit_contributions = np.full(shape=weights.shape+(3,self.max_contrib_segments),fill_value=0.)
-            # initialize hit contribution lists with unmerged data.
-            # [there is probably a more pythonic way of doing this...]
-            for it, q in np.ndenumerate(weights):
-                if len(new_frac_bt[it]) > 1:
-                    print('!!!!!!!!!!!!!!!!!')
-                    break
-                counter=0
-                for entry_it, entry in enumerate(new_frac_bt[it][0]):
-                    if abs(entry) < 0.001: continue
-                    hit_contributions[it][0][counter] = q
-                    hit_contributions[it][1][counter] = new_frac_bt[it][0][entry_it]
-                    hit_contributions[it][2][counter] = new_seg_bt[it][0][entry_it]['segment_id']
-                    counter+=1
-
+            prompt_hit_max_seg_contrib = seg_fracs['segment_ids'].shape[-1]
+            hit_contributions[:,:,0,:] = np.pad(np.expand_dims(weights,axis=2),((0,0),(0,0),(0,self.max_contrib_segments-1)),'mean')
+            hit_contributions[:,:,1,:] = np.pad(np.squeeze(seg_fracs['fraction']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=0)
+            hit_contributions[:,:,2,:] = np.pad(np.squeeze(seg_fracs['segment_ids']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=-1)
         while new_hits.size > 0 and iteration_count != max_steps:
             iteration_count += 1
             # algorithm is iterative, but typically only needs to loop a few (~2-3) times
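The deleted per-hit loop is replaced by three vectorized `np.pad` calls. The trick worth noting: padding a length-1 axis in `'mean'` mode simply repeats the single value, so each hit's charge gets broadcast across all contribution slots, while fractions and segment ids are padded out with `0` and `-1`. A toy illustration under invented shapes (one event, two hits, two true segments each, four slots):

```python
import numpy as np

max_contrib = 4
weights = np.array([[5.0, 2.0]])             # (events=1, hits=2) charge per hit
frac = np.array([[[0.6, 0.4], [1.0, 0.0]]])  # (1, 2, 2) fractional contributions
segs = np.array([[[7, 9], [3, -1]]])         # (1, 2, 2) matching segment ids

contrib = np.zeros((1, 2, 3, max_contrib))
# 'mean' padding of a length-1 axis repeats the single value:
contrib[:, :, 0, :] = np.pad(weights[..., np.newaxis],
                             ((0, 0), (0, 0), (0, max_contrib - 1)), 'mean')
contrib[:, :, 1, :] = np.pad(frac, ((0, 0), (0, 0), (0, max_contrib - 2)),
                             'constant', constant_values=0)
contrib[:, :, 2, :] = np.pad(segs, ((0, 0), (0, 0), (0, max_contrib - 2)),
                             'constant', constant_values=-1)
```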
@@ -213,9 +203,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
                             hit_contributions[...,:-1][hit_it][0][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][0][comb_it]
                             hit_contributions[...,:-1][hit_it][2][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][2][comb_it]
                             # and remove them from the hit that was merged in
-                            hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0
                             hit_contributions[hit_it[0],hit_it[1]+1][0][comb_it] = 0.
-                            hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = 0.
+                            hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0.
+                            hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = -1
 
                 # now we mask off hits that have already been merged
                 mask[...,1:] = mask[...,1:] | to_merge
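What this bookkeeping does, in a hedged standalone toy (arrays and slot counts invented, not the module's code): when a neighbouring hit is merged in, its (charge, fraction, segment) triplets are appended to the survivor's lists and then blanked in the donor with the same sentinels used above (`0.` for charge and fraction, `-1` for segment id). The fractions are deliberately left unnormalised; the renormalisation happens in the block below.

```python
import numpy as np

# rows: 0 = charge, 1 = fraction, 2 = segment id; columns = contribution slots
survivor = np.array([[5.0, 0.0], [1.0, 0.0], [7.0, -1.0]])
donor    = np.array([[2.0, 0.0], [1.0, 0.0], [9.0, -1.0]])

n = np.count_nonzero(survivor[1])    # filled slots in the survivor
m = np.count_nonzero(donor[1])       # filled slots in the donor
survivor[:, n:n + m] = donor[:, :m]  # append the donor's triplets
donor[0, :m] = 0.                    # blank the donor: charge -> 0
donor[1, :m] = 0.                    # fraction -> 0
donor[2, :m] = -1                    # segment id -> -1
```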
@@ -249,7 +239,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
             for hit_it, hit in np.ndenumerate(new_hits):
                 if mask[hit_it]: continue
                 hit_contr = hit_contributions[hit_it]
 
+                # renormalize the fractional contributions given the charge weighted average
+                # YC, 2024-06-03: I think we should check this norm is consistent with the sum of Q from all contributed prompt hits
                 norm = np.sum(np.multiply(hit_contr[0],hit_contr[1]))
                 if norm == 0.: norm = 1.
                 tmp_bt_0 = np.multiply(hit_contr[0],hit_contr[1])/norm # fractional contributions
@@ -267,9 +259,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
                 bt_unique_segs = np.take_along_axis(bt_unique_segs, isort, axis=-1)
                 bt_unique_frac = np.take_along_axis(bt_unique_frac, isort, axis=-1)
                 back_track[hit_it]['fraction'] = [0.]*self.max_contrib_segments
-                back_track[hit_it]['segment_id'] = [0]*self.max_contrib_segments
+                back_track[hit_it]['segment_ids'] = [-1]*self.max_contrib_segments
                 back_track[hit_it]['fraction'][:bt_unique_frac.shape[0]] = bt_unique_frac
-                back_track[hit_it]['segment_id'][:bt_unique_segs.shape[0]] = bt_unique_segs
+                back_track[hit_it]['segment_ids'][:bt_unique_segs.shape[0]] = bt_unique_segs
         else: back_track = None
 
         new_hit_idx = np.broadcast_to(np.cumsum(~mask.ravel(), axis=0).reshape(mask.shape + (1,)), old_ids.shape)-1
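A compact sketch of the renormalisation and deduplication performed above (toy arrays, not the module's code): each slot's fraction is weighted by the charge it carried, normalised by `sum(q*f)` with a zero guard, then duplicate segment ids are collapsed and sorted by descending contribution before being written into the fixed-width `fraction`/`segment_ids` arrays.

```python
import numpy as np

q = np.array([5.0, 2.0, 0.0])   # charge carried by each contribution slot
f = np.array([1.0, 1.0, 0.0])   # unnormalised fractions
seg = np.array([7, 9, -1])      # segment ids, -1 = empty slot

norm = np.sum(q * f)
if norm == 0.: norm = 1.        # guard against an all-empty hit
frac = q * f / norm             # charge-weighted fractions, summing to 1

# collapse duplicate segment ids, then sort by descending contribution
valid = seg >= 0
uniq, inverse = np.unique(seg[valid], return_inverse=True)
summed = np.zeros(uniq.shape)
np.add.at(summed, inverse, frac[valid])
order = np.argsort(summed)[::-1]
uniq, summed = uniq[order], summed[order]
```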
@@ -285,10 +277,9 @@ def run(self, source_name, source_slice, cache):
 
         event_id = np.r_[source_slice]
         packet_frac_bt = cache['packet_frac_backtrack']
-        packet_seg_bt = cache['packet_seg_backtrack']
         hits = cache[self.hits_name]
 
-        merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=[packet_seg_bt,packet_frac_bt],dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)
+        merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=packet_frac_bt,dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)
 
         merged_mask = merged.mask['id']
 
90 changes: 47 additions & 43 deletions src/proto_nd_flow/reco/charge/calib_prompt_hits.py
@@ -102,39 +102,12 @@ def __init__(self, **params):
         self.t0_dset_name = params.get('t0_dset_name')
         self.pedestal_file = params.get('pedestal_file', '')
         self.configuration_file = params.get('configuration_file', '')
-        self.max_contrib_segments = params.get('max_contrib_segments','')
 
     def init(self, source_name):
         super(CalibHitBuilder, self).init(source_name)
         self.load_pedestals()
         self.load_configurations()
 
-        self.hit_frac_dtype = np.dtype([
-            ('fraction', f'({self.max_contrib_segments},)f8'),
-            ('segment_id', f'({self.max_contrib_segments},)u8')
-        ])
-
-        # save all config info
-        self.data_manager.set_attrs(self.calib_hits_dset_name,
-                                    classname=self.classname,
-                                    class_version=self.class_version,
-                                    source_dset=source_name,
-                                    packets_dset=self.packets_dset_name,
-                                    t0_dset=self.t0_dset_name,
-                                    pedestal_file=self.pedestal_file,
-                                    configuration_file=self.configuration_file
-                                    )
-
-        # then set up new datasets
-        self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
-        if resources['RunData'].is_mc:
-            self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
-        self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
-        self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
-        self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
-        if resources['RunData'].is_mc:
-            self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)
-
     def run(self, source_name, source_slice, cache):
         super(CalibHitBuilder, self).run(source_name, source_slice, cache)
         events_data = cache[self.events_dset_name]
Expand All @@ -146,6 +119,8 @@ def run(self, source_name, source_slice, cache):
t0_data = cache[self.t0_dset_name]
raw_hits = cache[self.raw_hits_dset_name]

has_mc_truth = packet_frac_bt is not None

mask = ~rfn.structured_to_unstructured(packets_data.mask).any(axis=-1)
rh_mask = ~rfn.structured_to_unstructured(raw_hits.mask).any(axis=-1)

@@ -157,16 +132,36 @@ def run(self, source_name, source_slice, cache):
             packets_arr = packets_data.data[mask]
             if resources['RunData'].is_mc:
                 packet_frac_bt_arr = packet_frac_bt.data[mask]
+                packet_frac_bt_arr = np.concatenate(packet_frac_bt_arr)
                 packet_seg_bt_arr = packet_seg_bt.data[mask]
             index_arr = packets_index.data[mask]
         else:
             n = 0
             index_arr = np.zeros((0,), dtype=packets_index.dtype)
 
+        if resources['RunData'].is_mc:
+            has_mc_truth = packet_seg_bt is not None
+        else:
+            has_mc_truth = False
+        if has_mc_truth and ('x_true_seg_t' not in self.calib_hits_dtype.fields):
+            self.calib_hits_dtype = np.dtype(self.calib_hits_dtype.descr + [('x_true_seg_t', f'({packet_seg_bt.shape[-1]},)f8'), ('E_true_recomb_elife', f'({packet_seg_bt.shape[-1]},)f8')])
+
+        # save all config info
+        self.data_manager.set_attrs(self.calib_hits_dset_name,
+                                    classname=self.classname,
+                                    class_version=self.class_version,
+                                    source_dset=source_name,
+                                    packets_dset=self.packets_dset_name,
+                                    t0_dset=self.t0_dset_name,
+                                    pedestal_file=self.pedestal_file,
+                                    configuration_file=self.configuration_file
+                                    )
+
+        # then set up new datasets
+        self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
+        if has_mc_truth:
+            self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=packet_frac_bt_arr.dtype)
+        self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
+        self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
+        self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
+        if has_mc_truth:
+            self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)
+
         # reserve new data
         calib_hits_slice = self.data_manager.reserve_data(self.calib_hits_dset_name, n)
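The dtype surgery above extends `calib_hits_dtype` in place the first time truth information shows up, which is why the `set_attrs`/`create_dset` block moved out of `init()` and into `run()`. A sketch of the pattern with an invented base dtype:

```python
import numpy as np

calib_hits_dtype = np.dtype([('id', 'u4'), ('x', 'f8'), ('Q', 'f8')])
n_seg = 5  # stand-in for packet_seg_bt.shape[-1]

# append the truth-level fields once, guarded as in the diff
if 'x_true_seg_t' not in calib_hits_dtype.fields:
    calib_hits_dtype = np.dtype(
        calib_hits_dtype.descr
        + [('x_true_seg_t', f'({n_seg},)f8'),
           ('E_true_recomb_elife', f'({n_seg},)f8')])
```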
@@ -198,11 +193,16 @@ def run(self, source_name, source_slice, cache):
                 hit_t0[first_index:last_index] = np.full(n_not_masked,t0)
                 first_index += n_not_masked
 
-        drift_t = raw_hits_arr['ts_pps'] - hit_t0
-
+        drift_t = raw_hits_arr['ts_pps'] - hit_t0 #ticks
         drift_d = drift_t * (resources['LArData'].v_drift * resources['RunData'].crs_ticks) / units.cm # convert mm -> cm
         x = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d)
 
+        # true drift position pair
+        if has_mc_truth:
+            drift_t_true = packet_seg_bt_arr['t'] #us
+            drift_d_true = drift_t_true * (resources['LArData'].v_drift) / units.cm # convert mm -> cm
+            x_true_seg_t = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d_true)
+
         zy = resources['Geometry'].pixel_coordinates_2D[packets_arr['io_group'],
                 packets_arr['io_channel'], packets_arr['chip_id'], packets_arr['channel_id']]
         tile_id = resources['Geometry'].tile_id[packets_arr['io_group'],packets_arr['io_channel']]
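The reco and truth drift conversions differ only in their time units: `ts_pps - t0` is in clock ticks and needs `crs_ticks` to become µs, while the backtracked segment time `t` is already in µs. Schematically, with invented constants standing in for the resource lookups:

```python
# invented stand-ins for the resources[...] lookups in the diff
V_DRIFT = 1.6     # mm/us, hypothetical drift velocity
CRS_TICKS = 0.1   # us per clock tick, hypothetical
MM_PER_CM = 10.0  # the units.cm conversion

drift_t = 1200.0                                       # reco: ticks since t0
drift_d = drift_t * (V_DRIFT * CRS_TICKS) / MM_PER_CM  # -> 19.2 cm

drift_t_true = 120.0                                   # truth: already in us
drift_d_true = drift_t_true * V_DRIFT / MM_PER_CM      # -> 19.2 cm
```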
@@ -219,27 +219,30 @@ def run(self, source_name, source_slice, cache):
                                                  for unique_id in hit_uniqueid_str])
         calib_hits_arr['id'] = calib_hits_slice.start + np.arange(n, dtype=int)
         calib_hits_arr['x'] = x
+        if has_mc_truth:
+            calib_hits_arr['x_true_seg_t'] = x_true_seg_t
         calib_hits_arr['y'] = zy[:,1]
         calib_hits_arr['z'] = zy[:,0]
         calib_hits_arr['ts_pps'] = raw_hits_arr['ts_pps']
         calib_hits_arr['t_drift'] = drift_t
         calib_hits_arr['io_group'] = packets_arr['io_group']
         calib_hits_arr['io_channel'] = packets_arr['io_channel']
-        calib_hits_arr['Q'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped)
-        #!!! hardcoding W_ion, R=0.7, and not accounting for finite electron lifetime
-        calib_hits_arr['E'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) * 23.6e-3 / 0.7
-
-        # create truth-level backtracking dataset
+        hits_charge = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) # ke-
+        calib_hits_arr['Q'] = hits_charge # ke-
+        #FIXME supply more realistic dEdx in the recombination; also apply measured electron lifetime
+        calib_hits_arr['E'] = hits_charge * (1000 * units.e) / resources['LArData'].ionization_recombination(mode=2,dEdx=2) * (resources['LArData'].ionization_w / units.MeV) # MeV
         if has_mc_truth:
-            back_track = np.full(shape=packets_arr.shape,fill_value=0.,dtype=self.hit_frac_dtype)
-            for hit_it, pack in np.ndenumerate(packets_arr):
-                back_track[hit_it]['fraction'] = packet_frac_bt_arr[hit_it]
-                back_track[hit_it]['segment_id'] = packet_seg_bt_arr[hit_it]['segment_id']
+            true_recomb = resources['LArData'].ionization_recombination(mode=1,dEdx=packet_seg_bt_arr['dEdx'])
+            calib_hits_arr['E_true_recomb_elife'] = np.divide(hits_charge.reshape((hits_charge.shape[0],1)) * (1000 * units.e), true_recomb, out=np.zeros_like(true_recomb), where=true_recomb!=0) / resources['LArData'].charge_reduction_lifetime(t_drift=drift_t_true) * (resources['LArData'].ionization_w / units.MeV) # MeV
 
         # if back tracking information was available, write the merged back tracking
         # dataset to file
         if has_mc_truth:
-            self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, back_track)
+            # make sure packets and packet backtracking match in numbers
+            if packets_arr.shape[0] == packet_frac_bt_arr.shape[0]:
+                self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, packet_frac_bt_arr)
+            else:
+                raise Exception("The data packet and backtracking info do not match in size.")
 
         # write
         self.data_manager.write_data(self.calib_hits_dset_name, calib_hits_slice, calib_hits_arr)
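Both energy estimates follow the same chain, charge → electrons → undo recombination → multiply by W_ion; the truth variant swaps in a per-segment recombination factor and an electron-lifetime correction, guarding against zero recombination exactly as the `np.divide(..., where=)` above does. A schematic with placeholder constants standing in for the `LArData` calls:

```python
import numpy as np

# placeholder constants standing in for the LArData resource calls
W_ION = 23.6e-6       # MeV per electron-ion pair (23.6 eV)
R_BOX = 0.7           # recombination factor, cf. ionization_recombination(mode=2, dEdx=2)
LIFETIME_CORR = 0.95  # survival fraction, cf. charge_reduction_lifetime(t_drift=...)

q_ke = np.array([150.0, 80.0])  # hit charge in ke-
n_e = q_ke * 1000               # ke- -> number of electrons

# reco: fixed-dEdx recombination, no lifetime correction
E = n_e / R_BOX * W_ION  # MeV

# truth: per-segment recombination plus lifetime correction
r_true = np.array([0.68, 0.0])
E_true = (np.divide(n_e, r_true, out=np.zeros_like(r_true), where=r_true != 0)
          / LIFETIME_CORR * W_ION)  # MeV
```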
@@ -277,3 +280,4 @@ def load_configurations(self):
         with open(self.configuration_file, 'r') as infile:
             for key, value in json.load(infile).items():
                 self.configuration[key] = value
+