Merge pull request #128 from DUNE/feature_backtrack
mc_truth/interaction + true_pos in prompt_hit + backtrack
YifanC committed Jun 7, 2024
2 parents f58217e + ad12385 commit 4f4f7f0
Showing 8 changed files with 209 additions and 147 deletions.
35 changes: 13 additions & 22 deletions src/proto_nd_flow/reco/charge/calib_hit_merger.py
@@ -68,7 +68,7 @@ def init(self, source_name):
 
         self.hit_frac_dtype = np.dtype([
             ('fraction', f'({self.max_contrib_segments},)f8'),
-            ('segment_id', f'({self.max_contrib_segments},)u8')
+            ('segment_ids', f'({self.max_contrib_segments},)i8')
         ])
 
         self.data_manager.create_dset(self.merged_name, dtype=self.merged_dtype)
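The dtype change above is the crux of the backtracking rework: `segment_ids` is now signed 64-bit (`i8`) so that `-1` can flag empty slots, which the old unsigned `u8` field could not represent. A minimal standalone sketch (array width and sample values invented):

```python
import numpy as np

MAX_CONTRIB_SEGMENTS = 8  # stand-in for self.max_contrib_segments

# Fixed-width record: one fraction and one segment id per contributing segment.
hit_frac_dtype = np.dtype([
    ('fraction', f'({MAX_CONTRIB_SEGMENTS},)f8'),
    ('segment_ids', f'({MAX_CONTRIB_SEGMENTS},)i8'),
])

hit = np.zeros(1, dtype=hit_frac_dtype)
hit['segment_ids'] = -1                   # -1 marks "no segment in this slot"
hit['fraction'][0, :2] = [0.75, 0.25]     # two true segments share this hit's charge
hit['segment_ids'][0, :2] = [1042, 1043]  # hypothetical segment ids
```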
@@ -106,31 +106,21 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
         '''
 
-        has_mc_truth = seg_fracs[0] is not None
+        has_mc_truth = seg_fracs is not None
         iteration_count = 0
         mask = hits.mask['id'].copy()
         new_hits = hits.data.copy()
         weights = weights.data.copy()
         old_ids = hits.data['id'].copy()[...,np.newaxis]
         old_id_mask = hits.mask['id'].copy()[...,np.newaxis]
         if has_mc_truth:
-            new_seg_bt = np.array(seg_fracs[0])
-            new_frac_bt = np.array(seg_fracs[1])
             hit_contributions = np.full(shape=weights.shape+(3,self.max_contrib_segments),fill_value=0.)
-            # initialize hit contribution lists with unmerged data.
-            # [there is probably a more pythonic way of doing this...]
-            for it, q in np.ndenumerate(weights):
-                if len(new_frac_bt[it]) > 1:
-                    print('!!!!!!!!!!!!!!!!!')
-                    break
-                counter=0
-                for entry_it, entry in enumerate(new_frac_bt[it][0]):
-                    if abs(entry) < 0.001: continue
-                    hit_contributions[it][0][counter] = q
-                    hit_contributions[it][1][counter] = new_frac_bt[it][0][entry_it]
-                    hit_contributions[it][2][counter] = new_seg_bt[it][0][entry_it]['segment_id']
-                    counter+=1
-
+            prompt_hit_max_seg_contrib = seg_fracs['segment_ids'].shape[-1]
+            hit_contributions[:,:,0,:] = np.pad(np.expand_dims(weights,axis=2),((0,0),(0,0),(0,self.max_contrib_segments-1)),'mean')
+            hit_contributions[:,:,1,:] = np.pad(np.squeeze(seg_fracs['fraction']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=0)
+            hit_contributions[:,:,2,:] = np.pad(np.squeeze(seg_fracs['segment_ids']),((0,0),(0,0),(0, self.max_contrib_segments-prompt_hit_max_seg_contrib)),'constant',constant_values=-1)
         while new_hits.size > 0 and iteration_count != max_steps:
             iteration_count += 1
             # algorithm is iterative, but typically only needs to loop a few (~2-3) times
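The deleted per-hit loop is replaced by three vectorized `np.pad` calls. The trick worth noting: padding a length-1 axis in `'mean'` mode simply repeats the single value, so each hit's charge gets broadcast across all contribution slots, while fractions and segment ids are padded out with `0` and `-1`. A toy illustration under invented shapes (one event, two hits, two true segments each, four slots):

```python
import numpy as np

max_contrib = 4
weights = np.array([[5.0, 2.0]])             # (events=1, hits=2) charge per hit
frac = np.array([[[0.6, 0.4], [1.0, 0.0]]])  # (1, 2, 2) fractional contributions
segs = np.array([[[7, 9], [3, -1]]])         # (1, 2, 2) matching segment ids

contrib = np.zeros((1, 2, 3, max_contrib))
# 'mean' padding of a length-1 axis repeats the single value:
contrib[:, :, 0, :] = np.pad(weights[..., np.newaxis],
                             ((0, 0), (0, 0), (0, max_contrib - 1)), 'mean')
contrib[:, :, 1, :] = np.pad(frac, ((0, 0), (0, 0), (0, max_contrib - 2)),
                             'constant', constant_values=0)
contrib[:, :, 2, :] = np.pad(segs, ((0, 0), (0, 0), (0, max_contrib - 2)),
                             'constant', constant_values=-1)
```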
@@ -213,9 +203,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
                             hit_contributions[...,:-1][hit_it][0][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][0][comb_it]
                             hit_contributions[...,:-1][hit_it][2][e+comb_it] = hit_contributions[...,:][hit_it[0],hit_it[1]+1][2][comb_it]
                             # and remove them from the hit that was merged in
-                            hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0
                             hit_contributions[hit_it[0],hit_it[1]+1][0][comb_it] = 0.
-                            hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = 0.
+                            hit_contributions[hit_it[0],hit_it[1]+1][1][comb_it] = 0.
+                            hit_contributions[hit_it[0],hit_it[1]+1][2][comb_it] = -1
 
                 # now we mask off hits that have already been merged
                 mask[...,1:] = mask[...,1:] | to_merge
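What this bookkeeping does, in a hedged standalone toy (arrays and slot counts invented, not the module's code): when a neighbouring hit is merged in, its (charge, fraction, segment) triplets are appended to the survivor's lists and then blanked in the donor with the same sentinels used above (`0.` for charge and fraction, `-1` for segment id). The fractions are deliberately left unnormalised; the renormalisation happens in the block below.

```python
import numpy as np

# rows: 0 = charge, 1 = fraction, 2 = segment id; columns = contribution slots
survivor = np.array([[5.0, 0.0], [1.0, 0.0], [7.0, -1.0]])
donor    = np.array([[2.0, 0.0], [1.0, 0.0], [9.0, -1.0]])

n = np.count_nonzero(survivor[1])    # filled slots in the survivor
m = np.count_nonzero(donor[1])       # filled slots in the donor
survivor[:, n:n + m] = donor[:, :m]  # append the donor's triplets
donor[0, :m] = 0.                    # blank the donor: charge -> 0
donor[1, :m] = 0.                    # fraction -> 0
donor[2, :m] = -1                    # segment id -> -1
```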
@@ -249,7 +239,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
             for hit_it, hit in np.ndenumerate(new_hits):
                 if mask[hit_it]: continue
                 hit_contr = hit_contributions[hit_it]
 
+                # renormalize the fractional contributions given the charge weighted average
+                # YC, 2024-06-03: I think we should check this norm is consistent with the sum of Q from all contributed prompt hits
                 norm = np.sum(np.multiply(hit_contr[0],hit_contr[1]))
                 if norm == 0.: norm = 1.
                 tmp_bt_0 = np.multiply(hit_contr[0],hit_contr[1])/norm # fractional contributions
@@ -267,9 +259,9 @@ def merge_hits(self,hits, weights, seg_fracs, dt_cut, sum_fields=None, weighted_
                 bt_unique_segs = np.take_along_axis(bt_unique_segs, isort, axis=-1)
                 bt_unique_frac = np.take_along_axis(bt_unique_frac, isort, axis=-1)
                 back_track[hit_it]['fraction'] = [0.]*self.max_contrib_segments
-                back_track[hit_it]['segment_id'] = [0]*self.max_contrib_segments
+                back_track[hit_it]['segment_ids'] = [-1]*self.max_contrib_segments
                 back_track[hit_it]['fraction'][:bt_unique_frac.shape[0]] = bt_unique_frac
-                back_track[hit_it]['segment_id'][:bt_unique_segs.shape[0]] = bt_unique_segs
+                back_track[hit_it]['segment_ids'][:bt_unique_segs.shape[0]] = bt_unique_segs
         else: back_track = None
 
         new_hit_idx = np.broadcast_to(np.cumsum(~mask.ravel(), axis=0).reshape(mask.shape + (1,)), old_ids.shape)-1
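A compact sketch of the renormalisation and deduplication performed above (toy arrays, not the module's code): each slot's fraction is weighted by the charge it carried, normalised by `sum(q*f)` with a zero guard, then duplicate segment ids are collapsed and sorted by descending contribution before being written into the fixed-width `fraction`/`segment_ids` arrays.

```python
import numpy as np

q = np.array([5.0, 2.0, 0.0])   # charge carried by each contribution slot
f = np.array([1.0, 1.0, 0.0])   # unnormalised fractions
seg = np.array([7, 9, -1])      # segment ids, -1 = empty slot

norm = np.sum(q * f)
if norm == 0.: norm = 1.        # guard against an all-empty hit
frac = q * f / norm             # charge-weighted fractions, summing to 1

# collapse duplicate segment ids, then sort by descending contribution
valid = seg >= 0
uniq, inverse = np.unique(seg[valid], return_inverse=True)
summed = np.zeros(uniq.shape)
np.add.at(summed, inverse, frac[valid])
order = np.argsort(summed)[::-1]
uniq, summed = uniq[order], summed[order]
```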
@@ -285,10 +277,9 @@ def run(self, source_name, source_slice, cache):
 
         event_id = np.r_[source_slice]
         packet_frac_bt = cache['packet_frac_backtrack']
-        packet_seg_bt = cache['packet_seg_backtrack']
         hits = cache[self.hits_name]
 
-        merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=[packet_seg_bt,packet_frac_bt],dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)
+        merged, ref, back_track = self.merge_hits(hits, weights=hits['Q'], seg_fracs=packet_frac_bt,dt_cut=self.merge_cut, sum_fields=self.sum_fields, weighted_mean_fields=self.weighted_mean_fields, max_steps=self.max_merge_steps, mode=self.merge_mode)
 
         merged_mask = merged.mask['id']
 
90 changes: 47 additions & 43 deletions src/proto_nd_flow/reco/charge/calib_prompt_hits.py
@@ -102,39 +102,12 @@ def __init__(self, **params):
         self.t0_dset_name = params.get('t0_dset_name')
         self.pedestal_file = params.get('pedestal_file', '')
         self.configuration_file = params.get('configuration_file', '')
-        self.max_contrib_segments = params.get('max_contrib_segments','')
 
     def init(self, source_name):
         super(CalibHitBuilder, self).init(source_name)
         self.load_pedestals()
         self.load_configurations()
 
-        self.hit_frac_dtype = np.dtype([
-            ('fraction', f'({self.max_contrib_segments},)f8'),
-            ('segment_id', f'({self.max_contrib_segments},)u8')
-        ])
-
-        # save all config info
-        self.data_manager.set_attrs(self.calib_hits_dset_name,
-                                    classname=self.classname,
-                                    class_version=self.class_version,
-                                    source_dset=source_name,
-                                    packets_dset=self.packets_dset_name,
-                                    t0_dset=self.t0_dset_name,
-                                    pedestal_file=self.pedestal_file,
-                                    configuration_file=self.configuration_file
-                                    )
-
-        # then set up new datasets
-        self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
-        if resources['RunData'].is_mc:
-            self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=self.hit_frac_dtype)
-        self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
-        self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
-        self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
-        if resources['RunData'].is_mc:
-            self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)
-
     def run(self, source_name, source_slice, cache):
         super(CalibHitBuilder, self).run(source_name, source_slice, cache)
         events_data = cache[self.events_dset_name]
Expand All @@ -146,6 +119,8 @@ def run(self, source_name, source_slice, cache):
t0_data = cache[self.t0_dset_name]
raw_hits = cache[self.raw_hits_dset_name]

has_mc_truth = packet_frac_bt is not None

mask = ~rfn.structured_to_unstructured(packets_data.mask).any(axis=-1)
rh_mask = ~rfn.structured_to_unstructured(raw_hits.mask).any(axis=-1)

@@ -157,16 +132,36 @@ def run(self, source_name, source_slice, cache):
             packets_arr = packets_data.data[mask]
             if resources['RunData'].is_mc:
                 packet_frac_bt_arr = packet_frac_bt.data[mask]
+                packet_frac_bt_arr = np.concatenate(packet_frac_bt_arr)
                 packet_seg_bt_arr = packet_seg_bt.data[mask]
             index_arr = packets_index.data[mask]
         else:
             n = 0
             index_arr = np.zeros((0,), dtype=packets_index.dtype)
 
+        if resources['RunData'].is_mc:
+            has_mc_truth = packet_seg_bt is not None
+        else:
+            has_mc_truth = False
+        if has_mc_truth and ('x_true_seg_t' not in self.calib_hits_dtype.fields):
+            self.calib_hits_dtype = np.dtype(self.calib_hits_dtype.descr + [('x_true_seg_t', f'({packet_seg_bt.shape[-1]},)f8'), ('E_true_recomb_elife', f'({packet_seg_bt.shape[-1]},)f8')])
+
+        # save all config info
+        self.data_manager.set_attrs(self.calib_hits_dset_name,
+                                    classname=self.classname,
+                                    class_version=self.class_version,
+                                    source_dset=source_name,
+                                    packets_dset=self.packets_dset_name,
+                                    t0_dset=self.t0_dset_name,
+                                    pedestal_file=self.pedestal_file,
+                                    configuration_file=self.configuration_file
+                                    )
+
+        # then set up new datasets
+        self.data_manager.create_dset(self.calib_hits_dset_name, dtype=self.calib_hits_dtype)
+        if has_mc_truth:
+            self.data_manager.create_dset(self.mc_hit_frac_dset_name, dtype=packet_frac_bt_arr.dtype)
+        self.data_manager.create_ref(source_name, self.calib_hits_dset_name)
+        self.data_manager.create_ref(self.calib_hits_dset_name, self.packets_dset_name)
+        self.data_manager.create_ref(self.events_dset_name, self.calib_hits_dset_name)
+        if has_mc_truth:
+            self.data_manager.create_ref(self.calib_hits_dset_name, self.mc_hit_frac_dset_name)
+
         # reserve new data
         calib_hits_slice = self.data_manager.reserve_data(self.calib_hits_dset_name, n)
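The dtype surgery above extends `calib_hits_dtype` in place the first time truth information shows up, which is why the `set_attrs`/`create_dset` block moved out of `init()` and into `run()`. A sketch of the pattern with an invented base dtype:

```python
import numpy as np

calib_hits_dtype = np.dtype([('id', 'u4'), ('x', 'f8'), ('Q', 'f8')])
n_seg = 5  # stand-in for packet_seg_bt.shape[-1]

# append the truth-level fields once, guarded as in the diff
if 'x_true_seg_t' not in calib_hits_dtype.fields:
    calib_hits_dtype = np.dtype(
        calib_hits_dtype.descr
        + [('x_true_seg_t', f'({n_seg},)f8'),
           ('E_true_recomb_elife', f'({n_seg},)f8')])
```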
@@ -198,11 +193,16 @@ def run(self, source_name, source_slice, cache):
                 hit_t0[first_index:last_index] = np.full(n_not_masked,t0)
                 first_index += n_not_masked
 
-        drift_t = raw_hits_arr['ts_pps'] - hit_t0
-
+        drift_t = raw_hits_arr['ts_pps'] - hit_t0 #ticks
         drift_d = drift_t * (resources['LArData'].v_drift * resources['RunData'].crs_ticks) / units.cm # convert mm -> cm
         x = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d)
 
+        # true drift position pair
+        if has_mc_truth:
+            drift_t_true = packet_seg_bt_arr['t'] #us
+            drift_d_true = drift_t_true * (resources['LArData'].v_drift) / units.cm # convert mm -> cm
+            x_true_seg_t = resources['Geometry'].get_drift_coordinate(packets_arr['io_group'],packets_arr['io_channel'],drift_d_true)
+
         zy = resources['Geometry'].pixel_coordinates_2D[packets_arr['io_group'],
                 packets_arr['io_channel'], packets_arr['chip_id'], packets_arr['channel_id']]
         tile_id = resources['Geometry'].tile_id[packets_arr['io_group'],packets_arr['io_channel']]
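The reco and truth drift conversions differ only in their time units: `ts_pps - t0` is in clock ticks and needs `crs_ticks` to become µs, while the backtracked segment time `t` is already in µs. Schematically, with invented constants standing in for the resource lookups:

```python
# invented stand-ins for the resources[...] lookups in the diff
V_DRIFT = 1.6     # mm/us, hypothetical drift velocity
CRS_TICKS = 0.1   # us per clock tick, hypothetical
MM_PER_CM = 10.0  # the units.cm conversion

drift_t = 1200.0                                       # reco: ticks since t0
drift_d = drift_t * (V_DRIFT * CRS_TICKS) / MM_PER_CM  # -> 19.2 cm

drift_t_true = 120.0                                   # truth: already in us
drift_d_true = drift_t_true * V_DRIFT / MM_PER_CM      # -> 19.2 cm
```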
@@ -219,27 +219,30 @@ def run(self, source_name, source_slice, cache):
                                                  for unique_id in hit_uniqueid_str])
         calib_hits_arr['id'] = calib_hits_slice.start + np.arange(n, dtype=int)
         calib_hits_arr['x'] = x
+        if has_mc_truth:
+            calib_hits_arr['x_true_seg_t'] = x_true_seg_t
         calib_hits_arr['y'] = zy[:,1]
         calib_hits_arr['z'] = zy[:,0]
         calib_hits_arr['ts_pps'] = raw_hits_arr['ts_pps']
         calib_hits_arr['t_drift'] = drift_t
         calib_hits_arr['io_group'] = packets_arr['io_group']
         calib_hits_arr['io_channel'] = packets_arr['io_channel']
-        calib_hits_arr['Q'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped)
-        #!!! hardcoding W_ion, R=0.7, and not accounting for finite electron lifetime
-        calib_hits_arr['E'] = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) * 23.6e-3 / 0.7
-
-        # create truth-level backtracking dataset
+        hits_charge = self.charge_from_dataword(packets_arr['dataword'],vref,vcm,ped) # ke-
+        calib_hits_arr['Q'] = hits_charge # ke-
+        #FIXME supply more realistic dEdx in the recombination; also apply measured electron lifetime
+        calib_hits_arr['E'] = hits_charge * (1000 * units.e) / resources['LArData'].ionization_recombination(mode=2,dEdx=2) * (resources['LArData'].ionization_w / units.MeV) # MeV
         if has_mc_truth:
-            back_track = np.full(shape=packets_arr.shape,fill_value=0.,dtype=self.hit_frac_dtype)
-            for hit_it, pack in np.ndenumerate(packets_arr):
-                back_track[hit_it]['fraction'] = packet_frac_bt_arr[hit_it]
-                back_track[hit_it]['segment_id'] = packet_seg_bt_arr[hit_it]['segment_id']
+            true_recomb = resources['LArData'].ionization_recombination(mode=1,dEdx=packet_seg_bt_arr['dEdx'])
+            calib_hits_arr['E_true_recomb_elife'] = np.divide(hits_charge.reshape((hits_charge.shape[0],1)) * (1000 * units.e), true_recomb, out=np.zeros_like(true_recomb), where=true_recomb!=0) / resources['LArData'].charge_reduction_lifetime(t_drift=drift_t_true) * (resources['LArData'].ionization_w / units.MeV) # MeV
 
         # if back tracking information was available, write the merged back tracking
         # dataset to file
         if has_mc_truth:
-            self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, back_track)
+            # make sure packets and packet backtracking match in numbers
+            if packets_arr.shape[0] == packet_frac_bt_arr.shape[0]:
+                self.data_manager.write_data(self.mc_hit_frac_dset_name, hit_bt_slice, packet_frac_bt_arr)
+            else:
+                raise Exception("The data packet and backtracking info do not match in size.")
 
         # write
         self.data_manager.write_data(self.calib_hits_dset_name, calib_hits_slice, calib_hits_arr)
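Both energy estimates follow the same chain, charge → electrons → undo recombination → multiply by W_ion; the truth variant swaps in a per-segment recombination factor and an electron-lifetime correction, guarding against zero recombination exactly as the `np.divide(..., where=)` above does. A schematic with placeholder constants standing in for the `LArData` calls:

```python
import numpy as np

# placeholder constants standing in for the LArData resource calls
W_ION = 23.6e-6       # MeV per electron-ion pair (23.6 eV)
R_BOX = 0.7           # recombination factor, cf. ionization_recombination(mode=2, dEdx=2)
LIFETIME_CORR = 0.95  # survival fraction, cf. charge_reduction_lifetime(t_drift=...)

q_ke = np.array([150.0, 80.0])  # hit charge in ke-
n_e = q_ke * 1000               # ke- -> number of electrons

# reco: fixed-dEdx recombination, no lifetime correction
E = n_e / R_BOX * W_ION  # MeV

# truth: per-segment recombination plus lifetime correction
r_true = np.array([0.68, 0.0])
E_true = (np.divide(n_e, r_true, out=np.zeros_like(r_true), where=r_true != 0)
          / LIFETIME_CORR * W_ION)  # MeV
```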
@@ -277,3 +280,4 @@ def load_configurations(self):
         with open(self.configuration_file, 'r') as infile:
             for key, value in json.load(infile).items():
                 self.configuration[key] = value
+