To Adapt Russell's code for making nice event displays and examine some of my own data that way!

In [2]:
# python
import numpy as np
import time as t

# physics
import uproot as ur
import awkward as ak
import vector as vec
print("Awkward version: "+str(ak.__version__))
print("Uproot version: "+str(ur.__version__))
print("Vector version: "+str(vec.__version__))

# visualization tools
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import Normalize

Awkward version: 1.2.2
Uproot version: 4.0.7
Vector version: 0.8.1


In [3]:
track_branches = ['trackEta_EMB1', 'trackPhi_EMB1', 'trackEta_EMB2', 'trackPhi_EMB2', 'trackEta_EMB3', 'trackPhi_EMB3',
                  'trackEta_TileBar0', 'trackPhi_TileBar0', 'trackEta_TileBar1', 'trackPhi_TileBar1',
                  'trackEta_TileBar2', 'trackPhi_TileBar2']

event_branches = ["cluster_nCells", "cluster_cell_ID", "cluster_cell_E", 'cluster_nCells', "nCluster", "eventNumber",
                  "nTrack", "nTruthPart", "truthPartPdgId", "cluster_Eta", "cluster_Phi", 'trackPt', 'trackP',
                  'trackMass', 'trackEta', 'trackPhi', 'truthPartE', 'cluster_ENG_CALIB_TOT']

geo_branches = ["cell_geo_ID", "cell_geo_eta", "cell_geo_phi", "cell_geo_rPerp", "cell_geo_sampling"]

In [4]:
#helper function definitions:

def dict_from_event_tree(_event_tree, _branches):
    ''' The purpose for this separate function is to load np arrays where possible. '''
    _special_keys = ["nCluster", "eventNumber", "nTrack", "nTruthPart"]
    _dict = dict()
    for _key in _branches:
        if _key in _special_keys:
            _branch = _event_tree.arrays(filter_name=_key)[_key].to_numpy()
        else:
            _branch = _event_tree.arrays(filter_name=_key)[_key]
        _dict[_key] = _branch
    return _dict

def dict_from_tree_branches(_tree, _branches):
    ''' Helper function to put event data in branches to make things easier to pass to functions,
    pretty self explanatory. '''
    _dict = dict()
    for _key in _branches:
        _branch = _tree.arrays(filter_name=_key)[_key]
        _dict[_key] = _branch
    return _dict

def dict_from_tree_branches_np(_tree, _branches):
    ''' Helper function to put event data in branches to make things easier to pass to functions,
    pretty self explanatory. This always returns np arrays in the dict. '''
    _dict = dict()
    for _key in _branches:
        _branch = np.ndarray.flatten(_tree.arrays(filter_name=_key)[_key].to_numpy())
        _dict[_key] = _branch
    return _dict

def find_index_1D(_values, _unsorted):
    ''' This function is a vectorized helper function to return the 1D locations of elements in a 
    larger unsorted list (i.e. cell geo locations). It does this by repeating/tiling arrays so they
    have equal shape and then simply comparing where the elements are equal and returning a slice
    where the indices match.
    Inputs: 
        _values: the values for which to search for the locations of in _unsorted
        _unsorted: the larger un-sorted list of elements to match
    Returns:
        _locs: locations of _values in _unsorted respectively. '''
    _shape = (len(_values), len(_unsorted))
    _values_2d = np.repeat(_values, repeats=len(_unsorted)).reshape(_shape)
    _unsorted_2d = np.tile(_unsorted, reps=len(_values)).reshape(_shape)
    
    _idx_vec = np.argwhere(np.asarray(_values_2d == _unsorted_2d))
    return _idx_vec[:,1]

def to_xyz(_coords):
    ''' Simple geometric conversion to xyz from eta, phi, rperp (READ: in this order)
    There is an elegant way to generalize this to be flexible for 1d or 2d, for now 2d
    Inputs: np array of shape (N, 3) where columns are [eta, phi, rPerp]
    Outputs: np array of shape (N, 3) where columns are [x,y,z] '''
    _eta = _coords[:,0]
    _phi = _coords[:,1]
    _rperp = _coords[:,2]
    _theta = 2*np.arctan( np.exp(-_eta) )
    
    cell_x = _rperp*np.cos(_phi)
    cell_y = _rperp*np.sin(_phi)
    cell_z = _rperp/np.tan(_theta)
    
    return np.column_stack([cell_x,cell_y,cell_z])

In [5]:
data_path = '/fast_scratch/atlas_images/v01-45/'
pi0 = ur.open(data_path+'pi0_medium.root')
pi0.keys()

['EventTree;1', 'CellGeo;1']

In [6]:
event_tree = pi0['EventTree']
geo_tree = pi0['CellGeo']

In [7]:
event_dict = dict_from_event_tree(event_tree, event_branches)
track_dict = dict_from_tree_branches(event_tree, track_branches)
geo_dict = dict_from_tree_branches_np(geo_tree, geo_branches)

In [8]:
len(event_dict)

17

In [9]:
event = 0

In [10]:
ak_cluster_cell_ID = event_dict['cluster_cell_ID'][event]
cell_geo_ID = geo_dict['cell_geo_ID']

nClust = len(ak_cluster_cell_ID)
_cell_idx = np.zeros(nClust, dtype=np.int32)
    
clusters = []
for j in range(nClust):

    _cell_ID = ak_cluster_cell_ID[j]
    _cell_idx[j] = np.argmax(_cell_ID == cell_geo_ID)

    # make empty array of cluster info
    _cluster = np.zeros((_nInClust, 5))
    
    # index matching
    _indices = find_index_1D(_cell_idx[j].to_numpy(), cell_geo_ID)

    _cluster[:,0] = geo_dict["cell_geo_eta"][_indices]
    _cluster[:,1] = geo_dict["cell_geo_phi"][_indices]
    _cluster[:,2] = geo_dict["cell_geo_rPerp"][_indices]
    _cluster[:,3] = event_dict["cluster_cell_E"][event][j].to_numpy()
    _cluster[:,4] = geo_dict["cell_geo_sampling"][_indices]
    
    clusters.append(_cluster)

ValueError: operands could not be broadcast together with shapes (1,24) (1,3753000) 

In [14]:
len(geo_dict["cell_geo_eta"][_indices])

480