In [2]:
import uproot
import pandas as pd
import gc

In [3]:
ROOT_INPUT_PATH = 'input_root/341294_afp_hits.root'

file = uproot.open(ROOT_INPUT_PATH)
tree = file['TreeHits']
tree.show()
dataset_t = tree.arrays(['hits','hits_row', 'hits_col', 'hits_q', 'timestamp', 'bcid', 'lmiBl', 'mu'], library='pd', entry_stop = None)
dataset = dataset_t.copy()
del dataset_t
gc.collect()
dataset.head()


# number of hits
dataset['left_hits_n'] = dataset.filter(regex='^hits\\[[01]',axis=1).sum(axis=1)
dataset['right_hits_n'] = dataset.filter(regex='^hits\\[[23]',axis=1).sum(axis=1)
dataset.drop(dataset.filter(regex='^hits\\[',axis=1), axis=1, inplace=True)

#average coordinates
weights_left = dataset.filter(regex='^hits_q\\[[01]',axis=1).where(dataset.filter(regex='^hits_q\[',axis=1) > 0.0, 0)
weights_right = dataset.filter(regex='^hits_q\\[[23]',axis=1).where(dataset.filter(regex='^hits_q\[',axis=1) > 0.0, 0)
dataset.drop(dataset.filter(regex='^hits_q',axis=1), axis=1, inplace=True)

rows_left = dataset.filter(regex='^hits_row\\[[01]',axis=1)
rows_right = dataset.filter(regex='^hits_row\\[[23]',axis=1)
dataset.drop(dataset.filter(regex='^hits_row',axis=1), axis=1, inplace=True)
dataset['left_hit_row'] = (rows_left * weights_left.values).sum(axis=1) / weights_left.sum(axis = 1)
dataset['right_hit_row'] = (rows_right * weights_right.values).sum(axis=1) / weights_right.sum(axis = 1)
del [rows_left, rows_right]
gc.collect()

columns_left = dataset.filter(regex='^hits_col\\[[01]',axis=1)
columns_right = dataset.filter(regex='^hits_col\\[[23]',axis=1)
dataset.drop(dataset.filter(regex='^hits_col',axis=1), axis=1, inplace=True)
dataset['left_hit_column'] = (columns_left * weights_left.values).sum(axis=1) / weights_left.sum(axis = 1)
dataset['right_hit_column'] = (columns_right * weights_right.values).sum(axis=1) / weights_right.sum(axis = 1)
del [columns_left, columns_right]
del [weights_left, weights_right]
gc.collect()

dataset.head()


name                 | typename                 | interpretation                
---------------------+--------------------------+-------------------------------
evN                  | int32_t                  | AsDtype('>i4')
lmiBl                | int32_t                  | AsDtype('>i4')
mu                   | float                    | AsDtype('>f4')
timestamp            | uint32_t                 | AsDtype('>u4')
bcid                 | uint32_t                 | AsDtype('>u4')
hits                 | int32_t[4][4]            | AsDtype("('>i4', (4, 4))")
hits_row             | int32_t[4][4][100]       | AsDtype("('>i4', (4, 4, 100...
hits_col             | int32_t[4][4][100]       | AsDtype("('>i4', (4, 4, 100...
hits_q               | float[4][4][100]         | AsDtype("('>f4', (4, 4, 100...


  out[name] = series[name]


Unnamed: 0,timestamp,bcid,lmiBl,mu,left_hits_n,right_hits_n,left_hit_row,right_hit_row,left_hit_column,right_hit_column
0,1511286284,2801,157,2.327614,0,13,-538351400.0,61276660.0,-375146700.0,768098800.0
1,1511286290,2180,157,1.908377,0,11,-538351400.0,60629780.0,-374681000.0,768098800.0
2,1511286291,2357,157,1.598215,16,1,-535213500.0,60619850.0,-376627900.0,768098800.0
3,1511286301,3161,157,1.757314,0,10,-538351400.0,61264050.0,-375146800.0,768098800.0
4,1511286304,2215,157,2.355958,0,11,-538351400.0,60619970.0,-375263000.0,768098800.0


In [None]:
# normalization - temporary as different cell as it is not optimal solution
buffor = dataset.drop(['right_hits_n', 'right_hit_row', 'right_hit_column'])
buffor.rename(columns={'right_hits_n': 'hits_n', 'right_hit_row': 'hit_row', 'right_hit_column': 'hit_column'}, inplace = True)

dataset.drop(['left_hits_n', 'left_hit_row', 'left_hit_column'], inplace = True)
dataset = dataset.rename(columns={'left_hits_n': 'hits_n', 'left_hit_row': 'hit_row', 'left_hit_column': 'hit_column'}, inplace = True)


In [7]:
print(dataset)

        timestamp  bcid  lmiBl  ...  right_hit_row  left_hit_column  \
0      1511286284  2801    157  ...   6.127666e+07    -3.751467e+08   
1      1511286290  2180    157  ...   6.062978e+07    -3.746810e+08   
2      1511286291  2357    157  ...   6.061985e+07    -3.766279e+08   
3      1511286301  3161    157  ...   6.126405e+07    -3.751468e+08   
4      1511286304  2215    157  ...   6.061997e+07    -3.752630e+08   
...           ...   ...    ...  ...            ...              ...   
18692  1511290367   389    226  ...   5.222989e+05    -9.151355e+08   
18693  1511290734   991    232  ...   5.221842e+05    -9.151355e+08   
18694  1511290820  1394    234  ...   5.221842e+05    -9.151355e+08   
18695  1511291383  1356    243  ...   4.871078e+05    -9.151355e+08   
18696  1511291522   110    245  ...   4.871078e+05    -9.151355e+08   

       right_hit_column  
0          7.680988e+08  
1          7.680988e+08  
2          7.680988e+08  
3          7.680988e+08  
4          7.6809

In [8]:
preview = tree.arrays(['hits_q'], library='pd', entry_stop = 1000)
pd.set_option('display.max_rows', 1000, 'display.max_columns', 6)
print(preview)

  out[name] = series[name]


        timestamp  bcid  lmiBl  ...  right_hit_row  left_hit_column  \
0      1511286284  2801    157  ...   6.127666e+07    -3.751467e+08   
1      1511286290  2180    157  ...   6.062978e+07    -3.746810e+08   
2      1511286291  2357    157  ...   6.061985e+07    -3.766279e+08   
3      1511286301  3161    157  ...   6.126405e+07    -3.751468e+08   
4      1511286304  2215    157  ...   6.061997e+07    -3.752630e+08   
...           ...   ...    ...  ...            ...              ...   
18692  1511290367   389    226  ...   5.222989e+05    -9.151355e+08   
18693  1511290734   991    232  ...   5.221842e+05    -9.151355e+08   
18694  1511290820  1394    234  ...   5.221842e+05    -9.151355e+08   
18695  1511291383  1356    243  ...   4.871078e+05    -9.151355e+08   
18696  1511291522   110    245  ...   4.871078e+05    -9.151355e+08   

       right_hit_column  
0          7.680988e+08  
1          7.680988e+08  
2          7.680988e+08  
3          7.680988e+08  
4          7.6809