In [10]:
import uproot
import pandas as pd
import numpy as np

def load_mc(filename="mcdata_taue2.root", step=1,
            ele_P_min=0.1, ele_z_max=0, numtracks_min=3):
    """Load showers from the 'Data' tree of a ROOT file and apply selection cuts.

    Each row of the returned frame is one event. The ``BT_*`` and
    ``chisquare`` cells hold one array per event; the ``ele_*`` cells hold
    scalars.

    Parameters
    ----------
    filename : str
        Path of the ROOT file to open.
    step : int
        Currently unused; kept so that existing callers passing ``step=``
        keep working.
    ele_P_min : float
        Exclusive lower bound on ``ele_P`` (rows need ``ele_P > ele_P_min``).
    ele_z_max : float
        Exclusive upper bound on ``ele_z`` (rows need ``ele_z < ele_z_max``).
    numtracks_min : int
        Exclusive lower bound on the number of entries in ``BT_X``
        (rows need ``numtracks > numtracks_min``).

    Returns
    -------
    pandas.DataFrame
        The selected rows with three kinds of columns: the branches read
        from the file, ``numtracks`` (length of ``BT_X``), and
        ``m_BT_X`` / ``m_BT_Y`` / ``m_BT_Z`` (per-row mean of the
        corresponding array).

    Side effects
    ------------
    Prints the row count before the cuts and after each cut, then the final
    number of rows.
    """
    branches = ["Event_id", "ele_P", "BT_X", "BT_Y",
                "BT_Z", "BT_SX", "BT_SY", "ele_x",
                "ele_y", "ele_z", "ele_sx", "ele_sy", "chisquare"]
    f = uproot.open(filename)
    pmc = pd.DataFrame(f['Data'].pandas.df(branches, flatten=False))
    pmc['numtracks'] = pmc.BT_X.apply(len)

    # The cuts are applied one after another so the row count left by each
    # of them can be reported.
    cuts = (
        lambda frame: frame.ele_P > ele_P_min,
        lambda frame: frame.ele_z < ele_z_max,
        lambda frame: frame.numtracks > numtracks_min,
    )
    shapechange = [pmc.shape[0]]
    for cut in cuts:
        pmc = pmc[cut(pmc)]
        shapechange.append(pmc.shape[0])
    print("numtracks reduction by cuts: ", shapechange)

    # Boolean indexing returns a slice of the previous frame; take an explicit
    # copy so the column assignments below do not raise SettingWithCopyWarning.
    pmc = pmc.copy()
    for axis in ("X", "Y", "Z"):
        pmc['m_BT_' + axis] = pmc['BT_' + axis].apply(lambda track: np.mean(track))

    print("len(pmc): {len}".format(len=len(pmc)))
    return pmc

In [26]:
# Load the showers from the ROOT file and apply the cuts implemented in
# load_mc. NOTE(review): `step` is passed here but load_mc never reads it.
pmc = load_mc(filename='./data/mcdata_taue2.root', step=1)

numtracks reduction by cuts:  [18724, 18679, 9616, 9106]
len(pmc): 9106


In [27]:
# Display the first row that survived the cuts to check the column layout:
# the BT_* and chisquare cells are arrays, the ele_* cells are scalars.
pmc.iloc[0]

Event_id                                                  4586
ele_P                                                  36.6597
BT_X         [-12795.279, -12795.536, -12795.117, -12843.53...
BT_Y         [37317.43, 37316.953, 37316.758, 37391.957, 37...
BT_Z         [-26878.5, -26877.945, -26877.19, -25604.922, ...
BT_SX        [-0.03736506, -0.041414507, -0.03618029, -0.04...
BT_SY        [0.05847942, 0.058261085, 0.057884686, 0.05989...
ele_x                                                 -12791.5
ele_y                                                  37311.5
ele_z                                                   -26980
ele_sx                                               -0.046723
ele_sy                                               0.0754437
chisquare    [0.6740895, 0.85637194, 0.79782254, 0.77434564...
numtracks                                                 3664
m_BT_X                                                -14007.7
m_BT_Y                                                 

In [52]:
from tqdm import tqdm
def normalize_showers(pmc):
    """Express every shower relative to the position and slopes of its electron.

    Rows are visited by position, so the index labels of ``pmc`` are never
    used and may repeat.

    Parameters
    ----------
    pmc : pandas.DataFrame
        Needs the columns ``BT_X``, ``BT_Y``, ``BT_Z``, ``BT_SX``, ``BT_SY``
        (one array per row) and ``ele_x``, ``ele_y``, ``ele_z``, ``ele_sx``,
        ``ele_sy``, ``ele_P`` (one scalar per row). A missing column raises
        ``KeyError``.

    Returns
    -------
    list of dict
        One dict per row of ``pmc``, in row order, with the keys

        * ``SZ`` -- ``BT_Z - ele_z``
        * ``SX`` -- ``BT_X - ele_x - SZ * ele_sx``
        * ``SY`` -- ``BT_Y - ele_y - SZ * ele_sy``
        * ``TX`` -- ``tan(arctan(BT_SX) - arctan(ele_sx))``
        * ``TY`` -- ``tan(arctan(BT_SY) - arctan(ele_sy))``
        * ``ele_P`` -- list repeating the row's ``ele_P`` once per ``BT_X`` entry
    """
    columns = ['BT_X', 'BT_Y', 'BT_Z', 'BT_SX', 'BT_SY',
               'ele_x', 'ele_y', 'ele_z', 'ele_sx', 'ele_sy', 'ele_P']
    # Walk the column arrays in lockstep instead of calling pmc.loc[idx] per
    # row: .loc builds a fresh object Series for every row and returns a whole
    # DataFrame when an index label occurs more than once.
    rows = zip(*(pmc[name].values for name in columns))

    showers = []
    for (bt_x, bt_y, bt_z, bt_sx, bt_sy,
         ele_x, ele_y, ele_z, ele_sx, ele_sy, ele_p) in tqdm(rows, total=len(pmc)):
        dz = bt_z - ele_z
        showers.append({
            # Subtract the electron position extrapolated along its slope to
            # the z of each entry.
            'SX': bt_x - ele_x - dz * ele_sx,
            'SY': bt_y - ele_y - dz * ele_sy,
            'SZ': dz,
            'TX': np.tan(np.arctan(bt_sx) - np.arctan(ele_sx)),
            'TY': np.tan(np.arctan(bt_sy) - np.arctan(ele_sy)),
            'ele_P': [ele_p] * len(bt_x),
        })
    return showers

In [53]:
# Build one dict of electron-relative quantities per row of pmc.
showers = normalize_showers(pmc=pmc)

100%|██████████| 9106/9106 [00:43<00:00, 209.77it/s]


In [54]:
# Keep only showers with strictly more than 70 and strictly fewer than 3000
# entries (every per-entry array of a shower has the same length as 'TX').
showers = [candidate for candidate in showers if 70 < len(candidate['TX']) < 3000]

In [55]:
# Number of showers currently held in the `showers` list.
len(showers)

8019

In [58]:
# One row per shower dict; each key becomes a column whose cells hold that
# shower's per-entry values.
df = pd.DataFrame(showers)

In [59]:
# to_csv writes the text form of each cell. numpy abbreviates arrays with more
# than 1000 elements ("[a b c ... x y z]"), which would silently drop data for
# large showers, so arrays are converted to plain lists before writing.
df.apply(
    lambda column: column.map(
        lambda cell: cell.tolist() if isinstance(cell, np.ndarray) else cell
    )
).to_csv('./data/showers.csv')

In [49]:
# NOTE(review): the recorded output lists 'chisquare', but normalize_showers
# as defined in this notebook has that key commented out. This cell ran as
# In [49], before the function cell In [52], so the output is presumably stale.
showers[0].keys()

dict_keys(['SX', 'SY', 'SZ', 'TX', 'TY', 'ele_P', 'chisquare'])

In [51]:
# len(showers[0]['SX']), len(showers[0]['SY']), len(showers[0]['SZ']), len(showers[0]['TX']), len(showers[0]['TY']), len(showers[0]['ele_P']), len(showers[0]['chisquare'])

(317, 317, 317, 317, 317, 317, 321)