In [59]:
import pandas as pd
import numpy as np
import uproot

In [173]:
def genie_parser(events):
    """Fetch the relevant information from GENIE events (rootracker format).

    Parameters
    ----------
    events : mapping
        The GENIE event tree, indexed by branch name. Every branch must
        expose ``.array(library="np")`` returning one entry per event.

    Returns
    -------
    pd.DataFrame
        One row per event with the columns ``event_description``,
        ``event_id``, ``event_children``, ``event_prob``, ``event_xsec``,
        ``event_pdg_id``, ``event_momenta``, ``event_vertex``,
        ``event_coords``, ``event_weight`` and ``event_status``.
        ``event_status`` holds, per event, a list with one human-readable
        label per particle.
    """
    # Status code -> label. Codes that are not listed here do not abort the
    # parsing; they are reported as 'unknown (<code>)' instead.
    status_labels = {
        -1: 'undefined',
        0: 'initial',
        1: 'final',
        2: 'intermediate',
        3: 'decayed',
        10: 'correlated nucleon',
        11: 'nucleon target',
        12: 'DIS pre-frag hadronic state',
        13: 'resonance',
        14: 'hadron in nucleus',
        15: 'final nuclear',
        16: 'nucleon cluster target',
    }

    dic = {}
    dic['event_description'] = events['EvtCode/fString'].array(library='np')  # String describing the event
    dic['event_id'] = events['EvtNum'].array(library="np")  # Number of the event
    dic['event_children'] = events['StdHepN'].array(library="np")  # Number of the generated sub-particles
    dic['event_prob'] = events['EvtProb'].array(library="np")  # Probability of the event
    dic['event_xsec'] = events['EvtXSec'].array(library="np")  # Total xsec of the event
    dic['event_pdg_id'] = events['StdHepPdg'].array(library="np")  # PDG ids of all produced particles
    dic['event_momenta'] = events['StdHepP4'].array(library="np")  # Momenta of the particles
    # Vertex of the event; split row by row so that each cell of the frame
    # holds the vertex array of a single event.
    vertices = events['EvtVtx'].array(library="np")
    dic['event_vertex'] = [np.array(vtx) for vtx in vertices]
    dic['event_coords'] = events['StdHepX4'].array(library="np")  # Position of the particles
    dic['event_weight'] = events['EvtWght'].array(library='np')  # Weight of the events

    # Converting the status codes into labels. A plain list of lists is used
    # (instead of np.array(..., dtype=object)) because numpy would build a
    # 2-D array whenever all events contain the same number of particles,
    # which the DataFrame constructor rejects.
    status_codes = events['StdHepStatus'].array(library="np")  # Status of the particle
    dic['event_status'] = [
        [
            status_labels.get(int(code), f'unknown ({int(code)})')
            for code in event
        ]
        for event in status_codes
    ]
    return pd.DataFrame.from_dict(dic)

def _status_mask(statuses, label):
    """Return a boolean array flagging the particles whose status is ``label``."""
    return np.array([status == label for status in statuses], dtype=bool)


def _object_column(items):
    """Pack ``items`` into a 1-D object array holding one item per event.

    Filling the array element by element keeps it one-dimensional even when
    all items share the same shape (or when there is only a single event).
    """
    column = np.empty(len(items), dtype=object)
    for row, item in enumerate(items):
        column[row] = item
    return column


def final_parser(parsed_events: pd.DataFrame):
    """Fetch the initial and final states of the parsed events.

    The first particle of every event is taken as the injected particle and
    the second one as the target. Outgoing particles are selected through
    their status label: ``'final'`` for the regular final state and
    ``'final nuclear'`` for the nuclear final state.

    Parameters
    ----------
    parsed_events : pd.DataFrame
        The parsed events. The columns ``event_description``,
        ``event_xsec``, ``event_vertex``, ``event_momenta``,
        ``event_pdg_id`` and ``event_status`` are read.

    Returns
    -------
    pd.DataFrame
        The initial + final state info, one row per event. Each momentum
        entry is read as three momentum components followed by the energy.
    """
    momenta = [np.asarray(event) for event in parsed_events['event_momenta']]
    pdg_ids = [np.asarray(event) for event in parsed_events['event_pdg_id']]
    statuses = list(parsed_events['event_status'])

    # One boolean mask per event and selection. They are kept in plain lists
    # so that every mask stays a genuine boolean array usable as an index.
    final_masks = [_status_mask(event, 'final') for event in statuses]
    nuclear_masks = [_status_mask(event, 'final nuclear') for event in statuses]

    dic = {}
    dic['event_descr'] = parsed_events['event_description']
    dic['event_xsec'] = parsed_events['event_xsec']
    dic['event_vertex'] = parsed_events.event_vertex
    # Initial state: row 0 is the injected particle, row 1 the target.
    dic['init_inj_e'] = np.array([p4[0][3] for p4 in momenta])
    dic['init_inj_p'] = [np.array(p4[0][:3]) for p4 in momenta]
    dic['init_target_e'] = np.array([p4[1][3] for p4 in momenta])
    dic['init_inj_id'] = np.array([ids[0] for ids in pdg_ids])
    dic['init_target_id'] = np.array([ids[1] for ids in pdg_ids])
    # Final state particles.
    dic['final_ids'] = _object_column(
        [ids[mask] for ids, mask in zip(pdg_ids, final_masks)]
    )
    dic['final_e'] = _object_column(
        [p4[:, 3][mask] for p4, mask in zip(momenta, final_masks)]
    )
    dic['final_p'] = _object_column(
        [p4[:, :3][mask] for p4, mask in zip(momenta, final_masks)]
    )
    # Final nuclear state.
    dic['final_nuc_ids'] = _object_column(
        [ids[mask] for ids, mask in zip(pdg_ids, nuclear_masks)]
    )
    dic['final_nuc_e'] = _object_column(
        [p4[:, 3][mask] for p4, mask in zip(momenta, nuclear_masks)]
    )
    return pd.DataFrame.from_dict(dic)

In [176]:
# Read the rootracker tree and turn it into the per-event frame while the
# file is open.
with uproot.open('gtrac_1_nue.root') as file:
    events = file['gRooTracker']
    parsed_events = genie_parser(events)
# genie_parser has already pulled every branch into numpy arrays, so the
# initial/final state selection does not need the open file any more.
final_parsed = final_parser(parsed_events)

In [177]:
# Show the frame returned by final_parser as the output of this cell.
final_parsed

Unnamed: 0,event_descr,event_xsec,event_vertex,init_inj_e,init_inj_p,init_target_e,init_inj_id,init_target_id,final_ids,final_e,final_p,final_nuc_ids,final_nuc_e
0,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],QES;",2.417622,"[0.0, 0.0, 0.0, 0.0]",0.434245,"[0.0, 0.0, 0.4342451003287754]",11.174863,14,1000060120,"[13, 2212]","[0.2812121215868135, 1.0654573858712357]","[[0.1933804519351921, -0.14307357003140753, 0....",[2000000002],[10.262438692870726]
1,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],QES;",5.076822,"[0.0, 0.0, 0.0, 0.0]",2.323103,"[0.0, 0.0, 2.323102808265489]",11.174863,14,1000060120,"[13, 2212]","[2.2262342103757886, 1.0112779131884424]","[[0.30931396395741345, 0.0445505144730447, 2.2...",[2000000002],[10.260453784701259]
2,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],RES;...",0.157555,"[0.0, 0.0, 0.0, 0.0]",1.286015,"[0.0, 0.0, 1.2860148150828978]",11.174863,14,1000060120,"[13, 2112, 2112, 2212]","[0.4810411233983117, 1.4129668406657494, 1.042...","[[-0.3096691413195228, -0.08706404006504413, 0...",[2000000002],[8.36715183999582]
3,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],RES;...",0.787510,"[0.0, 0.0, 0.0, 0.0]",1.891955,"[0.0, 0.0, 1.8919546855057021]",11.174863,14,1000060120,"[13, 2212, 111]","[0.2781896818998354, 1.0820755909413229, 1.451...","[[-0.1827551461818699, -0.18054668265311294, -...",[2000000002],[10.25508719694302]
4,nu:14;tgt:1000060120;N:2212;q:1(s);proc:Weak[N...,0.150141,"[0.0, 0.0, 0.0, 0.0]",4.799356,"[0.0, 0.0, 4.799355942954444]",11.174863,14,1000060120,"[14, 2112, 211, 111]","[2.7728271136055214, 1.8034664795646638, 0.752...","[[-0.788885309874114, 0.0712998760419519, 2.65...",[2000000002],[10.25302983910591]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],RES;...",1.017069,"[0.0, 0.0, 0.0, 0.0]",3.080875,"[0.0, 0.0, 3.080875215560832]",11.174863,14,1000060120,"[13, 2212, -211, 211, 111]","[0.9938833457498917, 1.5063703777383424, 0.810...","[[-0.6272159878442349, 0.197822427328592, 0.73...",[2000000002],[10.25470096427137]
996,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],QES;",4.946482,"[0.0, 0.0, 0.0, 0.0]",0.996191,"[0.0, 0.0, 0.9961914162913064]",11.174863,14,1000060120,"[13, 2112, 2212]","[0.9628922545889975, 0.9552543287071839, 0.956...","[[-0.07812932050486668, -0.09229222244009516, ...",[2000000002],[9.296472718544326]
997,"nu:14;tgt:1000060120;N:2212;proc:Weak[CC],RES;...",4.171849,"[0.0, 0.0, 0.0, 0.0]",4.694200,"[0.0, 0.0, 4.694200131941584]",11.174863,14,1000060120,"[13, 2212, 2112, 211]","[3.321388778339659, 1.982438765532098, 1.01434...","[[0.2245277401059632, 1.0175116884415771, 3.15...",[2000000002],[9.312156817499964]
998,"nu:14;tgt:1000060120;N:2112;proc:Weak[CC],QES;",5.052509,"[0.0, 0.0, 0.0, 0.0]",2.431264,"[0.0, 0.0, 2.4312643181002835]",11.174863,14,1000060120,"[13, 2212]","[1.0180434386141433, 2.3274945143607604]","[[0.45581517961643914, -0.7511811864552709, 0....",[2000000002],[10.26058946512538]
