MC simulated samples of $\Lambda_c^{+}$ and $D^+$

In [None]:
import uproot
import numpy as np
import pandas as pd
import awkward as ak
import os

import matplotlib.pyplot as plt

In [None]:
# Directory that holds the forced-decay MC ntuples read by this notebook.
dir_path = "/global/cfs/cdirs/atlas/gottino/Charm/Analysis/run_batch_gup/ntuples"

# os.listdir returns entries in arbitrary order; sort them so that positional
# lookups such as files[0] pick the same ntuple on every run and every machine.
files = sorted(os.listdir(dir_path))
files

['CharmAnalysis.mc.ForcedDecay.Dplus.tree.root',
 'CharmAnalysis.mc.ForcedDecay.LabmdaC.tree.root']

In [None]:
#opens one ntuple located under dir_path: returns its "CharmAnalysis;1" tree
def process_ntuple(ntuple):
    """Open an ntuple file and return its analysis tree.

    Parameters
    ----------
    ntuple : str
        File name of the ntuple, relative to the module-level ``dir_path``.

    Returns
    -------
    The object stored under the key ``"CharmAnalysis;1"`` in the opened file.
    """
    # The opened file is deliberately not closed here: the returned tree is
    # read from later, after this function has returned.
    ntuple_path = "/".join((dir_path, ntuple))
    root_file = uproot.open(ntuple_path)
    return root_file["CharmAnalysis;1"]

#takes batch index, tree, list of branches, and batch size: returns that batch of data
def show(event, TTree, branches, batch_size):
    """Return a single batch of ``branches`` read from ``TTree``.

    Parameters
    ----------
    event : int
        Zero-based index of the batch to return. Despite the name, this counts
        batches yielded by ``TTree.iterate``, not individual entries.
    TTree : object
        Tree-like object exposing ``num_entries`` and
        ``iterate(branches, step_size=..., library=..., cut=...)``.
    branches : list of str
        Branch names to read; also used to select the columns of the result.
    batch_size : int
        Number of entries per batch. Pass 0 to read the whole tree as a
        single batch.

    Returns
    -------
    The ``event``-th batch yielded by ``TTree.iterate``, indexed by ``branches``.

    Raises
    ------
    IndexError
        If the tree yields fewer than ``event + 1`` batches (including the case
        of no batches at all, or a negative ``event``). Previously this either
        failed with an unbound local variable or silently returned the last batch.
    """
    #if we want the entire data set set batch size = 0
    if batch_size == 0:
        batch_size = TTree.num_entries

    #iterates through TTree as a generator and stops at the requested batch
    batches = TTree.iterate(branches, step_size = batch_size, library = "pd", cut = None)
    for batch_index, batch in enumerate(batches):
        if batch_index == event:
            return batch[branches]

    raise IndexError(
        f"batch index {event} is out of range for batch size {batch_size}"
    )

In [None]:
# Open the first listed ntuple and keep its tree for the cells that follow.
TTree = process_ntuple(files[0])
# Show the tree's name as the cell output.
TTree.name

'CharmAnalysis'

In [None]:
# First batch (up to 20000 entries) of the candidate kinematic branches.
df = show(0, TTree, ["DMesons_pt", "DMesons_m", "DMesons_eta", "DMesons_phi"], 20000)

#drop empty rows
# Select rows with a boolean mask instead of passing positional indices to
# DataFrame.drop (which drops by label); the mask gives the same result here
# and stays correct whatever index labels the batch carries.
has_candidates = np.asarray([len(x) > 0 for x in df['DMesons_pt']], dtype=bool)
df = df.loc[has_candidates].reset_index(drop=True)
df

Unnamed: 0,DMesons_pt,DMesons_m,DMesons_eta,DMesons_phi
0,"[5735.74853515625, 5735.74853515625]","[1860.0809326171875, 2207.808349609375]","[0.9682132005691528, 0.9682132005691528]","[1.2285282611846924, 1.2285282611846924]"
1,[10047.125],[1862.4176025390625],[0.08182407170534134],[2.709681510925293]
2,"[18993.7421875, 18993.7421875]","[1853.90625, 2398.886962890625]","[-1.6619627475738525, -1.6619627475738525]","[-0.46051180362701416, -0.46051180362701416]"
3,"[10118.494140625, 10118.494140625]","[1874.203125, 2468.1806640625]","[0.041659336537122726, 0.041659336537122726]","[0.780849277973175, 0.780849277973175]"
4,"[12579.470703125, 12579.470703125]","[1884.987060546875, 2220.384521484375]","[-1.1025604009628296, -1.1025604009628296]","[0.7788794040679932, 0.7788794040679932]"
...,...,...,...,...
10994,[8557.0830078125],[2365.51904296875],[-1.1406751871109009],[-0.47318047285079956]
10995,[31940.462890625],[2077.287109375],[-2.006657361984253],[-2.3032186031341553]
10996,"[35012.45703125, 35012.45703125]","[1858.9073486328125, 2130.3583984375]","[0.14026063680648804, 0.14026063680648804]","[2.5639569759368896, 2.5639569759368896]"
10997,"[12206.798828125, 12206.798828125]","[1848.3248291015625, 2283.203369140625]","[1.3441436290740967, 1.3441436290740967]","[2.958705425262451, 2.958705425262451]"


In [8]:
# List the branch names available in the tree.
TTree.keys()

['EventInfo_eventNumber',
 'EventInfo_RandomRunNumber',
 'CharmEventInfo_TopWeight',
 'CharmEventInfo_PV_X',
 'CharmEventInfo_PV_Y',
 'CharmEventInfo_PV_Z',
 'CharmEventInfo_beamPosX',
 'CharmEventInfo_beamPosY',
 'CharmEventInfo_beamPosZ',
 'CharmEventInfo_beamPosSigmaX',
 'CharmEventInfo_beamPosSigmaY',
 'CharmEventInfo_beamPosSigmaZ',
 'DMesons_pt',
 'DMesons_m',
 'DMesons_eta',
 'DMesons_phi',
 'DMesons_pdgId',
 'DMesons_ptcone40',
 'DMesons_D0Index',
 'DMesons_DeltaMass',
 'DMesons_SlowPionD0',
 'DMesons_SlowPionZ0SinTheta',
 'DMesons_decayType',
 'DMesons_truthBarcode',
 'DMesons_fitOutput__Impact',
 'DMesons_fitOutput__ImpactError',
 'DMesons_fitOutput__ImpactZ0',
 'DMesons_fitOutput__ImpactZ0Error',
 'DMesons_fitOutput__ImpactTheta',
 'DMesons_fitOutput__ImpactZ0SinTheta',
 'DMesons_fitOutput__ImpactSignificance',
 'DMesons_fitOutput__Charge',
 'DMesons_fitOutput__Lxy',
 'DMesons_fitOutput__LxyErr',
 'DMesons_fitOutput__Chi2',
 'DMesons_costhetastar',
 'DMesons_mKpi1',
 'DMeson

In [16]:
# NOTE(review): this reopens files[0], the same ntuple already loaded as TTree;
# confirm whether the other sample (files[1]) was intended here.
TTree2 = process_ntuple(files[0])

# First batch (up to 100 entries) of fit charge, Lxy and daughter pdgId branches.
df2 = show(0, TTree2, ['DMesons_fitOutput__Charge', 'DMesons_fitOutput__Lxy','DMesons_daughterInfo__pdgId'], 100)

# Keep only entries with at least one candidate. A boolean mask is used instead
# of passing positional indices to DataFrame.drop (which drops by label), so the
# selection does not depend on the batch's index labels.
has_candidates2 = np.asarray([len(x) > 0 for x in df2['DMesons_fitOutput__Charge']], dtype=bool)
df2 = df2.loc[has_candidates2].reset_index(drop=True)
df2

Unnamed: 0,DMesons_fitOutput__Charge,DMesons_fitOutput__Lxy,DMesons_daughterInfo__pdgId
0,"[1, 1]","[3.940532684326172, 3.940532684326172]","((-321, 211, 211), (2212, -321, 211))"
1,[1],[2.3395302295684814],"((-321, 211, 211))"
2,"[1, 1]","[1.9184143543243408, 1.9184143543243408]","((-321, 211, 211), (2212, -321, 211))"
3,"[1, 1]","[2.8901257514953613, 2.8901257514953613]","((-321, 211, 211), (2212, -321, 211))"
4,"[1, 1]","[0.9249186515808105, 0.9249186515808105]","((-321, 211, 211), (2212, -321, 211))"
...,...,...,...
60,"[1, 1]","[0.9720778465270996, 0.9720778465270996]","((-321, 211, 211), (2212, -321, 211))"
61,[1],[0.21821393072605133],"((2212, -321, 211))"
62,"[1, 1]","[14.670319557189941, 14.670319557189941]","((-321, 211, 211), (2212, -321, 211))"
63,[1],[0.3555510938167572],"((2212, -321, 211))"


In [78]:
# Write the filtered frame to data2.csv in the working directory, without the index column.
# NOTE(review): the columns hold per-entry sequences, which CSV can only store as
# text — confirm that downstream readers parse them back as intended.
df2.to_csv('data2.csv', index=False)