# root2pickle

This notebook reads a ROOT file and saves the pickle that we want for the nflow study. The first part imports the libraries and defines the constants and helper functions.

In [1]:
import uproot
import pandas as pd
import numpy as np
from copy import copy

In [2]:
# Kinematic constants used throughout the notebook.
# NOTE(review): the numerical values look like GeV (proton mass, electron mass
# converted from MeV by the 0.001 factor) -- confirm the unit convention.
M = 0.938272081 # target mass (value matches the proton mass -- presumably a proton target)
me = 0.5109989461 * 0.001 # electron mass
ebeam = 10.604 # beam energy
pbeam = np.sqrt(ebeam * ebeam - me * me) # beam electron momentum, p = sqrt(E^2 - m^2)
beam = [0, 0, pbeam] # beam vector: momentum entirely along +z
target = [0, 0, 0] # target vector (all components zero); not referenced in the cells shown here

In [3]:
def dot(vec1, vec2):
    """Return the dot product of two 3d vectors.

    Only components 0, 1 and 2 of each argument are read; the components may
    be scalars or array-likes that support elementwise ``*`` and ``+``.
    """
    x_term = vec1[0] * vec2[0]
    y_term = vec1[1] * vec2[1]
    z_term = vec1[2] * vec2[2]
    return x_term + y_term + z_term

def mag(vec1):
    """Return the L2 norm (length) of a 3d vector."""
    norm_squared = dot(vec1, vec1)
    return np.sqrt(norm_squared)

def mag2(vec1):
    """Return the squared L2 norm of a 3d vector (no square root is taken)."""
    norm_squared = dot(vec1, vec1)
    return norm_squared

def cosTheta(vec1, vec2):
    """Return the cosine of the angle between two 3d vectors.

    Computed as dot(vec1, vec2) / sqrt(|vec1|^2 * |vec2|^2); no clamping is
    applied here, so round-off can push the result slightly outside [-1, 1].
    """
    numerator = dot(vec1, vec2)
    denominator = np.sqrt(mag2(vec1) * mag2(vec2))
    return numerator / denominator

def angle(vec1, vec2):
    """Return the angle between two 3d vectors, in degrees.

    The cosine is clamped to [-1, 1] before calling arccos. Floating-point
    round-off can push the cosine of (anti)parallel vectors marginally outside
    that range, and arccos would then return NaN. Previously only the upper
    bound was clamped, so nearly antiparallel vectors could still yield NaN.
    NaN cosines (e.g. from a zero-length vector) still propagate as NaN.
    """
    cosine = np.maximum(-1, np.minimum(1, cosTheta(vec1, vec2)))
    return 180/np.pi*np.arccos(cosine)

def cross(vec1, vec2):
    """Return the cross product vec1 x vec2 of two 3d vectors as a 3-element list."""
    ax, ay, az = vec1[0], vec1[1], vec1[2]
    bx, by, bz = vec2[0], vec2[1], vec2[2]
    return [
        ay*bz - az*by,
        az*bx - ax*bz,
        ax*by - ay*bx,
    ]

def vecAdd(gam1, gam2):
    """Return the componentwise sum of two 3d vectors as a 3-element list."""
    return [gam1[axis] + gam2[axis] for axis in range(3)]

def pi0Energy(gam1, gam2):
    """Return the reconstructed pi0 energy from two 3d photon momenta.

    Each photon contributes the magnitude of its momentum, so the result is
    |gam1| + |gam2|.
    """
    energy1 = mag(gam1)
    energy2 = mag(gam2)
    return energy1 + energy2

def pi0InvMass(gam1, gam2):
    """Return the two-photon invariant mass from two 3d photon momenta.

    The squared mass is (|gam1| + |gam2|)^2 - |gam1 + gam2|^2. Entries whose
    squared mass is not >= 0 (negative or NaN) are first replaced by the
    sentinel 10**6; after the square root, anything above 100 (which includes
    those sentinels) is reported as -1000.
    """
    totalEnergy = pi0Energy(gam1, gam2)
    totalMomentum = vecAdd(gam1, gam2)
    massSquared = totalEnergy**2-mag2(totalMomentum)
    # mark unphysical squared masses so the square root below stays defined
    massSquared = np.where(massSquared >= 0, massSquared, 10**6)
    mass = np.sqrt(massSquared)
    return np.where(mass > 100, -1000, mass)

def getPhi(vec1):
    """Return the azimuthal angle of a 3d vector in degrees, i.e. arctan2(y, x)."""
    phi_radians = np.arctan2(vec1[1], vec1[0])
    return 180/np.pi*phi_radians

def getTheta(vec1):
    """Return the polar angle of a 3d vector in degrees.

    Computed as arctan2(transverse length, z), where the transverse length is
    sqrt(x^2 + y^2).
    """
    transverse = np.sqrt(vec1[0]*vec1[0]+vec1[1]*vec1[1])
    theta_radians = np.arctan2(transverse, vec1[2])
    return 180/np.pi*theta_radians

def getEnergy(vec1, mass):
    """Return the energy sqrt(p**2 + m**2) for a 3d momentum ``vec1`` and mass ``mass``."""
    momentum_squared = mag2(vec1)
    return np.sqrt(momentum_squared+mass**2)


Read the ROOT file that Bobby prepared.

In [4]:
# Open the ROOT file and take the object stored under the key "T"; the
# following cells read its branches through `tree`.
# NOTE(review): this is an absolute, machine-specific path -- it has to be
# adjusted before the notebook can run anywhere else.
file = uproot.open("/Users/sangbaek/Dropbox (MIT)/data/project/merged_9628_files.root")
tree = file["T"]

In [5]:
# keys and (initially empty) data frames for the Z part, i.e. the Gen* branches
eleKeysGen = ["GenEpx", "GenEpy", "GenEpz"]
proKeysGen = ["GenPpx", "GenPpy", "GenPpz"]
gamKeysGen = ["GenGpx", "GenGpy", "GenGpz"]
df_electronGen, df_protonGen, df_gammaGen = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# read every branch (first 1000 entries only) into its frame, one column per key
for genFrame, genKeys in ((df_electronGen, eleKeysGen),
                          (df_protonGen, proKeysGen),
                          (df_gammaGen, gamKeysGen)):
    for key in genKeys:
        genFrame[key] = tree[key].array(library="pd", entry_stop=1000)

# convert the momentum columns to standard double
df_electronGen = df_electronGen.astype(dict.fromkeys(eleKeysGen, float))
df_protonGen = df_protonGen.astype(dict.fromkeys(proKeysGen, float))
df_gammaGen = df_gammaGen.astype(dict.fromkeys(gamKeysGen, float))

# dummy 'event' column used as the merge key; the photon frame takes it from
# the 'entry' level of its index, the other two from the index itself
df_electronGen.loc[:, 'event'] = df_electronGen.index
df_protonGen.loc[:, 'event'] = df_protonGen.index
df_gammaGen.loc[:, 'event'] = df_gammaGen.index.get_level_values('entry')

# put 'event' first in the electron frame for readability
df_electronGen = df_electronGen.loc[:, ["event"] + eleKeysGen]

In [6]:
# two g's to one gg: put both generated photons of an event on a single row.
gamGen = [df_gammaGen[axis] for axis in ("GenGpx", "GenGpy", "GenGpz")]
df_gammaGen.loc[:, 'GenGp'] = mag(gamGen)

# split by 'subentry' (0 and 1) and renumber both halves from zero so that
# they line up row by row
genSubentry = df_gammaGen.index.get_level_values('subentry')
gam1 = df_gammaGen[genSubentry==0].reset_index(drop=True)
gam2 = df_gammaGen[genSubentry==1].reset_index(drop=True)

# attach the second photon's columns with a "2" suffix
for genColumn in ("GenGp", "GenGpx", "GenGpy", "GenGpz"):
    gam1.loc[:, genColumn + "2"] = gam2.loc[:, genColumn]
df_gammaGen = gam1

# sort GenG indices so that GenGp > GenGp2. This is because Gp > Gp2 at reconstruction level.
# All values are read from the untouched df_gammaGen and written into a copy,
# so swapping one column never affects the comparison for the next one.
firstIsHarder = df_gammaGen["GenGp"]>df_gammaGen["GenGp2"]
df_gammaGencopy = copy(df_gammaGen)
for genColumn in ("GenGp", "GenGpx", "GenGpy", "GenGpz"):
    partnerColumn = genColumn + "2"
    df_gammaGencopy.loc[:, genColumn] = np.where(
        firstIsHarder, df_gammaGen.loc[:, genColumn], df_gammaGen.loc[:, partnerColumn])
    df_gammaGencopy.loc[:, partnerColumn] = np.where(
        firstIsHarder, df_gammaGen.loc[:, partnerColumn], df_gammaGen.loc[:, genColumn])
df_gammaGen = df_gammaGencopy


# spherical coordinates (magnitude, polar angle, azimuthal angle) per particle
eleGen = [df_electronGen[axis] for axis in ("GenEpx", "GenEpy", "GenEpz")]
df_electronGen.loc[:, 'GenEp'] = mag(eleGen)
df_electronGen.loc[:, 'GenEtheta'] = getTheta(eleGen)
df_electronGen.loc[:, 'GenEphi'] = getPhi(eleGen)

proGen = [df_protonGen[axis] for axis in ("GenPpx", "GenPpy", "GenPpz")]
df_protonGen.loc[:, 'GenPp'] = mag(proGen)
df_protonGen.loc[:, 'GenPtheta'] = getTheta(proGen)
df_protonGen.loc[:, 'GenPphi'] = getPhi(proGen)

# the photon magnitudes GenGp / GenGp2 already exist, so only angles are added
gamGen = [df_gammaGen[axis] for axis in ("GenGpx", "GenGpy", "GenGpz")]
df_gammaGen.loc[:, 'GenGtheta'] = getTheta(gamGen)
df_gammaGen.loc[:, 'GenGphi'] = getPhi(gamGen)

gamGen2 = [df_gammaGen[axis] for axis in ("GenGpx2", "GenGpy2", "GenGpz2")]
# elementwise check that the stored GenGp2 equals the recomputed magnitude
debug = df_gammaGen.loc[:, 'GenGp2'] == mag(gamGen2)
df_gammaGen.loc[:, 'GenGtheta2'] = getTheta(gamGen2)
df_gammaGen.loc[:, 'GenGphi2'] = getPhi(gamGen2)

# one z row per event: electron, proton and photon-pair columns side by side
df_z = df_electronGen.merge(df_protonGen, how='inner', on='event')
df_z = df_z.merge(df_gammaGen, how='inner', on='event')

In [7]:
# keys and (initially empty) data frames for the X part
eleKeysRec = ["Epx", "Epy", "Epz", "Esector"]
proKeysRec = ["Ppx", "Ppy", "Ppz", "Psector"]
gamKeysRec = ["Gpx", "Gpy", "Gpz", "Gsector"]
df_electronRec, df_protonRec, df_gammaRec = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# read every branch (first 1000 entries only) into its frame, one column per key
for recFrame, recKeys in ((df_electronRec, eleKeysRec),
                          (df_protonRec, proKeysRec),
                          (df_gammaRec, gamKeysRec)):
    for key in recKeys:
        recFrame[key] = tree[key].array(library="pd", entry_stop=1000)

# convert the momentum columns to standard double (sector columns are left as read)
df_electronRec = df_electronRec.astype({axis: float for axis in ("Epx", "Epy", "Epz")})
df_protonRec = df_protonRec.astype({axis: float for axis in ("Ppx", "Ppy", "Ppz")})
df_gammaRec = df_gammaRec.astype({axis: float for axis in ("Gpx", "Gpy", "Gpz")})

# dummy 'event' column used as the merge key; protons and photons take it from
# the 'entry' level of their index, and photons also keep their 'subentry' as GIndex
df_electronRec.loc[:,'event'] = df_electronRec.index
df_protonRec.loc[:,'event'] = df_protonRec.index.get_level_values('entry')
recPhotonIndex = df_gammaRec.index
df_gammaRec.loc[:,'event'] = recPhotonIndex.get_level_values('entry')
df_gammaRec.loc[:,'GIndex'] = recPhotonIndex.get_level_values('subentry')

In [8]:
# save only FD protons and photons (sector value below 7)
protonInFD = df_protonRec["Psector"]<7
photonInFD = df_gammaRec["Gsector"]<7
df_protonRec = df_protonRec[protonInFD]
df_gammaRec = df_gammaRec[photonInFD]

# all photon pairs within an event; GIndex < GIndex2 keeps each unordered pair once
df_gg = df_gammaRec.merge(df_gammaRec, how='outer', on='event', suffixes=("", "2"))
df_gg = df_gg[df_gg["GIndex"] < df_gg["GIndex2"]]
df_ep = df_electronRec.merge(df_protonRec, how='outer', on='event')

# combine, then drop every row that lacks a proton or either photon
df_epgg = df_ep.merge(df_gg, how='outer', on='event')
rowIncomplete = (np.isnan(df_epgg["Ppx"])
                 | np.isnan(df_epgg["Gpx"])
                 | np.isnan(df_epgg["Gpx2"]))
df_epgg = df_epgg[~rowIncomplete]

Now we are done reading the useful kinematic variables. Next, compute the quantities needed for the DVpi0P exclusivity cuts.

In [9]:
# useful objects
# For each particle: 3-momentum as a list of columns, then magnitude, energy,
# polar angle and azimuthal angle (angles in degrees, from getTheta / getPhi).
ele = [df_epgg['Epx'], df_epgg['Epy'], df_epgg['Epz']]
df_epgg.loc[:, 'Ep'] = mag(ele)
df_epgg.loc[:, 'Ee'] = getEnergy(ele, me)
df_epgg.loc[:, 'Etheta'] = getTheta(ele)
df_epgg.loc[:, 'Ephi'] = getPhi(ele)

pro = [df_epgg['Ppx'], df_epgg['Ppy'], df_epgg['Ppz']]
df_epgg.loc[:, 'Pp'] = mag(pro)
df_epgg.loc[:, 'Pe'] = getEnergy(pro, M)
df_epgg.loc[:, 'Ptheta'] = getTheta(pro)
df_epgg.loc[:, 'Pphi'] = getPhi(pro)

# photons are treated as massless, so Ge equals Gp
gam = [df_epgg['Gpx'], df_epgg['Gpy'], df_epgg['Gpz']]
df_epgg.loc[:, 'Gp'] = mag(gam)
df_epgg.loc[:, 'Ge'] = getEnergy(gam, 0)
df_epgg.loc[:, 'Gtheta'] = getTheta(gam)
df_epgg.loc[:, 'Gphi'] = getPhi(gam)

gam2 = [df_epgg['Gpx2'], df_epgg['Gpy2'], df_epgg['Gpz2']]
df_epgg.loc[:, 'Gp2'] = mag(gam2)
df_epgg.loc[:,'Ge2'] = getEnergy(gam2, 0)
df_epgg.loc[:, 'Gtheta2'] = getTheta(gam2)
df_epgg.loc[:, 'Gphi2'] = getPhi(gam2)

# summed momentum of the two photons (the pi0 candidate)
pi0 = vecAdd(gam, gam2)
# beam momentum (0, 0, pbeam) minus the scattered-electron momentum
VGS = [-df_epgg['Epx'], -df_epgg['Epy'], pbeam - df_epgg['Epz']]
# cross products of the vectors above; not referenced again in the cells shown here
v3l = cross(beam, ele)
v3h = cross(pro, VGS)
v3g = cross(VGS, gam)
# missing momenta: beam minus (e, p), beam minus (e, g, g2), beam minus (e, p, g, g2)
VmissPi0 = [-df_epgg["Epx"] - df_epgg["Ppx"], -df_epgg["Epy"] -
            df_epgg["Ppy"], pbeam - df_epgg["Epz"] - df_epgg["Ppz"]]
VmissP = [-df_epgg["Epx"] - df_epgg["Gpx"] - df_epgg["Gpx2"], -df_epgg["Epy"] -
            df_epgg["Gpy"] - df_epgg["Gpy2"], pbeam - df_epgg["Epz"] - df_epgg["Gpz"] - df_epgg["Gpz2"]]
Vmiss = [-df_epgg["Epx"] - df_epgg["Ppx"] - df_epgg["Gpx"] - df_epgg["Gpx2"],
            -df_epgg["Epy"] - df_epgg["Ppy"] - df_epgg["Gpy"] - df_epgg["Gpy2"],
            pbeam - df_epgg["Epz"] - df_epgg["Ppz"] - df_epgg["Gpz"] - df_epgg["Gpz2"]]

# store the three components of the full missing momentum
df_epgg.loc[:, 'Mpx'], df_epgg.loc[:, 'Mpy'], df_epgg.loc[:, 'Mpz'] = Vmiss

# binning kinematics
# Q2 = -(nu^2 - |VGS|^2) with nu = ebeam - Ee; xB = Q2 / (2 M nu)
df_epgg.loc[:,'Q2'] = -((ebeam - df_epgg['Ee'])**2 - mag2(VGS))
df_epgg.loc[:,'nu'] = (ebeam - df_epgg['Ee'])
df_epgg.loc[:,'xB'] = df_epgg['Q2'] / 2.0 / M / df_epgg['nu']
# NOTE(review): 2*M*(Pe - M) is positive whenever Pe > M, so this column looks
# like -t in the usual Mandelstam sign convention -- confirm the intended sign.
df_epgg.loc[:,'t'] = 2 * M * (df_epgg['Pe'] - M)
# W^2 = (ebeam + M - Ee)^2 - |VGS|^2, floored at 0 before the square root
df_epgg.loc[:,'W'] = np.sqrt(np.maximum(0, (ebeam + M - df_epgg['Ee'])**2 - mag2(VGS)))
# transverse (x, y) magnitude of the summed e + p + g + g2 momentum
df_epgg.loc[:,'MPt'] = np.sqrt((df_epgg["Epx"] + df_epgg["Ppx"] + df_epgg["Gpx"] + df_epgg["Gpx2"])**2 +
                         (df_epgg["Epy"] + df_epgg["Ppy"] + df_epgg["Gpy"] + df_epgg["Gpy2"])**2)

# exclusivity variables
# missing mass squared = (energy difference)^2 - |missing momentum|^2 for the
# ep, egg and epgg final states; ME_epgg is the missing energy of epgg
df_epgg.loc[:,'MM2_ep'] = (-M - ebeam + df_epgg["Ee"] +
                     df_epgg["Pe"])**2 - mag2(VmissPi0)
df_epgg.loc[:,'MM2_egg'] = (-M - ebeam + df_epgg["Ee"] +
                     df_epgg["Ge"] + df_epgg["Ge2"])**2 - mag2(VmissP)
df_epgg.loc[:,'MM2_epgg'] = (-M - ebeam + df_epgg["Ee"] + df_epgg["Pe"] +
                     df_epgg["Ge"] + df_epgg["Ge2"])**2 - mag2(Vmiss)
df_epgg.loc[:,'ME_epgg'] = (M + ebeam - df_epgg["Ee"] - df_epgg["Pe"] - df_epgg["Ge"] - df_epgg["Ge2"])
df_epgg.loc[:,'Mpi0'] = pi0InvMass(gam, gam2)
# angle (degrees) between the ep missing momentum and the two-photon momentum
df_epgg.loc[:,'reconPi'] = angle(VmissPi0, pi0)
# two-photon energy sum
df_epgg.loc[:,"Pie"] = df_epgg['Ge'] + df_epgg['Ge2']

# to select duplicates later
# distance of the reconstructed two-photon mass from .1349766, the reference pi0 mass value
df_epgg.loc[:, 'closeness'] = np.abs(df_epgg.loc[:, 'Mpi0']-.1349766)

Make the DVPi0P pairs, i.e. x.

In [10]:
#make dvpi0 pairs
# Kinematic cuts: each cut_* is a boolean Series aligned with df_epgg.
cut_xBupper = df_epgg["xB"] < 1  # xB
cut_xBlower = df_epgg["xB"] > 0  # xB
cut_Q2 = df_epgg["Q2"] > 1  # Q2
cut_W = df_epgg["W"] > 2  # W

# Exclusivity cuts
cut_mmep = df_epgg["MM2_ep"] < 0.7  # mmep
cut_meepgg = df_epgg["ME_epgg"] < 0.7  # meepgg
cut_mpt = df_epgg["MPt"] < 0.2  # mpt
cut_recon = df_epgg["reconPi"] < 2  # recon gam angle
cut_pi0upper = df_epgg["Mpi0"] < 0.2
cut_pi0lower = df_epgg["Mpi0"] > 0.07

# Sector cut: require that neither photon shares the electron's sector.
# The availability check must look at the COLUMNS. `"Esector" in df_epgg.index`
# tests the row labels, is always False here, and so silently disabled this cut.
if "Esector" in df_epgg.columns:
    cut_sector = (df_epgg["Esector"]!=df_epgg["Gsector"]) & (df_epgg["Esector"]!=df_epgg["Gsector2"])
else:
    # no sector information available: neutral element for the & chain below
    cut_sector = True

df_dvpi0 = df_epgg[cut_xBupper & cut_xBlower & cut_Q2 & cut_W & cut_mmep & cut_meepgg &
                   cut_mpt & cut_recon & cut_pi0upper & cut_pi0lower & cut_sector]

To treat duplicates, first check whether there are duplicated epgg pairs in one event.

In [11]:
# rows whose event number already appeared in an earlier row, photon columns only
duplicateColumns = ["event", "Gpx", "Gpy", "Gpz", "Gsector",
                    "Gpx2", "Gpy2", "Gpz2", "Gsector2", "Mpi0"]
df_dvpi0.loc[df_dvpi0["event"].duplicated(), duplicateColumns]

Unnamed: 0,event,Gpx,Gpy,Gpz,Gsector,Gpx2,Gpy2,Gpz2,Gsector2,Mpi0
647,316,0.712709,0.993355,4.560903,2.0,0.11115,0.176166,0.608301,2.0,0.122231
666,328,-0.866459,-1.123065,4.132185,5.0,-0.050055,-0.089777,0.351571,5.0,0.081418
667,328,-0.866459,-1.123065,4.132185,5.0,-0.041932,-0.072628,0.28837,5.0,0.071391
840,416,-0.903463,-0.194219,5.054287,4.0,-0.396398,-0.061595,1.848924,4.0,0.108156
952,467,-1.299427,-0.376316,4.269913,4.0,-0.310968,-0.061387,0.852699,4.0,0.114253
953,467,-1.299427,-0.376316,4.269913,4.0,-0.187794,-0.029593,0.514,4.0,0.097854
1227,617,0.598022,-0.994778,4.354409,6.0,0.260587,-0.310857,1.378853,6.0,0.125453
1423,737,0.563813,-0.873409,6.319451,6.0,0.099322,-0.095376,0.693587,6.0,0.112305
1428,740,-1.064291,-0.021332,4.560226,4.0,-0.453455,0.06649,2.12782,4.0,0.127915
1710,864,-0.513274,0.966071,7.387732,3.0,-0.087755,0.092933,0.745871,3.0,0.113689


Yes, there are some duplicates. Let's investigate one such event.

In [12]:
# all surviving epgg candidates of event 467
df_dvpi0.loc[df_dvpi0["event"] == 467]

Unnamed: 0,Epx,Epy,Epz,Esector,event,Ppx,Ppy,Ppz,Psector,Gpx,...,W,MPt,MM2_ep,MM2_egg,MM2_epgg,ME_epgg,Mpi0,reconPi,Pie,closeness
951,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.016221,0.06688,0.801213,-0.001412,-0.081116,0.122413,0.258922,5.510239,0.012563
952,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.038626,0.06688,0.963955,-0.000609,0.040323,0.114253,0.332448,5.3888,0.020723
953,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.159514,0.06688,1.475227,0.002235,0.401997,0.097854,0.481758,5.027125,0.037123


We only want one $x_i$ per event. Let's use the 'closeness' defined in cell 9, which is the distance between the reconstructed $\pi^0$ mass and the known $\pi^0$ mass.

In [13]:
# Order the candidates by event and, within each event, by ascending closeness,
# so the candidate nearest to the pi0 mass is the first row of its event.
# sort_values returns a new frame, so the result has to be assigned back. The
# previous version discarded both sorted frames (and sorted closeness in
# descending order), leaving df_dvpi0 unsorted for the de-duplication step.
df_dvpi0 = df_dvpi0.sort_values(by=['event', 'closeness'], ascending=[True, True])
df_dvpi0

Unnamed: 0,Epx,Epy,Epz,Esector,event,Ppx,Ppy,Ppz,Psector,Gpx,...,W,MPt,MM2_ep,MM2_egg,MM2_epgg,ME_epgg,Mpi0,reconPi,Pie,closeness
0,-0.165427,-1.352597,4.958954,5,0,0.588616,0.042466,0.768690,2.0,-0.158805,...,2.688896,0.086917,-0.165752,1.217276,-0.010557,0.279803,0.129723,0.099793,4.770805,0.005253
5,0.768770,0.964280,5.056039,2,3,-0.149674,0.191727,0.403482,4.0,-0.231669,...,2.805284,0.057885,-0.221339,1.016462,-0.008684,0.133810,0.131319,0.506455,5.154286,0.003657
8,0.196804,1.288042,4.134880,2,6,0.265250,0.051943,0.543728,2.0,-0.226750,...,2.897038,0.016771,-0.039255,1.001966,-0.001289,0.104111,0.132474,0.161870,5.985231,0.002502
10,-0.752498,1.047096,5.383471,3,8,0.190259,0.283794,0.646119,3.0,0.226308,...,2.676161,0.059359,0.192602,1.405018,0.009956,0.437911,0.131117,0.930883,4.379267,0.003859
19,0.200385,1.276045,4.978340,2,15,0.387404,-0.079919,0.588445,2.0,-0.457943,...,2.762583,0.041708,0.130417,1.242177,0.005103,0.286116,0.130442,0.347842,4.936926,0.004534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1910,-1.183368,-0.414255,4.017364,4,990,-0.201857,0.024153,0.519108,5.0,0.937235,...,2.971360,0.153965,0.081696,1.667347,0.006524,0.609883,0.100008,0.180026,5.632523,0.034969
1911,0.392841,-1.030628,3.556638,6,991,-0.040235,-0.487300,0.661064,6.0,-0.286316,...,3.201205,0.028467,-0.035661,1.151693,-0.002473,0.214083,0.134402,0.232084,6.356882,0.000575
1912,-0.547229,1.253646,5.027681,3,992,-0.273500,0.109015,0.710283,4.0,0.750629,...,2.669401,0.013070,-0.007015,0.981964,-0.001207,0.108271,0.101346,0.326703,5.010490,0.033631
1913,-0.440216,-1.266303,4.173828,5,993,-0.443467,0.204916,0.714835,4.0,0.697390,...,2.845797,0.032106,0.021964,0.821511,-0.000884,-0.021255,0.125166,0.285384,5.902963,0.009811


In [14]:
# Look at the candidates of event 467 once more, after the sorting step.
df_dvpi0.loc[df_dvpi0["event"] == 467]

Unnamed: 0,Epx,Epy,Epz,Esector,event,Ppx,Ppy,Ppz,Psector,Gpx,...,W,MPt,MM2_ep,MM2_egg,MM2_epgg,ME_epgg,Mpi0,reconPi,Pie,closeness
951,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.016221,0.06688,0.801213,-0.001412,-0.081116,0.122413,0.258922,5.510239,0.012563
952,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.038626,0.06688,0.963955,-0.000609,0.040323,0.114253,0.332448,5.3888,0.020723
953,1.24083,0.19841,4.581691,1,467,0.405046,0.224029,0.872442,2.0,-1.299427,...,2.87675,0.159514,0.06688,1.475227,0.002235,0.401997,0.097854,0.481758,5.027125,0.037123


Once x is sorted so that the row with the smallest closeness comes first within each event, we can keep only that first row by using pandas.DataFrame.duplicated.

In [15]:
# keep only the first row encountered for every event number
df_dvpi0 = df_dvpi0.drop_duplicates(subset="event", keep="first")

In [16]:
# Display the de-duplicated frame (one row per event).
df_dvpi0

Unnamed: 0,Epx,Epy,Epz,Esector,event,Ppx,Ppy,Ppz,Psector,Gpx,...,W,MPt,MM2_ep,MM2_egg,MM2_epgg,ME_epgg,Mpi0,reconPi,Pie,closeness
0,-0.165427,-1.352597,4.958954,5,0,0.588616,0.042466,0.768690,2.0,-0.158805,...,2.688896,0.086917,-0.165752,1.217276,-0.010557,0.279803,0.129723,0.099793,4.770805,0.005253
5,0.768770,0.964280,5.056039,2,3,-0.149674,0.191727,0.403482,4.0,-0.231669,...,2.805284,0.057885,-0.221339,1.016462,-0.008684,0.133810,0.131319,0.506455,5.154286,0.003657
8,0.196804,1.288042,4.134880,2,6,0.265250,0.051943,0.543728,2.0,-0.226750,...,2.897038,0.016771,-0.039255,1.001966,-0.001289,0.104111,0.132474,0.161870,5.985231,0.002502
10,-0.752498,1.047096,5.383471,3,8,0.190259,0.283794,0.646119,3.0,0.226308,...,2.676161,0.059359,0.192602,1.405018,0.009956,0.437911,0.131117,0.930883,4.379267,0.003859
19,0.200385,1.276045,4.978340,2,15,0.387404,-0.079919,0.588445,2.0,-0.457943,...,2.762583,0.041708,0.130417,1.242177,0.005103,0.286116,0.130442,0.347842,4.936926,0.004534
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,0.568592,1.048638,4.155138,2,986,0.378646,-0.019114,0.523247,2.0,-0.646724,...,3.017862,0.084167,-0.103062,1.550403,-0.014551,0.512967,0.108473,0.673788,5.567085,0.026503
1910,-1.183368,-0.414255,4.017364,4,990,-0.201857,0.024153,0.519108,5.0,0.937235,...,2.971360,0.153965,0.081696,1.667347,0.006524,0.609883,0.100008,0.180026,5.632523,0.034969
1911,0.392841,-1.030628,3.556638,6,991,-0.040235,-0.487300,0.661064,6.0,-0.286316,...,3.201205,0.028467,-0.035661,1.151693,-0.002473,0.214083,0.134402,0.232084,6.356882,0.000575
1912,-0.547229,1.253646,5.027681,3,992,-0.273500,0.109015,0.710283,4.0,0.750629,...,2.669401,0.013070,-0.007015,0.981964,-0.001207,0.108271,0.101346,0.326703,5.010490,0.033631


In [17]:
# keep only the columns that make up x, in a readable order:
# event number, then electron, proton, first photon, second photon
xColumns = [
    "event",
    "Epx", "Epy", "Epz", "Ep", "Ephi", "Etheta",
    "Ppx", "Ppy", "Ppz", "Pp", "Pphi", "Ptheta",
    "Gpx", "Gpy", "Gpz", "Gp", "Gtheta", "Gphi",
    "Gpx2", "Gpy2", "Gpz2", "Gp2", "Gtheta2", "Gphi2",
]
df_x = df_dvpi0.loc[:, xColumns]

In [18]:
# Display df_z, the merged frame of generated (Gen*) electron, proton and photon columns.
df_z

Unnamed: 0,event,GenEpx,GenEpy,GenEpz,GenEp,GenEtheta,GenEphi,GenPpx,GenPpy,GenPpz,...,GenGpz,GenGp,GenGp2,GenGpx2,GenGpy2,GenGpz2,GenGtheta,GenGphi,GenGtheta2,GenGphi2
0,0,-0.165402,-1.354682,4.952507,5.137105,15.406398,-96.961140,0.579889,0.051857,0.802379,...,2.495013,2.601065,2.435876,-0.251126,0.586280,2.350894,16.417586,102.844517,15.179026,113.187281
1,1,-1.028276,0.813085,5.030030,5.198044,14.607200,141.665639,-0.364922,-0.418589,0.796965,...,2.899983,3.050974,1.937901,0.478622,-0.144478,1.872300,18.100955,-15.289290,14.950653,-16.797125
2,2,0.495710,1.274288,4.375149,4.583827,17.354943,68.743494,-0.150289,-0.302374,0.463044,...,3.390313,3.455022,2.398056,-0.154967,-0.333853,2.369641,11.106437,-106.618289,8.828954,-114.899652
3,3,0.765695,0.966826,5.089925,5.237211,13.620438,51.621904,-0.173327,0.192897,0.446552,...,2.659405,2.753612,2.477785,-0.213803,-0.554616,2.405433,15.030539,-122.028481,13.880137,-111.081537
4,4,0.684996,-0.428764,1.548591,1.746767,27.557449,-32.043922,-0.671030,-0.058212,0.557526,...,6.627086,6.640172,1.869672,0.042074,0.074139,1.867727,3.597714,97.729612,2.613256,60.425065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,995,0.614231,1.148562,4.207613,4.404598,17.200170,61.862897,0.295965,-0.437431,0.614224,...,3.277166,3.348827,2.545400,-0.405753,-0.241762,2.501195,11.874347,-137.063024,10.693639,-149.211995
996,996,0.113064,1.077298,2.485473,2.711259,23.548452,84.008667,0.355116,-0.212392,0.487207,...,4.702239,4.747998,2.943394,-0.193651,-0.267318,2.924827,7.961049,-114.673619,6.439048,-125.920470
997,997,-0.864260,0.770568,4.261022,4.415544,15.202509,138.280022,0.348639,0.185055,0.416250,...,3.297684,3.349385,2.672269,0.294656,-0.412381,2.623765,10.080154,-67.868418,10.933181,-54.453265
998,998,0.337681,-1.032594,2.325038,2.566336,25.044982,-71.891095,0.123244,0.226384,0.417861,...,5.120584,5.157343,2.757260,-0.213153,0.243907,2.738167,6.844925,113.782084,6.746652,131.150492


In [19]:
# Display df_x, the selected reconstructed columns of the DVpi0P candidates.
df_x

Unnamed: 0,event,Epx,Epy,Epz,Ep,Ephi,Etheta,Ppx,Ppy,Ppz,...,Gpz,Gp,Gtheta,Gphi,Gpx2,Gpy2,Gpz2,Gp2,Gtheta2,Gphi2
0,0,-0.165427,-1.352597,4.958954,5.142773,-96.972849,15.365120,0.588616,0.042466,0.768690,...,2.421095,2.523337,16.366043,102.905997,-0.232252,0.536325,2.170144,2.247467,15.073009,113.414718
5,3,0.768770,0.964280,5.056039,5.204265,51.436503,13.707439,-0.149674,0.191727,0.403482,...,2.640477,2.720144,13.901054,-110.763035,-0.330437,-0.534806,2.351561,2.434142,14.967151,-121.710414
8,6,0.196804,1.288042,4.134880,4.335322,81.312793,17.490774,0.265250,0.051943,0.543728,...,3.408335,3.491610,12.538634,-107.405616,-0.220845,-0.608173,2.408214,2.493620,15.038857,-109.957436
10,8,-0.752498,1.047096,5.383471,5.535740,125.703019,13.469663,0.190259,0.283794,0.646119,...,2.268636,2.381252,17.691372,-71.775773,0.307389,-0.591503,1.883535,1.998015,19.489485,-62.540267
19,15,0.200385,1.276045,4.978340,5.143181,81.075381,14.545266,0.387404,-0.079919,0.588445,...,3.310755,3.437907,15.631459,-119.627388,-0.114020,-0.352309,1.452562,1.499020,14.301733,-107.933394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1902,986,0.568592,1.048638,4.155138,4.322974,61.532576,16.017887,0.378646,-0.019114,0.523247,...,4.149495,4.277762,14.066247,-128.465243,-0.219826,-0.191519,1.255927,1.289323,13.069271,-138.936653
1910,990,-1.183368,-0.414255,4.017364,4.208465,-160.706670,17.332654,-0.201857,0.024153,0.519108,...,4.328554,4.436661,12.674237,15.674949,0.302821,0.075806,1.154400,1.195862,15.131653,14.054270
1911,991,0.392841,-1.030628,3.556638,3.723734,-69.134820,17.229332,-0.040235,-0.487300,0.661064,...,5.445761,5.601090,13.524980,102.625349,-0.066703,0.211215,0.722606,0.755791,17.041606,107.526415
1912,992,-0.547229,1.253646,5.027681,5.210438,113.581737,15.220013,-0.273500,0.109015,0.710283,...,4.246739,4.486173,18.803673,-58.727923,0.073985,-0.114258,0.506340,0.524318,15.047164,-57.076034


In [20]:
# an example of Cartesian X and Z's
# Column naming: <x|z><particle row><component>, with particle rows 0..3 =
# electron, proton, photon, second photon; components 0..2 = px, py, pz and
# component 3 = a particle-type label (1 electron, 2 proton, 3 photon).
zCartesianNames = {
    "GenEpx": "z00", "GenEpy": "z01", "GenEpz": "z02",
    "GenPpx": "z10", "GenPpy": "z11", "GenPpz": "z12",
    "GenGpx": "z20", "GenGpy": "z21", "GenGpz": "z22",
    "GenGpx2": "z30", "GenGpy2": "z31", "GenGpz2": "z32",
}
df_zCartesian = df_z.rename(columns = zCartesianNames)
for labelColumn, particleLabel in (("z03", 1), ("z13", 2), ("z23", 3), ("z33", 3)):
    df_zCartesian.loc[:, labelColumn] = particleLabel
df_zCartesian = df_zCartesian.loc[:, ["event"] + [f"z{row}{col}" for row in range(4) for col in range(4)]]

xCartesianNames = {
    "Epx": "x00", "Epy": "x01", "Epz": "x02",
    "Ppx": "x10", "Ppy": "x11", "Ppz": "x12",
    "Gpx": "x20", "Gpy": "x21", "Gpz": "x22",
    "Gpx2": "x30", "Gpy2": "x31", "Gpz2": "x32",
}
df_xCartesian = df_x.rename(columns = xCartesianNames)
for labelColumn, particleLabel in (("x03", 1), ("x13", 2), ("x23", 3), ("x33", 3)):
    df_xCartesian.loc[:, labelColumn] = particleLabel
df_xCartesian = df_xCartesian.loc[:, ["event"] + [f"x{row}{col}" for row in range(4) for col in range(4)]]

# pair every x row with the z row of the same event
dfCartesian = df_xCartesian.merge(df_zCartesian, how = 'inner', on='event')

In [21]:
# Display dfCartesian: x and z Cartesian columns joined on the event number.
dfCartesian

Unnamed: 0,event,x00,x01,x02,x03,x10,x11,x12,x13,x20,...,z12,z13,z20,z21,z22,z23,z30,z31,z32,z33
0,0,-0.165427,-1.352597,4.958954,1,0.588616,0.042466,0.768690,2,-0.158805,...,0.802379,2,-0.163429,0.716758,2.495013,3,-0.251126,0.586280,2.350894,3
1,3,0.768770,0.964280,5.056039,1,-0.149674,0.191727,0.403482,2,-0.231669,...,0.446552,2,-0.378719,-0.605407,2.659405,3,-0.213803,-0.554616,2.405433,3
2,6,0.196804,1.288042,4.134880,1,0.265250,0.051943,0.543728,2,-0.226750,...,0.556964,2,-0.214863,-0.698647,3.294931,3,-0.239611,-0.652432,2.599788,3
3,8,-0.752498,1.047096,5.383471,1,0.190259,0.283794,0.646119,2,0.226308,...,0.661068,2,0.236072,-0.716268,2.353934,3,0.332812,-0.647356,2.058951,3
4,15,0.200385,1.276045,4.978340,1,0.387404,-0.079919,0.588445,2,-0.457943,...,0.603464,2,-0.504022,-0.875556,3.600723,3,-0.109454,-0.337992,1.381799,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,986,0.568592,1.048638,4.155138,1,0.378646,-0.019114,0.523247,2,-0.646724,...,0.561075,2,-0.576402,-0.727664,3.689812,3,-0.379106,-0.332251,2.171679,3
336,990,-1.183368,-0.414255,4.017364,1,-0.201857,0.024153,0.519108,2,0.937235,...,0.525507,2,0.822550,0.231547,3.827024,3,0.581798,0.145947,2.218103,3
337,991,0.392841,-1.030628,3.556638,1,-0.040235,-0.487300,0.661064,2,-0.286316,...,0.701620,2,-0.300599,1.311196,5.554668,3,-0.071391,0.228685,0.784033,3
338,992,-0.547229,1.253646,5.027681,1,-0.273500,0.109015,0.710283,2,0.750629,...,0.716978,2,0.690030,-1.131477,3.884314,3,0.141748,-0.223477,0.982961,3


In [22]:
# Spherical X and Z's.
# Column naming: <x|z><particle row><component>, with particle rows 0..3 =
# electron, proton, photon, second photon; components 0..2 = momentum magnitude,
# theta, phi and component 3 = a particle-type label (1 electron, 2 proton, 3 photon).
zSphericalNames = {
    "GenEp": "z00", "GenEtheta": "z01", "GenEphi": "z02",
    "GenPp": "z10", "GenPtheta": "z11", "GenPphi": "z12",
    "GenGp": "z20", "GenGtheta": "z21", "GenGphi": "z22",
    "GenGp2": "z30", "GenGtheta2": "z31", "GenGphi2": "z32",
}
df_zSpherical = df_z.rename(columns = zSphericalNames)
for labelColumn, particleLabel in (("z03", 1), ("z13", 2), ("z23", 3), ("z33", 3)):
    df_zSpherical.loc[:, labelColumn] = particleLabel
df_zSpherical = df_zSpherical.loc[:, ["event"] + [f"z{row}{col}" for row in range(4) for col in range(4)]]

xSphericalNames = {
    "Ep": "x00", "Etheta": "x01", "Ephi": "x02",
    "Pp": "x10", "Ptheta": "x11", "Pphi": "x12",
    "Gp": "x20", "Gtheta": "x21", "Gphi": "x22",
    "Gp2": "x30", "Gtheta2": "x31", "Gphi2": "x32",
}
df_xSpherical = df_x.rename(columns = xSphericalNames)
for labelColumn, particleLabel in (("x03", 1), ("x13", 2), ("x23", 3), ("x33", 3)):
    df_xSpherical.loc[:, labelColumn] = particleLabel
df_xSpherical = df_xSpherical.loc[:, ["event"] + [f"x{row}{col}" for row in range(4) for col in range(4)]]

# pair every x row with the z row of the same event
dfSpherical = df_xSpherical.merge(df_zSpherical, how = 'inner', on='event')

In [23]:
# Display dfSpherical: x and z spherical columns joined on the event number.
dfSpherical

Unnamed: 0,event,x00,x01,x02,x03,x10,x11,x12,x13,x20,...,z12,z13,z20,z21,z22,z23,z30,z31,z32,z33
0,0,5.142773,15.365120,-96.972849,1,0.969100,37.514518,4.126502,2,2.523337,...,5.110101,2,2.601065,16.417586,102.844517,3,2.435876,15.179026,113.187281,3
1,3,5.204265,13.707439,51.436503,1,0.471126,31.082856,127.977677,2,2.720144,...,131.941170,2,2.753612,15.030539,-122.028481,3,2.477785,13.880137,-111.081537,3
2,6,4.335322,17.490774,81.312793,1,0.607203,26.432066,11.079774,2,3.491610,...,13.434528,2,3.375033,12.507819,-107.094864,3,2.691092,14.967690,-110.166152,3
3,8,5.535740,13.469663,125.703019,1,0.730895,27.869926,56.161535,2,2.381252,...,55.388738,2,2.471796,17.764754,-71.758470,3,2.183830,19.469933,-62.791846,3
4,15,5.143181,14.545266,81.075381,1,0.709039,33.909568,-11.656246,2,3.437907,...,-8.879863,2,3.739765,15.672689,-119.927286,3,1.426741,14.418967,-107.943873,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,986,4.322974,16.017887,61.532576,1,0.646162,35.925867,-2.889882,2,4.277762,...,1.094183,2,3.804793,14.121614,-128.383685,3,2.229417,13.068207,-138.768531,3
336,990,4.208465,17.332654,-160.706670,1,0.557497,21.386665,173.176840,2,4.436661,...,169.583167,2,3.921265,12.586844,15.721837,3,2.297776,15.132131,14.082314,3
337,991,3.723734,17.229332,-69.134820,1,0.822244,36.488600,-94.720021,2,5.601090,...,-92.356727,2,5.715237,13.613605,102.912242,3,0.819818,16.991089,107.337250,3
338,992,5.210438,15.220013,113.581737,1,0.768888,22.514917,158.268136,2,4.486173,...,158.649141,2,4.104178,18.839085,-58.623102,3,1.017962,15.068304,-57.613713,3


In [24]:
# Write the spherical x/z frame to "goodbyeRoot.pkl" (path relative to the working directory).
dfSpherical.to_pickle("goodbyeRoot.pkl")

In [25]:
# Read the pickle written in the previous cell back in, as a round-trip check.
# NOTE: unpickling executes arbitrary code, so only load pickle files you created yourself.
testDF = pd.read_pickle("goodbyeRoot.pkl")

In [26]:
# Display the frame read back from the pickle; it should match dfSpherical.
testDF

Unnamed: 0,event,x00,x01,x02,x03,x10,x11,x12,x13,x20,...,z12,z13,z20,z21,z22,z23,z30,z31,z32,z33
0,0,5.142773,15.365120,-96.972849,1,0.969100,37.514518,4.126502,2,2.523337,...,5.110101,2,2.601065,16.417586,102.844517,3,2.435876,15.179026,113.187281,3
1,3,5.204265,13.707439,51.436503,1,0.471126,31.082856,127.977677,2,2.720144,...,131.941170,2,2.753612,15.030539,-122.028481,3,2.477785,13.880137,-111.081537,3
2,6,4.335322,17.490774,81.312793,1,0.607203,26.432066,11.079774,2,3.491610,...,13.434528,2,3.375033,12.507819,-107.094864,3,2.691092,14.967690,-110.166152,3
3,8,5.535740,13.469663,125.703019,1,0.730895,27.869926,56.161535,2,2.381252,...,55.388738,2,2.471796,17.764754,-71.758470,3,2.183830,19.469933,-62.791846,3
4,15,5.143181,14.545266,81.075381,1,0.709039,33.909568,-11.656246,2,3.437907,...,-8.879863,2,3.739765,15.672689,-119.927286,3,1.426741,14.418967,-107.943873,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335,986,4.322974,16.017887,61.532576,1,0.646162,35.925867,-2.889882,2,4.277762,...,1.094183,2,3.804793,14.121614,-128.383685,3,2.229417,13.068207,-138.768531,3
336,990,4.208465,17.332654,-160.706670,1,0.557497,21.386665,173.176840,2,4.436661,...,169.583167,2,3.921265,12.586844,15.721837,3,2.297776,15.132131,14.082314,3
337,991,3.723734,17.229332,-69.134820,1,0.822244,36.488600,-94.720021,2,5.601090,...,-92.356727,2,5.715237,13.613605,102.912242,3,0.819818,16.991089,107.337250,3
338,992,5.210438,15.220013,113.581737,1,0.768888,22.514917,158.268136,2,4.486173,...,158.649141,2,4.104178,18.839085,-58.623102,3,1.017962,15.068304,-57.613713,3
