In [13]:
#We can go into our root file and see what Trees are availiable
import ROOT
from ROOT import TTree
f = ROOT.TFile.Open("../data/ttbar_13TeV_80.root")
print("Availiable Trees:")
for x in f.GetListOfKeys():
    if isinstance(x.ReadObj(), TTree):
        print(x.GetName())

Availiable Trees:
Delphes


In [2]:
#We can look at is in our Tree
t = f.Get("Delphes")
#t.Print("all") for everything
t.Print("Photon*") 
#t.Print("Particle*")

******************************************************************************
*Tree    :Delphes   : Analysis tree                                          *
*Entries :    10000 : Total =       217060762 bytes  File  Size =   39103144 *
*        :          : Tree compression factor =   5.55                       *
******************************************************************************
*Br    0 :Photon    : Int_t Photon_                                          *
*Entries :    10000 : Total  Size=      89825 bytes  File Size  =      22874 *
*Baskets :        3 : Basket Size=      64000 bytes  Compression=   3.51     *
*............................................................................*
*Br    1 :Photon.fUniqueID : UInt_t fUniqueID[Photon_]                       *
*Entries :    10000 : Total  Size=     105138 bytes  File Size  =      14082 *
*Baskets :        3 : Basket Size=     168448 bytes  Compression=   7.42     *
*.......................................

In [3]:
import sys, os
if __package__ is None:
	import sys, os
	sys.path.append(os.path.realpath("../../"))
    #Assuming that there is a symbolic link to CMS_SURF_2016 in Software this second one should work everywhere
    sys.path.append(os.path.realpath("/data/shared/Software/"))
from CMS_SURF_2016.utils.data_parse import ROOT_to_pandas

#We can extract the data directly
particle_frame = ROOT_to_pandas("../data/ttbar_13TeV_80.root",
                              ["Particle.E","Particle.Px", "Particle.Py", "Particle.Pz",
                               "Particle.PID", "Particle.Charge"],
                              columns=["E", "Px", "Py", "Pz", "PID", "Charge"],
                              verbosity=1)


Extracting data from ../data/ttbar_13TeV_80.root
Using trees: Delphes
Extracting Leaves: Particle.E, Particle.Px, Particle.Py, Particle.Pz, Particle.PID, Particle.Charge
Renaming to: E, Px, Py, Pz, PID, Charge
0           0  282.561829   97.965218  135.163528  146.349121    6       0
1           0  242.979065  -97.965218 -135.163528   44.293591   -6       0
2           0  286.044769   87.359291  137.057495  157.633881    6       0
3           0  246.470398 -105.068771 -133.895004   50.693233   -6       0
4           0  282.489807   92.624489  132.264099  152.219070    6       0
...       ...         ...         ...         ...         ...  ...     ...
348208   9999   71.402092   37.964504  -33.718445  -49.805946   -5       0
348209   9999   10.499156    5.388320   -8.766343   -2.085545  -11       1
348210   9999    5.253664    2.561614   -4.585681   -0.103222   12       0
348211   9999   35.908249   20.290855  -16.882841  -24.344471   14       0
348212   9999   19.674826   10.766300   

In [4]:
#Or save ourselves some typing with leaves_from_obj()
from CMS_SURF_2016.utils.data_parse import leaves_from_obj
columns= ["PT", "Eta", "Phi", "T"]
leaves, columns = leaves_from_obj("Jet", columns)
jet_frame = ROOT_to_pandas("../data/ttbar_13TeV_80.root",
                             leaves,
                              columns=columns,
                              verbosity=1)


Extracting data from ../data/ttbar_13TeV_80.root
Using trees: Delphes
Extracting Leaves: Jet.PT, Jet.Eta, Jet.Phi, Jet.T
Renaming to: PT, Eta, Phi, T
0          0  133.609573  0.492882 -1.788567  5.161640e-09
1          0  105.820587  0.550791  1.623367  5.527065e-09
2          0  103.754044  0.954253  0.243460  6.506372e-09
3          0   47.140400 -0.791700 -2.732852  6.616637e-09
4          0   22.560740  0.786911  3.130388  6.057297e-09
...      ...         ...       ...       ...           ...
54023   9998   42.510010 -1.146696 -1.338811  7.494728e-09
54024   9998   20.269474 -3.463675 -1.878399  9.690226e-09
54025   9999   59.785984  0.684868  0.134917  5.533349e-09
54026   9999   54.094982 -1.112713 -2.679917  7.634685e-09
54027   9999   32.246887 -0.875915 -0.780972  6.465434e-09

[54028 rows x 5 columns]
Elapse time: 2.28 seconds


In [5]:
#And with DataProcessingProcedures we can get really fancy
import numpy as np
from CMS_SURF_2016.utils.data_parse import DataProcessingProcedure

#Define the speed of light C
C = np.float64(2.99792458e8);  
# Define a function that converts [Energy, Eta, Phi] to [E/c, Px, Py, Pz]
def four_vec_func(inputs):
        E = inputs[0]
        Eta = inputs[1]
        Phi = inputs[2]
        E_over_c = E/C
        px = E_over_c * np.sin(Phi) * np.cos(Eta) 
        py = E_over_c * np.sin(Phi) * np.sin(Eta)
        pz = E_over_c * np.cos(Phi)
        return [E_over_c, px, py, pz]
four_vec_inputs, dummy = leaves_from_obj("Photon", ["E", "Eta", "Phi"])

#Define a procedure that uses four_vec_func to make the conversion from
#    four_vec_inputs (i.e [Photon.E, Photon.Eta, Photon.Phi]) to ["E/c", "Px","Py","Pz"]
four_vec_proc = DataProcessingProcedure(four_vec_func, four_vec_inputs, ["E/c", "Px","Py","Pz"])

# Define a function that takes in nothing [] and outputs [22] the PID of a Photon
def PID_func(inputs):
    return [22]

#Define a procedure that uses PID_func to insert the photon PID=22 in to the table
PID_proc = DataProcessingProcedure(PID_func, [], ["PID"])

#If the processing we need to do is simple we can also just use a lambda
#Define a procedure that just outputs 0
charge_proc = DataProcessingProcedure(lambda x:[0], [], ["Charge"])

#Pass in our column names and procedures. The procedures will be replaced by their outputs
columns=[four_vec_proc, PID_proc, charge_proc]
leaves, columns = leaves_from_obj("Photon", columns)

#Extract the table from the root file
photon_frame = ROOT_to_pandas("../data/ttbar_13TeV_80.root",
                      leaves,
                      columns=columns,
                      verbosity=1)


Extracting data from ../data/ttbar_13TeV_80.root
Using trees: Delphes
Procedure at column 1 maps "['Photon.E', 'Photon.Eta', 'Photon.Phi'] -> ['E/c', 'Px', 'Py', 'Pz']"
Procedure at column 5 maps "[] -> ['PID']"
Procedure at column 6 maps "[] -> ['Charge']"
Extracting Leaves: Photon.E, Photon.Eta, Photon.Phi
Renaming to: E/c, Px, Py, Pz, PID, Charge
0          0  5.261209e-08 -4.550922e-08 -2.348900e-08 -1.205030e-08   22   
1          1  1.146690e-07  1.056245e-08  1.749215e-08  1.128336e-07   22   
2          1  6.013556e-08  5.939667e-09  1.001853e-08  5.899691e-08   22   
3          2  4.890948e-08  1.863684e-08 -9.695142e-09 -4.416797e-08   22   
4          2  7.446401e-08  7.954018e-09 -2.499324e-08  6.969189e-08   22   
...      ...           ...           ...           ...           ...  ...   
16059   9995  3.534814e-08  2.719342e-08 -4.979574e-10 -2.257788e-08   22   
16060   9996  4.809934e-08  3.273690e-08 -1.247800e-08 -3.295667e-08   22   
16061   9997  1.826741e-06  8.02

In [6]:
#Now our tables for our Photons and Particles are the same except that one has E and the other has E/c
#So lets do it again for our Particles so that we have a proper 4-momentum

E_over_c_proc = DataProcessingProcedure(lambda x:x[0]/C, ["Particle.E"], ["E/c"])
columns= [E_over_c_proc, "Px", "Py", "Pz", "PID", "Charge"]
leaves, columns = leaves_from_obj("Particle", columns)
particle_frame = ROOT_to_pandas("../data/ttbar_13TeV_80.root",
                             leaves,
                              columns=columns,
                              verbosity=1)


Extracting data from ../data/ttbar_13TeV_80.root
Using trees: Delphes
Procedure at column 1 maps "['Particle.E'] -> ['E/c']"
Extracting Leaves: Particle.E, Particle.Px, Particle.Py, Particle.Pz, Particle.PID, Particle.Charge
Renaming to: E/c, Px, Py, Pz, PID, Charge
0           0  9.425248e-07   97.965218  135.163528  146.349121    6       0
1           0  8.104909e-07  -97.965218 -135.163528   44.293591   -6       0
2           0  9.541426e-07   87.359291  137.057495  157.633881    6       0
3           0  8.221368e-07 -105.068771 -133.895004   50.693233   -6       0
4           0  9.422846e-07   92.624489  132.264099  152.219070    6       0
...       ...           ...         ...         ...         ...  ...     ...
348208   9999  2.381717e-07   37.964504  -33.718445  -49.805946   -5       0
348209   9999  3.502141e-08    5.388320   -8.766343   -2.085545  -11       1
348210   9999  1.752434e-08    2.561614   -4.585681   -0.103222   12       0
348211   9999  1.197770e-07   20.290855 

In [7]:
#Now lets join our particle and photon frames since they are now formated the same
import pandas as pd
frame = pd.concat([particle_frame,photon_frame])
print(frame)

       Entry           E/c            Px            Py            Pz  PID  \
0          0  9.425248e-07  9.796522e+01  1.351635e+02  1.463491e+02    6   
1          0  8.104909e-07 -9.796522e+01 -1.351635e+02  4.429359e+01   -6   
2          0  9.541426e-07  8.735929e+01  1.370575e+02  1.576339e+02    6   
3          0  8.221368e-07 -1.050688e+02 -1.338950e+02  5.069323e+01   -6   
4          0  9.422846e-07  9.262449e+01  1.322641e+02  1.522191e+02    6   
5          0  8.230485e-07 -9.704137e+01 -1.412031e+02  4.813101e+01   -6   
6          0  9.428702e-07  8.689188e+01  1.348697e+02  1.536324e+02    6   
7          0  8.251828e-07 -1.009300e+02 -1.394356e+02  4.856898e+01   -6   
8          0  9.383317e-07  8.794502e+01  1.294648e+02  1.551778e+02    6   
9          0  8.302771e-07 -1.002156e+02 -1.431019e+02  4.716708e+01   -6   
10         0  8.241720e-07 -9.974995e+01 -1.406449e+02  4.590444e+01   -6   
11         0  9.366493e-07  8.780920e+01  1.290652e+02  1.546732e+02    6   

In [8]:
#We can then save this for later in HDF5 format
frame.to_hdf("ttbar_13TeV_80.h5", 'data')

In [9]:
#We can find all the electrons
electron_condition = frame['PID'] == 11
electrons = frame[electron_condition]
print(electrons)

        Entry           E/c          Px         Py          Pz  PID  Charge
58          1  8.753160e-08  -12.902191 -18.211334   13.801711   11      -1
63          1  5.085585e-08  -10.312069   3.989480  -10.497235   11      -1
111         2  7.903927e-08   11.630176  -9.569020   18.293272   11      -1
183         4  6.537174e-08   12.521022  -0.650939   15.062541   11      -1
214         5  4.841218e-07    0.849044 -10.893746  144.724167   11      -1
251         6  8.975284e-08    5.729938   9.062719  -24.678608   11      -1
431        12  4.361674e-08    3.121080  -4.855683   11.732954   11      -1
458        13  3.294717e-06  -35.773647 -66.043327 -984.871338   11      -1
574        16  1.672341e-07    4.800545 -23.138849   44.216717   11      -1
640        18  5.434525e-08    7.740928  -8.863507   11.267438   11      -1
687        19  1.161853e-07   -5.771776  -4.058334  -34.109344   11      -1
785        22  2.003190e-08    4.001147   4.475734    0.153585   11      -1
789        2

In [10]:
#We can find all the positrons
positron_condition = frame['PID'] == -11
positrons = frame[positron_condition]
print(positrons)

        Entry           E/c          Px         Py          Pz  PID  Charge
142         3  2.049406e-08   -3.331319  -4.585961   -2.370566  -11       1
220         5  6.628937e-08   -9.666728  13.536928   10.874018  -11       1
247         6  2.812554e-07   -5.133392 -54.842495  -63.839779  -11       1
281         7  1.324641e-07  -30.327204  12.757425   22.238062  -11       1
722        20  4.480830e-07    8.858373  52.005440  123.539558  -11       1
724        20  4.431201e-07    8.759339  51.429310  122.171379  -11       1
786        22  3.937975e-08    7.789710   8.864367    0.345228  -11       1
790        22  3.942818e-08    7.808531   8.866483    0.362084  -11       1
803        22  1.802288e-07  -52.937412   8.214161   -7.037912  -11       1
971        27  1.173694e-06   19.905781 -60.209663  346.103058  -11       1
1345       39  1.036654e-07  -22.757181  19.360815    8.550900  -11       1
1386       40  2.016139e-08    5.094412  -2.308469    2.291434  -11       1
1675       4

In [11]:
#We can find all the photons
photon_condition = frame['PID'] == 22
photon_condition = frame[photon_condition]
print(photon_condition)

       Entry           E/c            Px            Py            Pz  PID  \
0          0  5.261209e-08 -4.550922e-08 -2.348900e-08 -1.205030e-08   22   
1          1  1.146690e-07  1.056245e-08  1.749215e-08  1.128336e-07   22   
2          1  6.013556e-08  5.939667e-09  1.001853e-08  5.899691e-08   22   
3          2  4.890948e-08  1.863684e-08 -9.695142e-09 -4.416797e-08   22   
4          2  7.446401e-08  7.954018e-09 -2.499324e-08  6.969189e-08   22   
5          2  3.877580e-08  1.532353e-08 -7.871844e-09 -3.473883e-08   22   
6          3  4.874846e-08  4.418413e-08 -2.046041e-08  2.355126e-09   22   
7          4  1.478676e-06  1.178061e-06  8.929124e-07 -3.689857e-08   22   
8          4  2.046877e-07  1.524746e-07 -1.242133e-07 -5.674152e-08   22   
9          5  3.716182e-08 -3.576778e-08  9.537322e-09 -3.271955e-09   22   
10         6  4.860845e-07 -4.431982e-07 -1.459649e-07  1.361902e-07   22   
11         7  4.420411e-08  1.208502e-08  9.183593e-09 -4.151647e-08   22   

In [12]:
#And we can find a bunch of bosons. In this case we get photons and W bosons.
boson_lower = np.abs(frame['PID']) >= 21
boson_upper = np.abs(frame['PID']) <= 37
bosons = frame[(boson_lower & boson_upper)]
print(bosons)

       Entry           E/c            Px            Py            Pz  PID  \
16         0  5.172976e-07  9.617341e+01  2.282994e+01  8.785622e+01   24   
19         0  5.135896e-07  9.486984e+01  2.328997e+01  8.719507e+01   24   
24         0  5.949336e-07 -5.189253e+01 -1.212055e+02  9.120892e+01  -24   
27         0  5.942730e-07 -5.185597e+01 -1.210505e+02  9.104831e+01  -24   
52         1  5.676375e-07 -4.310791e+01 -6.623184e+01 -1.184868e+02  -24   
55         1  5.673563e-07 -4.316243e+01 -6.608554e+01 -1.184275e+02  -24   
98         2  1.677317e-06 -4.111405e+01 -1.099784e+02  4.815266e+02   24   
100        2  4.534526e-07 -9.007568e+01  4.115408e+01 -4.579291e+01  -24   
103        2  4.530237e-07 -8.990714e+01  4.113654e+01 -4.575832e+01  -24   
130        3  3.352869e-07 -3.097373e+01 -3.048625e+01 -4.717030e+01   24   
133        3  3.351501e-07 -3.093670e+01 -3.046111e+01 -4.712342e+01   24   
134        3  3.807873e-07 -7.178213e+00  6.325753e+01 -5.647289e+01  -24   

In [14]:
#And we can find a bunch of quarks.
quark_lower = np.abs(frame['PID']) >= 1
quark_upper = np.abs(frame['PID']) <= 8
quark = frame[(quark_lower & quark_upper)]
print(quark)

        Entry           E/c          Px          Py          Pz  PID  Charge
0           0  9.425248e-07   97.965218  135.163528  146.349121    6       0
1           0  8.104909e-07  -97.965218 -135.163528   44.293591   -6       0
2           0  9.541426e-07   87.359291  137.057495  157.633881    6       0
3           0  8.221368e-07 -105.068771 -133.895004   50.693233   -6       0
4           0  9.422846e-07   92.624489  132.264099  152.219070    6       0
5           0  8.230485e-07  -97.041374 -141.203110   48.131012   -6       0
6           0  9.428702e-07   86.891876  134.869720  153.632416    6       0
7           0  8.251828e-07 -100.929962 -139.435638   48.568985   -6       0
8           0  9.383317e-07   87.945023  129.464828  155.177780    6       0
9           0  8.302771e-07 -100.215584 -143.101913   47.167076   -6       0
10          0  8.241720e-07  -99.749947 -140.644882   45.904442   -6       0
11          0  9.366493e-07   87.809196  129.065247  154.673218    6       0