# Class for Generation of Dataset

Making the process of generation of dataset a much simpler task

In [12]:
# Imports pyRoot, which requires a manual compile as defaut ROOT binary comes with a python2 support
import ROOT
from ROOT import TLorentzVector
from math import sqrt
import h5py
import numpy as np
import awkward as ak
import pandas as pd
import json

ROOT.gSystem.Load("libDelphes.so")

-1

cling::DynamicLibraryManager::loadLibrary(): libCore.so.6.22: cannot open shared object file: No such file or directory


In [40]:
class GenDataset():
    def __init__(self,delphesRootFile,isRapid : bool = True):
        self.delphesFile = delphesRootFile
        self.isRapid = isRapid

    def createArrays(self):
        File = ROOT.TChain("Delphes;1")
        File.Add(self.delphesFile)
        self.totalEvents = File.GetEntries()

        eventDict = []
        for i in range(self.totalEvents):
        	Entry = File.GetEntry(i)

        	epArray = []
        	ezArray = []
        	az_angle = [] # Azimuthal Angle
        	ra_angle = [] # Rapidity

        	EntryFromBranch = File.Photon.GetEntries()	
        	for j in range(EntryFromBranch):
        		particleArray = [1,0,0,0,File.GetLeaf("Photon.PT").GetValue(j),File.GetLeaf("Photon.E").GetValue(j),0]
        		epArray.append(particleArray)

        		az_angle.append(File.GetLeaf("Photon.Phi").GetValue(j))
        		ra_angle.append(File.GetLeaf("Photon.Eta").GetValue(j))


        	EntryFromBranch = File.Jet.GetEntries()
        	for j in range(EntryFromBranch):
            
        		Jet = TLorentzVector()
        		Jet.SetPtEtaPhiM(File.GetLeaf("Jet.PT").GetValue(j),File.GetLeaf("Jet.Eta").GetValue(j),File.GetLeaf("Jet.Phi").GetValue(j),File.GetLeaf("Jet.Mass").GetValue(j))

        		particleArray = [0,0,1 if int(File.GetLeaf("Jet.BTag").GetValue(j)) == 1 else -1,0,File.GetLeaf("Jet.PT").GetValue(j),Jet.Energy(),File.GetLeaf("Jet.Mass").GetValue(j)]
        		epArray.append(particleArray)

        		az_angle.append(File.GetLeaf("Jet.Phi").GetValue(j))
        		ra_angle.append(File.GetLeaf("Jet.Eta").GetValue(j))

        	EntryFromBranch = File.Electron.GetEntries()
        	for j in range(EntryFromBranch):
        		Electron = TLorentzVector()
        		Electron.SetPtEtaPhiM(File.GetLeaf("Electron.PT").GetValue(j),File.GetLeaf("Electron.Eta").GetValue(j),File.GetLeaf     ("Electron.Phi").GetValue(j),0)

        		particleArray = [0,int(File.GetLeaf("Electron.Charge").GetValue(j)),0,0,File.GetLeaf("Electron.PT").GetValue(j),        Electron.Energy(),0]
        		epArray.append(particleArray)

        		az_angle.append(File.GetLeaf("Electron.Phi").GetValue(j))
        		ra_angle.append(File.GetLeaf("Electron.Eta").GetValue(j))


        	EntryFromBranch = File.MissingET.GetEntries()
        	for j in range(EntryFromBranch):
        		particleArray = [0,0,0,1,File.GetLeaf("MissingET.MET").GetValue(j),File.GetLeaf("MissingET.MET").GetValue(j),0]	
        		epArray.append(particleArray)

        		az_angle.append(File.GetLeaf("MissingET.Phi").GetValue(j))
        		ra_angle.append(File.GetLeaf("MissingET.Eta").GetValue(j))


        	EntryFromBranch = File.Muon.GetEntries()
        	for j in range(EntryFromBranch):
            
        		Muon = TLorentzVector()
        		Muon.SetPtEtaPhiM(File.GetLeaf("Muon.PT").GetValue(j),File.GetLeaf("Muon.Eta").GetValue(j),File.GetLeaf("Muon.Phi").GetValue(j),0)

        		particleArray = [0,int(File.GetLeaf("Muon.Charge").GetValue(j)),0,0,File.GetLeaf("Muon.PT").GetValue(j),Muon.Energy(),0]
        		epArray.append(particleArray)

        		az_angle.append(File.GetLeaf("Muon.Phi").GetValue(j))
        		ra_angle.append(File.GetLeaf("Muon.Eta").GetValue(j))


        	# Getting the Angular Angle Distance
        	noPart = len(az_angle) # Number of particles in the event

        	if self.isRapid:
        		for i in range(noPart):
        			tempAng = []
        			for j in range(noPart):
        				tempAng.append(sqrt((az_angle[i] - az_angle[j])**2 + (ra_angle[i] - ra_angle[j])**2))
        			ezArray.append(tempAng)

        	else:
        		for i in range(noPart):
        			tempAng = []
        			for j in range(noPart):
        				tempAng.append(az_angle[i] - az_angle[j])
        			ezArray.append(tempAng)

        	eventDict.append({"fourMomenta" : epArray,"azimuthalAngle" : ezArray})

        self.datasetDict = eventDict

    def createDataset(self,outputFile):
        xArray = []
        for i in range(self.totalEvents):
            xArray.append(self.datasetDict[i]["fourMomenta"])

        azArray = []
        for i in range(self.totalEvents):
            azArray.append(self.datasetDict[i]["azimuthalAngle"])

        hf = h5py.File(outputFile,'w')
        partArray = hf.create_group("ParticleArray")
        azimuthalArray = hf.create_group("AzimuthalAngle")

        # Convert Particle Array to HDF5 Group

        ak_array = ak.from_iter(xArray)
        form, length, container = ak.to_buffers(ak_array,container=partArray)
        partArray.attrs["form"] = form.tojson()
        partArray.attrs["length"] = json.dumps(length)

        # Convert Azimuthal Angle Array to HDF5 Group

        ak_array = ak.from_iter(azArray)
        form, length, container = ak.to_buffers(ak_array,container=azimuthalArray)
        azimuthalArray.attrs["form"] = form.tojson()
        azimuthalArray.attrs["length"] = json.dumps(length)

        hf.close()

    def getArraysFromFile(self,inputFile):
        hf = h5py.File(inputFile,'r')
        partArray = hf.get("ParticleArray")
        azimuthalArray = hf.get("AzimuthalAngle")

        reconstitutedPartArray = ak.from_buffers(
            ak.forms.Form.fromjson(partArray.attrs["form"]),
            json.loads(partArray.attrs["length"]),
            {k: np.asarray(v) for k, v in partArray.items()},
        )
        particleArray = ak.to_list(reconstitutedPartArray)

        reconstitutedAzAngle = ak.from_buffers(
            ak.forms.Form.fromjson(azimuthalArray.attrs["form"]),
            json.loads(azimuthalArray.attrs["length"]),
            {k: np.asarray(v) for k, v in azimuthalArray.items()},
        )
        azArray = ak.to_list(reconstitutedAzAngle)

        return particleArray,azArray

    def getPlots(self,arrayType : str,recordNo : int = 0):
        if arrayType == 'fourMomenta':    
            print(" Photon | Lepton | Jet  | MET |  pt   |   E   |  mass ")
            print("------------------------------------------------------")
            for i in self.datasetDict[recordNo]["fourMomenta"]:
                print("   "+ str(i[0]) + "    |" + "   " +str('%2d' % i[1]) + "   |" + "  "+str('%2d'%i[2])+"  |" + "  "+ str(i[3]) + "  |" + str('%7.3f' % i[4]) + "|" + str('%7.3f' % i[5]) + "|" + str('%7.3f' % i[6]))

        else :
            partNo = len(self.datasetDict[recordNo]["azimuthalAngle"])
            print(" "*10 + "|",end="")
            for i in range(partNo):
                print('%5d'%i + "     |",end="")
            print()
            print("-----------"*(partNo+1))
            for i in range(partNo):
                print('%5d'%i + "     |",end="")
                for j in range(partNo):
                    print('%10.5f'%self.datasetDict[recordNo]["azimuthalAngle"][i][j] + "|",end="")
                print()

In [41]:
s1 = GenDataset("/home/blizzard/Tests/hh_bbWW/Events/run_01/tag_1_delphes_events.root")
s1.createArrays()

In [42]:
s1.getPlots("azimuthalAngle",400)

          |    0     |    1     |    2     |    3     |    4     |    5     |    6     |
----------------------------------------------------------------------------------------
    0     |   0.00000|   5.36049|   4.54552|   1.75779|   3.80420|   3.60204|   3.07753|
    1     |   5.36049|   0.00000|   1.21813|   4.07778|   3.46719|   3.69358|   3.11844|
    2     |   4.54552|   1.21813|   0.00000|   3.57888|   2.25025|   2.47568|   1.95150|
    3     |   1.75779|   4.07778|   3.57888|   0.00000|   3.79644|   3.71306|   2.87402|
    4     |   3.80420|   3.46719|   2.25025|   3.79644|   0.00000|   0.29172|   0.93283|
    5     |   3.60204|   3.69358|   2.47568|   3.71306|   0.29172|   0.00000|   0.92129|
    6     |   3.07753|   3.11844|   1.95150|   2.87402|   0.93283|   0.92129|   0.00000|
