Make our code friendlier to Python 3, to ease a future migration:

In [1]:
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)

Import for useful functions we may want

In [2]:
import os, sys, time

We need ROOT's functionality...

In [3]:
import ROOT

Welcome to JupyROOT 6.10/09


Below we'll import the basic PostProcessor

In [4]:
from importlib import import_module
from PhysicsTools.NanoAODTools.postprocessing.framework.postprocessor import PostProcessor

We want the Module Class, and Collection/Object helper methods

In [5]:
from PhysicsTools.NanoAODTools.postprocessing.framework.datamodel import Collection, Object
from PhysicsTools.NanoAODTools.postprocessing.framework.eventloop import Module

Here we define our module class, which we'll load/configure near the bottom and then finally run. It inherits from the base class "Module". Wherever we write 'def beginJob' or 'def analyze', we override the base Module's definition with our own.

In [12]:
class SingleLepton(Module):
    """Tutorial module: fill object-multiplicity histograms and dump the first few events.

    For every event passed to analyze() the jet, fat-jet and sub-jet multiplicities
    are histogrammed and the event counter is advanced. For the first
    DumpEventLimit events (when DumpEventInfo is True) a formatted table of the
    primary vertex, secondary vertices, muons, electrons, photons and jets is
    printed to standard output. Events beyond ProcessEventLimit are rejected.
    """
    #We can name our class almost anything, so long as it matches the name
    #we pass to the modules list inside the p=PostProcessor(...) definition below
    def __init__(self):
        """Set the bookkeeping counter and the switches that steer the event dump."""
        self.writeHistFile=True #Necessary to write out a histogram file in the PostProcessor framework
        self.counter = 0 #Instance-level count of the events seen by analyze()
        self.DumpEventInfo = True #Toggle for dumping event to standard output stream
        self.DumpEventLimit = 15 #Maximum number of events for which all this info is dumped, compared using the counter defined above
        self.ProcessEventLimit = 25 #Events after this many are failed (analyze returns False); histograms are still filled for them

    def beginJob(self,histFile=None,histDirName=None):
        """Book the histograms; histFile and histDirName are forwarded to Module.beginJob."""
        #beginJob is typically where histograms should be initialized
        #First call the default Module's beginJob, passing it self, the histFile, and histDirName
        Module.beginJob(self,histFile,histDirName)
        #Here, create a 1-D histogram of type Double(TH1D)
        #with histogram_name h_jets, and someTitle(title)/nJets(x-axis)/Events(y-axis), 20 bins, with domain 0 to 20
        self.h_jets = ROOT.TH1D('h_jets', 'someTitle;nJets;Events',   20, 0, 20)
        #This next line is necessary to 'book' our histogram with the service that will write everything to the output file
        self.addObject(self.h_jets)
        self.h_fatjets = ROOT.TH1D('h_fatjets', ';nFatJets;Events', 8, 0, 8)
        self.addObject(self.h_fatjets)
        self.h_subjets = ROOT.TH1D('h_subjets', ';nSubJets;Events', 16, 0, 16)
        self.addObject(self.h_subjets)
        #Single-bin histogram covering [1,2); analyze() fills it at 1.0 once per event
        self.h_eventcount = ROOT.TH1F('h_eventcount', 'nEvents',1,1,2)
        self.addObject(self.h_eventcount)

    #Cannot do pass in endJob if we write histograms... they don't get written! So do not override Module's endJob here.
    #beginFile/endFile are likewise left to the base Module; templates kept for reference:
    #def endJob(self):
        #pass
    #def beginFile(self, inputFile, outputFile):
        #pass
    #def endFile(self, inputFile, outputFile):
        #pass
    def analyze(self, event):
        #Start with a DOC string for our analyze method, denoted by triple-quotes
        """process event, return True (go to next module) or False (fail, go to next event)

        The multiplicity histograms and the event counter are updated for every
        event, before any pass/fail decision. Events after the first
        self.ProcessEventLimit return False. While self.DumpEventInfo is True, the
        first self.DumpEventLimit events are printed in detail.
        """

        self.h_jets.Fill(event.nJet)
        self.h_fatjets.Fill(event.nFatJet)
        self.h_subjets.Fill(event.nSubJet)

        self.counter += 1
        self.h_eventcount.Fill(1.0)

        #Below we halt execution here for events past the first N (self.ProcessEventLimit) by returning False now
        if self.counter > self.ProcessEventLimit:
            return False
        ###########################################
        ###### Basic Attributes of the Event ######
        ###########################################
        #Use basic python getattr() method to grab this info, no need for Object or Collection here
        run = getattr(event, "run")
        lumi = getattr(event, "luminosityBlock")
        evt = getattr(event, "event")

        ###########################################
        ###### Event Collections and Objects ######
        ###########################################
        #Collections are for variable-length objects, easily identified by a nVARIABLE object in the NanoAOD file ("nJet")
        #Objects are for 1-deep variables, like HLT triggers, where there are many of them, but there is only one boolean value
        #for each HLT_SomeSpecificTrigger for each event. This is just a wrapper, but convenient.
        #This will work for anything that has some common starting name (like "SV_x" and "SV_y" and "SV_z")
        electrons = Collection(event, "Electron")
        photons = Collection(event, "Photon")
        muons = Collection(event, "Muon")
        #taus = Collection(event, "Tau")
        jets = Collection(event, "Jet")
        fatjets = Collection(event, "FatJet")
        subjets = Collection(event, "SubJet")
        met = Object(event, "MET") #Not used below; kept as an example of wrapping a fixed-size branch group
        PV = Object(event, "PV")
        SV = Collection(event, "SV")

        ###############################
        ###### Time To Do Stuff! ######
        ###############################
        #Create a TLorentzVector to sum the four-momentum in the event (but don't doublecount by adding Fat/SubJets!)
        #NOTE: the sum is only accumulated inside the dump block below, which is the only place it is used
        eventSum = ROOT.TLorentzVector()

        #self.counter was already incremented for this event, so "<=" dumps exactly DumpEventLimit events
        if self.DumpEventInfo and self.counter <= self.DumpEventLimit:
            print("\n\nRun: {0:>8d} \tLuminosityBlock: {1:>8d} \tEvent: {2:>8d}".format(run,lumi,evt))
            #Below is a formatted output of the Primary Vertex's coordinates, number of Degrees of Freedom, Chi^2 value
            print("PV  X: {0: >5.3f} Y: {1: >5.3f} Z: {2:5.3f} nDoF: {3: >5.3f} Chi^2: {4: >5.3f}".format(
                PV.x,PV.y, PV.z, PV.ndof, PV.chi2)) #getattr(PV, "chi2") also works

            #Now we wish to count and print info about the secondary vertices (heavy flavour hadrons!) in the event, including decay length
            print("==============================================================================")
            print("|| Secondary Vertices\tNumber: {0: >3d} \t\t\t\t\t    ||".format(len(SV)))
            if len(SV) > 0:
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t{5:>5s}\t{6:>5s}\t\t    ||".format(
                    "Pt", "Eta", "Phi", "Chi2", "nDoF", "Mass", "dLen"))
            for vert in SV:
                print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t{5: >5.3f}\t{6: >5.3f}\t\t    ||".format(
                    getattr(vert,"pt"),getattr(vert,"eta"),getattr(vert,"phi"),
                    getattr(vert,"chi2"),getattr(vert,"ndof"),getattr(vert,"mass"),getattr(vert,"dlen")))

            #Now the Muons, where JetID is the array index of any jet that matches in eta-phi space (to be used for cross-cleaning)
            print("==============================================================================")
            print("|| Muons\t\tNumber: {0: >3d}\t\t\t\t\t    ||".format(len(muons)))
            if len(muons) > 0:
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t{5:>5s}\t{6:>5s}\t{7:>5s}  ||".format(
                        "Pt", "Eta", "Phi", "IP3d", "dXY", "dZ", "JetID", "PFRelIso04"))
                for lep in muons:
                    eventSum += lep.p4()
                    print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t{5: >5.3f}\t{6: >5d}\t{7: >5.3f}\t    ||".format(
                            getattr(lep,"pt"),getattr(lep,"eta"),getattr(lep,"phi"),getattr(lep,"ip3d"),
                            getattr(lep,"dxy"),getattr(lep,"dz"),getattr(lep,"jetIdx"),getattr(lep,"pfRelIso04_all")))
            print("==============================================================================")
            print("|| Electrons\tNumber: {0: >3d}\t\t\t\t\t\t    ||".format(len(electrons)))
            if len(electrons) > 0:
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t{5:>5s}\t{6:>5s}\t{7:>5s}  ||".format(
                        "Pt", "Eta", "Phi", "IP3d", "dXY", "dZ", "JetID", "PFRelIso03"))
                for lep in electrons:
                    eventSum += lep.p4()
                    print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t{5: >5.3f}\t{6: >5d}\t{7: >5.3f}\t    ||".format(
                            getattr(lep,"pt"),getattr(lep,"eta"),getattr(lep,"phi"),getattr(lep,"ip3d"),
                            getattr(lep,"dxy"),getattr(lep,"dz"),getattr(lep,"jetIdx"),getattr(lep,"pfRelIso03_all")))
            #Now the photons. Note whether it passes electronVeto, the electron it may be ID-linked with, etc.
            print("==============================================================================")
            print("|| Photons\tNumber: {0: >3d}\t\t\t\t\t\t    ||".format(len(photons)))
            if len(photons) > 0:
                #The header indices are permuted ({5},{6},{7},{4}) so the titles line up with the value order printed per photon
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{5:>5s}\t{6:>5s}\t{7:>5s}\t{4:>5s}  ||".format(
                        "Pt", "Eta", "Phi", "mvaID", "PFRelIso03", "e ID", "JetID", "eVeto"))
                for gamma in photons:
                    #Photons are deliberately not added to eventSum
                    #eventSum += gamma.p4()
                    print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5d}\t{5: >5d}\t{6: >5d}\t{7: >5.3f}\t    ||".format(
                            getattr(gamma,"pt"),getattr(gamma,"eta"),getattr(gamma,"phi"),getattr(gamma,"mvaID_WP80"),
                            getattr(gamma,"electronIdx"),getattr(gamma,"jetIdx"),getattr(gamma,"electronVeto"),
                            getattr(gamma,"pfRelIso03_all")))
            #Below we'll print info like the CombinedSecondaryVertex Version 2 b-tagging output, the charged electromagnetic and hadron energy fractions...
            print("==============================================================================")
            #for j in filter(self.jetSel,jets):
            print("|| AK4 Jets\tNumber: {0: >3d}\t\t\t\t\t\t    ||".format(len(jets)))
            print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t{5:>5s}\t{6:>5s}\t{7:>5s}       ||".format(
                    "Pt", "Eta", "Phi", "CSVv2", "CMVA", "JetID", "ChEmEF", "ChHEF"))
            for jet in jets:
                eventSum += jet.p4()
                print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t{5: >5d}\t{6: >5.3f}\t{7: >5.3f}\t    ||".format(
                        getattr(jet,"pt"),getattr(jet,"eta"),getattr(jet,"phi"),getattr(jet,"btagCSVV2"),
                        getattr(jet,"btagCMVA"),getattr(jet,"jetId"),getattr(jet,"chEmEF"),getattr(jet,"chHEF")))
            print("==============================================================================")
            print("|| AK8 Jets\tNumber: {0: >3d}\t\t\t\t\t\t    ||".format(len(fatjets)))
            if len(fatjets) > 0:
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t{5:>5s}\t{6:>5s}\t{7:>5s}       ||".format(
                        "Pt", "Eta", "Phi", "CSVv2", "Mass", "MSDrp", "sJID1", "sJID2"))
                for fjet in fatjets:
                    #Don't sum these in the event, as they're just reclustering of the same energy deposits used to construct "jets"
                    print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t{5: >5.3f}\t{6: >5d}\t{7: >5d}\t    ||".format(
                            getattr(fjet,"pt"),getattr(fjet,"eta"),getattr(fjet,"phi"),getattr(fjet,"btagCSVV2"),
                            getattr(fjet,"mass"),getattr(fjet,"msoftdrop"),getattr(fjet,"subJetIdx1"),
                            getattr(fjet,"subJetIdx2")))
            print("==============================================================================")
            print("|| AK8 SubJets\tNumber: {0: >3d}\t\t\t\t\t\t    ||".format(len(subjets)))
            if len(subjets) > 0:
                print("||\t{0:>5s}\t{1:>5s}\t{2:>5s}\t{3:>5s}\t{4:>5s}\t\t\t\t    ||".format(
                        "Pt", "Eta", "Phi", "CSVv2","Mass"))
                for sjet in subjets:
                    #Ditto here, no sum, since these should correspond to "jets" in the AK4 collection
                    print("||\t{0: >5.3f}\t{1: >5.3f}\t{2: >5.3f}\t{3: >5.3f}\t{4: >5.3f}\t\t\t\t    ||".format(
                            getattr(sjet,"pt"),getattr(sjet,"eta"),getattr(sjet,"phi"),getattr(sjet,"btagCSVV2"),
                            getattr(sjet,"mass")))
            print("==============================================================================")
            #Invariant mass of the summed muon + electron + AK4 jet four-momenta
            print("Event Mass: {:<10.4f}\n".format(eventSum.M()))

        ###########################################
        ###### Return True to pass the event ######
        ###########################################
        return True

We can do a preselection on any element in the file, for example, that at least one lepton shows up, like so...

In [23]:
#Event preselection applied before the modules run; written in ROOT cut syntax (hence "||" rather than "or").
#preselection=None #For when we want to loop over every event, and let the analyzer choose to pass/fail the event
preselection="nMuon > 0 || nElectron > 0"

#Need a location for files to be written.
#Just output where we run from for now using "." or a sibling directory like "../data"
outputDir = "."

#Json file for discarding events from bad detector conditions, etc.
theJsonPath="../data/json/2016Golden.json"

#Our list doesn't include the server, so we'll pre-pend it with this string in the following code
filePrefix = "root://cms-xrd-global.cern.ch/" #Make certain your proxy is valid
#Open the text list of files, found via Data Aggregation Service, as read-only ("r" option).
#The "with" block closes the file for us once the list has been built.
with open("../data/Run2016/Run2016B_DM", "r") as inputList:
    #Strip the trailing newline from every line (otherwise it ends up inside the URL) and skip blank lines
    files = [filePrefix + entry for entry in (line.strip() for line in inputList) if entry]
#for file in files:
#    print(file)
#These files appeared to not be valid at the time of writing... DNS problems in region, or files removed?
#NOTE(review): the un-stripped newline at the end of each name may also have contributed - re-test remote access
print(files[0])

#Experiment with just one file for now... creating a list with just the first element in the full list!
#onefile = [files[0]]
onefile = ["../data/2055267F-3110-E811-8956-AC1F6B1AEF94.root"]
print(onefile)

root://cms-xrd-global.cern.ch//store/data/Run2016B/DoubleMuon/NANOAOD/05Feb2018_ver2-v1/20000/04AAFB0C-F00F-E811-AF83-FA163E13F6E6.root

[u'../data/2055267F-3110-E811-8956-AC1F6B1AEF94.root']


Here we define the postprocessor with everything getting loaded, from the input files to the JSON file.

"Named" options mostly have a default value defined somewhere (e.g. "justcount=False" is actually the default).

"." is the outputdirectory, i.e. "Here" in the case of "." and a sibling directory would be "../data"

jsonInput: json file in dictionary format {"RunNumberInt": [[lumilow,lumihigh],[lumi2low,lumi2high]], "RunNumber2Int":[[low,high]]}

files: list of inputs, e.g. ["fileone.root","filetwo.root"]; this must be a list even when there is only one file!

branchsel: if non-None, selection of branches to not even activate/load into memory, more efficient I/O and speed-wise

outputbranchsel: if non-None, selection of branches to still include in output (see noOut)

noOut: If True, no output of skimmed data is written. If False, will write a full data file fitting the outputbranchsel
selections, with postfix concatenated to the name

justcount: Just counts events fitting selection criteria? (i.e. number of return True vs return False in analyze method)

postfix: string added to inputfile name to indicate this module processed it (paired with noOut=False!)

histFileName: name of any output file for histograms created in your class, as in beginJob above

histDirName: name INSIDE the "histFileName.root" file's directory structure!

friend: Not tested, but can be used to create "friend" trees to pair with original files.

provenance: not tested

haddFileName: Not properly tested (tied together with fwkJobReport)

fwkJobReport: only relevant with multiple files being added together? Not properly tested

In [24]:
#Build the PostProcessor: output directory and input file list are positional, everything else is passed by keyword.
p = PostProcessor(
    outputDir,
    onefile,
    #Event selection and the module(s) to run on each selected event
    cut=preselection,
    modules=[SingleLepton()],
    jsonInput=theJsonPath,
    #Branch selections (None: no selection file is supplied)
    branchsel=None,
    outputbranchsel=None,
    #Skimmed-tree output options
    noOut=True,
    postfix="_SLpost",
    friend=False,
    justcount=False,
    provenance=False,
    haddFileName=None,
    fwkJobReport=False,
    #Histogram output: file name and the directory created inside that file
    histFileName="histOut.root",
    histDirName="plots",
)

Now that everything is defined, we'll actually run the process and see its direct output:

In [25]:
#Execute the PostProcessor instance p; whatever it and the SingleLepton module print appears as this cell's output.
p.run()

Pre-select 11260 entries out of 11647 


Run:   273410 	LuminosityBlock:       50 	Event: 84373408
PV  X: 0.065 Y: 0.094 Z: -2.383 nDoF: 58.875 Chi^2: 0.928
|| Secondary Vertices	Number:   0 					    ||
|| Muons		Number:   1					    ||
||	   Pt	  Eta	  Phi	 IP3d	  dXY	   dZ	JetID	PFRelIso04  ||
||	5.355	-0.228	-0.030	0.029	-0.015	-0.025	    0	1.851	    ||
|| Electrons	Number:   0						    ||
|| Photons	Number:   0						    ||
|| AK4 Jets	Number:   2						    ||
||	   Pt	  Eta	  Phi	CSVv2	 CMVA	JetID	ChEmEF	ChHEF       ||
||	17.781	-0.295	-0.085	0.691	0.711	    3	0.000	0.273	    ||
||	15.016	0.348	-2.472	0.527	-0.856	    3	0.000	0.391	    ||
|| AK8 Jets	Number:   0						    ||
|| AK8 SubJets	Number:   0						    ||
Event Mass: 37.5124   



Run:   273410 	LuminosityBlock:       50 	Event: 84800197
PV  X: 0.066 Y: 0.090 Z: -1.028 nDoF: 88.250 Chi^2: 0.891
|| Secondary Vertices	Number:   0 					    ||
|| Muons		Number:   1					    ||
||	   Pt	  Eta	  Phi	 IP3d	  dXY	   dZ	JetID	PFRelIso04