
Merge pull request cms-sw#740 from cbernet/coredevs
HeppyCore updates 2
gpetruc committed Sep 5, 2018
2 parents efb55f2 + 6a35974 commit 25d5d3a
Showing 6 changed files with 146 additions and 43 deletions.
14 changes: 11 additions & 3 deletions PhysicsTools/Heppy/python/analyzers/core/PileUpAnalyzer.py
@@ -79,9 +79,18 @@ def setupInputs(self, event=None):

            self.mcfile = TFile( self.cfg_comp.puFileMC )
            self.mchist = self.mcfile.Get('pileup')
            if self.mchist == None:  # '== None' on purpose, not 'is None': a missing key makes Get return a null proxy that compares equal to None
                # fall back to Artur's file structure: the distribution for each dataset
                # is stored in the root file with a key like
                # #SUSYGluGluToHToTauTau_M-3200_TuneCP5_13TeV-pythia8#RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1#MINIAODSIM
                key = self.cfg_comp.dataset.replace("/","#")
                self.mchist = self.mcfile.Get(key)
                if self.mchist == None:
                    raise ValueError('no pile up distribution for dataset {} in file {}'.format(
                        self.cfg_comp.dataset,
                        self.mcfile.GetName()
                    ))
            self.mchist.Scale( 1 / self.mchist.Integral(0, self.mchist.GetNbinsX() + 1) )

            # import pdb; pdb.set_trace()
            if self.mchist.GetNbinsX() != self.datahist.GetNbinsX():
                raise ValueError('data and mc histograms must have the same number of bins')
            if self.mchist.GetXaxis().GetXmin() != self.datahist.GetXaxis().GetXmin():
@@ -124,7 +133,6 @@ def beginLoop(self, setup):

    def process(self, event):
        self.readCollections( event.input )

        if self.autoPU and self.currentFile != event.input.events.object().getTFile().GetName():
            self.setupEventInputs(event)

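
For reference, the fallback lookup above can be exercised standalone with PyROOT; a minimal sketch, where the pile-up file name is hypothetical and the dataset is the one quoted in the comment above:

    from ROOT import TFile

    dataset = ('/SUSYGluGluToHToTauTau_M-3200_TuneCP5_13TeV-pythia8'
               '/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1'
               '/MINIAODSIM')
    mcfile = TFile('pileup_mc.root')     # hypothetical pile-up file
    mchist = mcfile.Get('pileup')        # generic key tried first
    if mchist == None:                   # '== None', not 'is None': Get returns a null proxy for missing keys
        key = dataset.replace('/', '#')  # per-dataset key, '#SUSY...#RunIIFall17...#MINIAODSIM'
        mchist = mcfile.Get(key)
    if mchist == None:
        raise ValueError('no pile up distribution for dataset {} in file {}'.format(
            dataset, mcfile.GetName()))
    # normalise to unit area, including the under- and overflow bins
    mchist.Scale(1.0 / mchist.Integral(0, mchist.GetNbinsX() + 1))
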
2 changes: 2 additions & 0 deletions PhysicsTools/Heppy/python/physicsobjects/PhysicsObject.py
@@ -27,3 +27,5 @@ def __getattr__(self,name):
        directly available.'''
        return getattr(self.physObj, name)

    def __repr__(self):
        return str(self)
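
The new __repr__ matters mainly for containers: lists and dicts print the repr() of their elements, so without it a collection of wrapped objects shows up as opaque <... object at 0x...> entries. A toy illustration (not the real PhysicsObject):

    class Particle(object):
        def __init__(self, pt):
            self.pt = pt
        def __str__(self):
            return 'Particle(pt=%.1f)' % self.pt
        def __repr__(self):
            # delegate to __str__, as PhysicsObject now does
            return str(self)

    print([Particle(30.), Particle(20.)])   # [Particle(pt=30.0), Particle(pt=20.0)]
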
127 changes: 100 additions & 27 deletions PhysicsTools/HeppyCore/python/framework/event.py
@@ -1,47 +1,120 @@
import pprint
import copy
import collections
import fnmatch

from ROOT import TChain

class Event(object):
    '''Event class.
    The Looper passes the Event object to each of its Analyzers,
    The Looper passes an Event object to each of its Analyzers,
    which in turn can:
    - read some information
    - add more information
    - modify existing information.
    Attributes:
      iEv = event processing index, starting at 0
      eventWeight = a weight, set to 1 at the beginning of the processing
      input = input, as determined by the looper
    A printout can be obtained by doing e.g.:
      event = Event(0)
      print event
    The printout can be controlled by the following class attributes:
      print_nstrip : number of items of a sequence to print before stripping the remaining items
      print_patterns : list of patterns. By default, this list is set to ['*'] so that all
                       attributes are printed
    Example:
      event = Event(0)
      Event.print_nstrip = 5 # print only the first 5 items of sequences
      Event.print_patterns = ['*particles*', 'jet*'] # only print the attributes that
                                                     # contain "particles" in their name or
                                                     # have a name starting with "jet"
    Object attributes:
      iEv: event processing index, starting at 0
      eventWeight: a weight, set to 1 at the beginning of the processing
      input: input, as determined by the looper
      analyzers: list of analyzers that processed this event, with their result, in the form:
        [(analyzer_name, result), ...]
    #TODO: provide a clear interface for access control (put, get, del products) - we should keep track of the name and id of the analyzer.
    '''

    def __init__(self, iEv, input_data=None, setup=None, eventWeight=1 ):
    print_nstrip = 10
    print_patterns = ['*']

    def __init__(self, iEv, input_data=None, setup=None, eventWeight=1):
        self.iEv = iEv
        self.input = input_data
        self.setup = setup
        self.eventWeight = eventWeight
        self.analyzers = []

    def _get_print_attrs(self, subname=""):
        '''returns a dict of the printable information of this event
        arguments
        * subname is used when called recursively and is the name of the parent object'''
        selected_attrs = copy.copy(self.__dict__)  # initial selection of what we can print
        selected_attrs.pop('setup')  # get rid of some bits
        selected_attrs.pop('input')

        # Colin: defining stripped_attrs
        stripped_attrs = dict()

        # first of all, check for matches with the print patterns
        for name, value in selected_attrs.iteritems():
            if any([fnmatch.fnmatch(name, pattern) for pattern in self.__class__.print_patterns]):
                stripped_attrs[name] = value
        for name, value in stripped_attrs.iteritems():
            if hasattr(value, '__len__') and \
               hasattr(value.__len__, '__call__') and \
               len(value) > self.__class__.print_nstrip + 1:
                # take the first print_nstrip elements and convert to a python list
                # note that value could be a wrapped C++ vector
                if isinstance(value, collections.Mapping):
                    entries = [entry for entry in value.iteritems()]
                    entries = entries[:self.__class__.print_nstrip]
                    stripped_attrs[name] = dict(entries)
                else:
                    stripped_attrs[name] = [ val for val in value[:self.__class__.print_nstrip] ]
                    stripped_attrs[name].append('...')
                    stripped_attrs[name].append(value[-1])
        return stripped_attrs

    def _print_elements(self, name, value):
        '''returns a dict ready for printing (limited to print_nstrip elements)
        Note that this function handles lists and dicts of dicts;
        the contents of lists are not handled recursively
        arguments
        * name = name of attribute
        * value = its value
        '''
        newdata = dict()
        if hasattr(value, '__len__') and isinstance(value, collections.Mapping):  # dict
            subdict = dict()
            for newname, entry in value.iteritems():  # allow recursion in case this dict contains a dict
                subdict.update(self._print_elements(newname, entry))
            if len(value) > self.__class__.print_nstrip+1:  # use only part of the dict
                entries = [entry for entry in subdict.iteritems()]
                entries = entries[:self.__class__.print_nstrip]
                entries.append(("...", "..."))  # no guarantee where this entry ends up in the printout
                newdata[name] = dict(entries)
            else:  # not too big, so using the whole dict is OK
                newdata[name] = subdict
        elif hasattr(value, '__len__') and len(value) > self.__class__.print_nstrip+1:  # list
            newdata[name] = [val for val in value[:self.__class__.print_nstrip]]
            newdata[name].append('...')
            newdata[name].append(value[-1])
        else:
            newdata[name] = value
        return newdata

    def __str__(self):
        header = '{type}: {iEv}'.format( type=self.__class__.__name__,
                                         iEv = self.iEv)
        varlines = []
        for var,value in sorted(vars(self).iteritems()):
            tmp = value
            # check for recursivity
            recursive = False
            if hasattr(value, '__getitem__') and \
               not isinstance(value, collections.Mapping) and \
               (len(value)>0 and value[0].__class__ == value.__class__):
                recursive = True
            if hasattr(value, '__contains__') and \
               not isinstance(value, (str,unicode)) and \
               not isinstance(value, TChain) and \
               not recursive :
                tmp = map(str, value)

            varlines.append( '\t{var:<15}: {value}'.format(var=var, value=tmp) )
        all = [ header ]
        all.extend(varlines)
        return '\n'.join( all )
        # prints an event showing at most print_nstrip elements of lists and dicts
        # if an event contains an event (such as a papasevent)
        # it will print the papasevent in the same way
        header = '{type}: {iEv}'.format(type=self.__class__.__name__, iEv=self.iEv)
        print_attrs = self._get_print_attrs()
        contents = pprint.pformat(print_attrs, indent=4)
        return '\n'.join([header, contents])
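
A usage sketch of the new print controls, following the docstring above; the event attributes set here are made up for illustration, and the import assumes the usual CMSSW python packaging of PhysicsTools/HeppyCore:

    from PhysicsTools.HeppyCore.framework.event import Event

    event = Event(0)
    event.gen_particles = range(100)                  # a long sequence
    event.jets30 = ['jet%d' % i for i in range(20)]
    event.run_info = {'era': '2017'}                  # will not match the patterns below

    Event.print_nstrip = 5                            # show only the first 5 items of long sequences
    Event.print_patterns = ['*particles*', 'jet*']    # print only the matching attributes

    print(event)
    # prints roughly:
    # Event: 0
    # {   'gen_particles': [0, 1, 2, 3, 4, '...', 99],
    #     'jets30': ['jet0', 'jet1', 'jet2', 'jet3', 'jet4', '...', 'jet19']}
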
29 changes: 20 additions & 9 deletions PhysicsTools/HeppyCore/python/framework/eventsfwlite.py
@@ -1,5 +1,8 @@
from DataFormats.FWLite import Events as FWLiteEvents

import logging
import pprint

from ROOT import gROOT, gSystem, AutoLibraryLoader

print "Loading FW Lite"
@@ -15,16 +18,24 @@

class Events(object):
    def __init__(self, files, tree_name, options=None):
        if options is not None :
            if not hasattr(options,"inputFiles"):
                options.inputFiles=files
            if not hasattr(options,"maxEvents"):
                options.maxEvents = 0
            if not hasattr(options,"secondaryInputFiles"):
                options.secondaryInputFiles = []
            self.events = FWLiteEvents(options=options)
        logging.info(
            'opening input files:\n{}'.format(pprint.pformat(files))
        )
        if options is not None :
            if not hasattr(options,"inputFiles"):
                options.inputFiles=files
            if not hasattr(options,"maxEvents"):
                options.maxEvents = 0
            if not hasattr(options,"secondaryInputFiles"):
                options.secondaryInputFiles = []
            elif options.secondaryInputFiles: # only if it's a non-empty list
                logging.info('using secondary input files:\n{}'.format(
                    pprint.pformat(options.secondaryInputFiles)
                ))
            self.events = FWLiteEvents(options=options)
        else :
            self.events = FWLiteEvents(files)
            self.events = FWLiteEvents(files)
        logging.info('done')

    def __len__(self):
        return self.events.size()
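
The hasattr defaulting above can be exercised with a bare stand-in for the VarParsing-style options object; a sketch with purely illustrative names:

    class Options(object):
        '''Stand-in for the options object normally built by VarParsing.'''
        pass

    files = ['file1.root', 'file2.root']   # hypothetical input files
    options = Options()

    # fill in whatever the caller did not provide, as the constructor above does
    if not hasattr(options, 'inputFiles'):
        options.inputFiles = files
    if not hasattr(options, 'maxEvents'):
        options.maxEvents = 0
    if not hasattr(options, 'secondaryInputFiles'):
        options.secondaryInputFiles = []

    print('%s %s %s' % (options.inputFiles, options.maxEvents, options.secondaryInputFiles))
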
1 change: 0 additions & 1 deletion PhysicsTools/HeppyCore/python/framework/heppy_loop.py
@@ -45,7 +45,6 @@ def runLoopAsync(comp, outDir, configName, options):
_globalGracefulStopFlag = multiprocessing.Value('i',0)
def runLoop( comp, outDir, config, options):
    fullName = '/'.join( [outDir, comp.name ] )
    # import pdb; pdb.set_trace()
    config.components = [comp]
    memcheck = 2 if getattr(options,'memCheck',False) else -1
    loop = Looper( fullName,
16 changes: 13 additions & 3 deletions PhysicsTools/HeppyCore/python/framework/looper.py
@@ -137,10 +137,13 @@ def doSigUsr2(sig,frame):
        # so that analyzers cannot modify the config of other analyzers.
        # but cannot copy the autofill config.
        self.setup = Setup(config, services)
        self.logger.info('looper initialized')

    def _build(self, cfg):
        self.logger.info('building {} ...'.format(cfg.name))
        theClass = cfg.class_object
        obj = theClass( cfg, self.cfg_comp, self.outDir )
        self.logger.info('done')
        return obj

    def _prepareOutput(self, name):
@@ -170,6 +173,7 @@ def loop(self):
        nEvents = self.nEvents
        firstEvent = self.firstEvent
        iEv = firstEvent
        self.logger.info('deciding on the number of events (can take a long time for a lot of input files...)')
        if nEvents is None or int(nEvents) > len(self.events) :
            nEvents = len(self.events)
        else:
@@ -181,11 +185,15 @@
                                           eventSize=eventSize))
        self.logger.info( str( self.cfg_comp ) )
        for analyzer in self.analyzers:
            self.logger.info('starting ' + analyzer.name)
            analyzer.beginLoop(self.setup)
            self.logger.info('beginLoop done')
        try:
            at_firstEvent = True
            for iEv in range(firstEvent, firstEvent+eventSize):
                # if iEv == nEvents:
                #     break
                if at_firstEvent:
                    self.logger.info('processing first event')
                self.process( iEv )
                if iEv%100 ==0:
                    # print 'event', iEv
                    if not hasattr(self,'start_time'):
@@ -195,7 +203,9 @@
                    else:
                        print 'event %d (%.1f ev/s)' % (iEv, (iEv-self.start_time_event)/float(timeit.default_timer() - self.start_time))

                self.process( iEv )
                if at_firstEvent:
                    self.logger.info('done first event')
                    at_firstEvent = False
                if iEv<self.nPrint:
                    print self.event
                if self.stopFlag and self.stopFlag.value:
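
The progress reporting in the loop above boils down to a simple pattern: start a timer at the first multiple of 100 events, then report an events-per-second rate at every later multiple. A self-contained sketch of that pattern (using a None sentinel rather than hasattr):

    import timeit

    def run(process, firstEvent, eventSize):
        '''Call process(iEv) for each event, printing a running rate every 100 events.'''
        start_time = None
        start_time_event = None
        for iEv in range(firstEvent, firstEvent + eventSize):
            process(iEv)
            if iEv % 100 == 0:
                if start_time is None:
                    print('event %d' % iEv)
                    start_time = timeit.default_timer()
                    start_time_event = iEv
                else:
                    rate = (iEv - start_time_event) / float(timeit.default_timer() - start_time)
                    print('event %d (%.1f ev/s)' % (iEv, rate))

    run(lambda iEv: None, 0, 1000)   # toy usage: 1000 no-op events
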
