
Merge pull request cms-sw#740 from cbernet/coredevs
HeppyCore updates 2
gpetruc committed Sep 5, 2018
2 parents efb55f2 + 6a35974 commit 25d5d3a
Showing 6 changed files with 146 additions and 43 deletions.
14 changes: 11 additions & 3 deletions PhysicsTools/Heppy/python/analyzers/core/PileUpAnalyzer.py
@@ -79,9 +79,18 @@ def setupInputs(self, event=None):

            self.mcfile = TFile( self.cfg_comp.puFileMC )
            self.mchist = self.mcfile.Get('pileup')
            if self.mchist == None:  # '== None' on purpose, not 'is None': a missing key makes Get return a null proxy that compares equal to None
                # fall back to Artur's file structure: the distribution for each dataset
                # is stored in the root file with a key like
                # #SUSYGluGluToHToTauTau_M-3200_TuneCP5_13TeV-pythia8#RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1#MINIAODSIM
                key = self.cfg_comp.dataset.replace("/","#")
                self.mchist = self.mcfile.Get(key)
                if self.mchist == None:
                    raise ValueError('no pile up distribution for dataset {} in file {}'.format(
                        self.cfg_comp.dataset,
                        self.mcfile.GetName()
                    ))
            self.mchist.Scale( 1 / self.mchist.Integral(0, self.mchist.GetNbinsX() + 1) )

            # import pdb; pdb.set_trace()
            if self.mchist.GetNbinsX() != self.datahist.GetNbinsX():
                raise ValueError('data and mc histograms must have the same number of bins')
            if self.mchist.GetXaxis().GetXmin() != self.datahist.GetXaxis().GetXmin():
@@ -124,7 +133,6 @@ def beginLoop(self, setup):

    def process(self, event):
        self.readCollections( event.input )

        if self.autoPU and self.currentFile != event.input.events.object().getTFile().GetName():
            self.setupEventInputs(event)

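
For reference, the fallback lookup above can be exercised standalone with PyROOT; a minimal sketch, where the pile-up file name is hypothetical and the dataset is the one quoted in the comment above:

    from ROOT import TFile

    dataset = ('/SUSYGluGluToHToTauTau_M-3200_TuneCP5_13TeV-pythia8'
               '/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14-v1'
               '/MINIAODSIM')
    mcfile = TFile('pileup_mc.root')     # hypothetical pile-up file
    mchist = mcfile.Get('pileup')        # generic key tried first
    if mchist == None:                   # '== None', not 'is None': Get returns a null proxy for missing keys
        key = dataset.replace('/', '#')  # per-dataset key, '#SUSY...#RunIIFall17...#MINIAODSIM'
        mchist = mcfile.Get(key)
    if mchist == None:
        raise ValueError('no pile up distribution for dataset {} in file {}'.format(
            dataset, mcfile.GetName()))
    # normalise to unit area, including the under- and overflow bins
    mchist.Scale(1.0 / mchist.Integral(0, mchist.GetNbinsX() + 1))
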
2 changes: 2 additions & 0 deletions PhysicsTools/Heppy/python/physicsobjects/PhysicsObject.py
@@ -27,3 +27,5 @@ def __getattr__(self,name):
        directly available.'''
        return getattr(self.physObj, name)

    def __repr__(self):
        return str(self)
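
The new __repr__ matters mainly for containers: lists and dicts print the repr() of their elements, so without it a collection of wrapped objects shows up as opaque <... object at 0x...> entries. A toy illustration (not the real PhysicsObject):

    class Particle(object):
        def __init__(self, pt):
            self.pt = pt
        def __str__(self):
            return 'Particle(pt=%.1f)' % self.pt
        def __repr__(self):
            # delegate to __str__, as PhysicsObject now does
            return str(self)

    print([Particle(30.), Particle(20.)])   # [Particle(pt=30.0), Particle(pt=20.0)]
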
127 changes: 100 additions & 27 deletions PhysicsTools/HeppyCore/python/framework/event.py
@@ -1,47 +1,120 @@
import pprint
import copy
import collections
import fnmatch

from ROOT import TChain

class Event(object):
    '''Event class.
    The Looper passes the Event object to each of its Analyzers,
    The Looper passes an Event object to each of its Analyzers,
    which in turn can:
    - read some information
    - add more information
    - modify existing information.
    Attributes:
      iEv = event processing index, starting at 0
      eventWeight = a weight, set to 1 at the beginning of the processing
      input = input, as determined by the looper
    A printout can be obtained by doing e.g.:
      event = Event(0)
      print event
    The printout can be controlled by the following class attributes:
      print_nstrip : number of items of a sequence to print before stripping the remaining items
      print_patterns : list of patterns. By default, this list is set to ['*'] so that all
                       attributes are printed
    Example:
      event = Event(0)
      Event.print_nstrip = 5 # print only the first 5 items of sequences
      Event.print_patterns = ['*particles*', 'jet*'] # only print the attributes that
                                                     # contain "particles" in their name or
                                                     # have a name starting with "jet"
    Object attributes:
      iEv: event processing index, starting at 0
      eventWeight: a weight, set to 1 at the beginning of the processing
      input: input, as determined by the looper
      analyzers: list of analyzers that processed this event, with their result, in the form:
        [(analyzer_name, result), ...]
    #TODO: provide a clear interface for access control (put, get, del products) - we should keep track of the name and id of the analyzer.
    '''

    def __init__(self, iEv, input_data=None, setup=None, eventWeight=1 ):
    print_nstrip = 10
    print_patterns = ['*']

    def __init__(self, iEv, input_data=None, setup=None, eventWeight=1):
        self.iEv = iEv
        self.input = input_data
        self.setup = setup
        self.eventWeight = eventWeight
        self.analyzers = []

    def _get_print_attrs(self, subname=""):
        '''returns a dict of the printable information of this event
        arguments
        * subname is used when called recursively and is the name of the parent object'''
        selected_attrs = copy.copy(self.__dict__)  # initial selection of what we can print
        selected_attrs.pop('setup')  # get rid of some bits
        selected_attrs.pop('input')

        # Colin: defining stripped_attrs
        stripped_attrs = dict()

        # first of all, check for matches with the print patterns
        for name, value in selected_attrs.iteritems():
            if any([fnmatch.fnmatch(name, pattern) for pattern in self.__class__.print_patterns]):
                stripped_attrs[name] = value
        for name, value in stripped_attrs.iteritems():
            if hasattr(value, '__len__') and \
               hasattr(value.__len__, '__call__') and \
               len(value) > self.__class__.print_nstrip + 1:
                # take the first print_nstrip elements and convert to a python list
                # note that value could be a wrapped C++ vector
                if isinstance(value, collections.Mapping):
                    entries = [entry for entry in value.iteritems()]
                    entries = entries[:self.__class__.print_nstrip]
                    stripped_attrs[name] = dict(entries)
                else:
                    stripped_attrs[name] = [ val for val in value[:self.__class__.print_nstrip] ]
                    stripped_attrs[name].append('...')
                    stripped_attrs[name].append(value[-1])
        return stripped_attrs

    def _print_elements(self, name, value):
        '''returns a dict ready for printing (limited to print_nstrip elements)
        Note that this function handles lists and dicts of dicts;
        the contents of lists are not handled recursively
        arguments
        * name = name of attribute
        * value = its value
        '''
        newdata = dict()
        if hasattr(value, '__len__') and isinstance(value, collections.Mapping):  # dict
            subdict = dict()
            for newname, entry in value.iteritems():  # allow recursion in case this dict contains a dict
                subdict.update(self._print_elements(newname, entry))
            if len(value) > self.__class__.print_nstrip+1:  # use only part of the dict
                entries = [entry for entry in subdict.iteritems()]
                entries = entries[:self.__class__.print_nstrip]
                entries.append(("...", "..."))  # no guarantee where this entry ends up in the printout
                newdata[name] = dict(entries)
            else:  # not too big, so using the whole dict is OK
                newdata[name] = subdict
        elif hasattr(value, '__len__') and len(value) > self.__class__.print_nstrip+1:  # list
            newdata[name] = [val for val in value[:self.__class__.print_nstrip]]
            newdata[name].append('...')
            newdata[name].append(value[-1])
        else:
            newdata[name] = value
        return newdata

    def __str__(self):
        header = '{type}: {iEv}'.format( type=self.__class__.__name__,
                                         iEv = self.iEv)
        varlines = []
        for var,value in sorted(vars(self).iteritems()):
            tmp = value
            # check for recursivity
            recursive = False
            if hasattr(value, '__getitem__') and \
               not isinstance(value, collections.Mapping) and \
               (len(value)>0 and value[0].__class__ == value.__class__):
                recursive = True
            if hasattr(value, '__contains__') and \
               not isinstance(value, (str,unicode)) and \
               not isinstance(value, TChain) and \
               not recursive :
                tmp = map(str, value)

            varlines.append( '\t{var:<15}: {value}'.format(var=var, value=tmp) )
        all = [ header ]
        all.extend(varlines)
        return '\n'.join( all )
        # prints an event showing at most print_nstrip elements of lists and dicts
        # if an event contains an event (such as a papasevent)
        # it will print the papasevent in the same way
        header = '{type}: {iEv}'.format(type=self.__class__.__name__, iEv=self.iEv)
        print_attrs = self._get_print_attrs()
        contents = pprint.pformat(print_attrs, indent=4)
        return '\n'.join([header, contents])
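
A usage sketch of the new print controls, following the docstring above; the event attributes set here are made up for illustration, and the import assumes the usual CMSSW python packaging of PhysicsTools/HeppyCore:

    from PhysicsTools.HeppyCore.framework.event import Event

    event = Event(0)
    event.gen_particles = range(100)                  # a long sequence
    event.jets30 = ['jet%d' % i for i in range(20)]
    event.run_info = {'era': '2017'}                  # will not match the patterns below

    Event.print_nstrip = 5                            # show only the first 5 items of long sequences
    Event.print_patterns = ['*particles*', 'jet*']    # print only the matching attributes

    print(event)
    # prints roughly:
    # Event: 0
    # {   'gen_particles': [0, 1, 2, 3, 4, '...', 99],
    #     'jets30': ['jet0', 'jet1', 'jet2', 'jet3', 'jet4', '...', 'jet19']}
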
29 changes: 20 additions & 9 deletions PhysicsTools/HeppyCore/python/framework/eventsfwlite.py
@@ -1,5 +1,8 @@
from DataFormats.FWLite import Events as FWLiteEvents

import logging
import pprint

from ROOT import gROOT, gSystem, AutoLibraryLoader

print "Loading FW Lite"
@@ -15,16 +18,24 @@

class Events(object):
    def __init__(self, files, tree_name, options=None):
        if options is not None :
            if not hasattr(options,"inputFiles"):
                options.inputFiles=files
            if not hasattr(options,"maxEvents"):
                options.maxEvents = 0
            if not hasattr(options,"secondaryInputFiles"):
                options.secondaryInputFiles = []
            self.events = FWLiteEvents(options=options)
        logging.info(
            'opening input files:\n{}'.format(pprint.pformat(files))
        )
        if options is not None :
            if not hasattr(options,"inputFiles"):
                options.inputFiles=files
            if not hasattr(options,"maxEvents"):
                options.maxEvents = 0
            if not hasattr(options,"secondaryInputFiles"):
                options.secondaryInputFiles = []
            elif options.secondaryInputFiles: # only if it's a non-empty list
                logging.info('using secondary input files:\n{}'.format(
                    pprint.pformat(options.secondaryInputFiles)
                ))
            self.events = FWLiteEvents(options=options)
        else :
            self.events = FWLiteEvents(files)
            self.events = FWLiteEvents(files)
        logging.info('done')

    def __len__(self):
        return self.events.size()
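
The hasattr defaulting above can be exercised with a bare stand-in for the VarParsing-style options object; a sketch with purely illustrative names:

    class Options(object):
        '''Stand-in for the options object normally built by VarParsing.'''
        pass

    files = ['file1.root', 'file2.root']   # hypothetical input files
    options = Options()

    # fill in whatever the caller did not provide, as the constructor above does
    if not hasattr(options, 'inputFiles'):
        options.inputFiles = files
    if not hasattr(options, 'maxEvents'):
        options.maxEvents = 0
    if not hasattr(options, 'secondaryInputFiles'):
        options.secondaryInputFiles = []

    print('%s %s %s' % (options.inputFiles, options.maxEvents, options.secondaryInputFiles))
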
1 change: 0 additions & 1 deletion PhysicsTools/HeppyCore/python/framework/heppy_loop.py
@@ -45,7 +45,6 @@ def runLoopAsync(comp, outDir, configName, options):
_globalGracefulStopFlag = multiprocessing.Value('i',0)
def runLoop( comp, outDir, config, options):
    fullName = '/'.join( [outDir, comp.name ] )
    # import pdb; pdb.set_trace()
    config.components = [comp]
    memcheck = 2 if getattr(options,'memCheck',False) else -1
    loop = Looper( fullName,
16 changes: 13 additions & 3 deletions PhysicsTools/HeppyCore/python/framework/looper.py
@@ -137,10 +137,13 @@ def doSigUsr2(sig,frame):
        # so that analyzers cannot modify the config of other analyzers.
        # but cannot copy the autofill config.
        self.setup = Setup(config, services)
        self.logger.info('looper initialized')

    def _build(self, cfg):
        self.logger.info('building {} ...'.format(cfg.name))
        theClass = cfg.class_object
        obj = theClass( cfg, self.cfg_comp, self.outDir )
        self.logger.info('done')
        return obj

    def _prepareOutput(self, name):
@@ -170,6 +173,7 @@ def loop(self):
        nEvents = self.nEvents
        firstEvent = self.firstEvent
        iEv = firstEvent
        self.logger.info('deciding on the number of events (can take a long time for a lot of input files...)')
        if nEvents is None or int(nEvents) > len(self.events) :
            nEvents = len(self.events)
        else:
@@ -181,11 +185,15 @@
                                           eventSize=eventSize))
        self.logger.info( str( self.cfg_comp ) )
        for analyzer in self.analyzers:
            self.logger.info('starting ' + analyzer.name)
            analyzer.beginLoop(self.setup)
            self.logger.info('beginLoop done')
        try:
            at_firstEvent = True
            for iEv in range(firstEvent, firstEvent+eventSize):
                # if iEv == nEvents:
                #     break
                if at_firstEvent:
                    self.logger.info('processing first event')
                self.process( iEv )
                if iEv%100 ==0:
                    # print 'event', iEv
                    if not hasattr(self,'start_time'):
@@ -195,7 +203,9 @@
                    else:
                        print 'event %d (%.1f ev/s)' % (iEv, (iEv-self.start_time_event)/float(timeit.default_timer() - self.start_time))

                self.process( iEv )
                if at_firstEvent:
                    self.logger.info('done first event')
                    at_firstEvent = False
                if iEv<self.nPrint:
                    print self.event
                if self.stopFlag and self.stopFlag.value:
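
The progress reporting in the loop above boils down to a simple pattern: start a timer at the first multiple of 100 events, then report an events-per-second rate at every later multiple. A self-contained sketch of that pattern (using a None sentinel rather than hasattr):

    import timeit

    def run(process, firstEvent, eventSize):
        '''Call process(iEv) for each event, printing a running rate every 100 events.'''
        start_time = None
        start_time_event = None
        for iEv in range(firstEvent, firstEvent + eventSize):
            process(iEv)
            if iEv % 100 == 0:
                if start_time is None:
                    print('event %d' % iEv)
                    start_time = timeit.default_timer()
                    start_time_event = iEv
                else:
                    rate = (iEv - start_time_event) / float(timeit.default_timer() - start_time)
                    print('event %d (%.1f ev/s)' % (iEv, rate))

    run(lambda iEv: None, 0, 1000)   # toy usage: 1000 no-op events
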
