In [1]:
import numpy as np
%matplotlib inline
from coffea import hist
import coffea.processor as processor
import awkward as ak

In [2]:
# This program plots a per-event array (jet_pt) that has been masked to meet certain conditions (in this case, abs(jet eta) < 1).

class Processor(processor.ProcessorABC):
    """Histogram the pt of jets that satisfy abs(eta) < 1, and record a cutflow.

    The accumulator returned by `process` contains:
      * 'Jet_pt'  -- histogram of jet pt (100 bins spanning 15 to 60 on the
                     "Jet_pt [GeV]" axis), categorized by dataset name.
      * 'cutflow' -- integer counters filled at each step of the selection.
    """

    def __init__(self):
        dataset_axis = hist.Cat("dataset", "")
        Jet_axis = hist.Bin("Jet_pt", "Jet_pt [GeV]", 100, 15, 60)

        self._accumulator = processor.dict_accumulator({
            'Jet_pt': hist.Hist("Counts", dataset_axis, Jet_axis),
            'cutflow': processor.defaultdict_accumulator(int)
        })

    @property
    def accumulator(self):
        """The template accumulator; `process` starts from an empty copy of it."""
        return self._accumulator

    def process(self, events):
        """Apply the abs(eta) < 1 jet selection to one chunk of events.

        Args:
            events: a chunk of events exposing a `Jet` collection and a
                `metadata['dataset']` entry naming the dataset it came from.

        Returns:
            A fresh accumulator holding this chunk's histogram entries and
            cutflow counts.
        """
        output = self.accumulator.identity()

        dataset = events.metadata['dataset']

        # NanoEvents allows easy access to our jets with the Jet column.
        jets = events.Jet

        # jets has one (possibly empty) sublist per event, so its outer length
        # is the number of events in this chunk.
        output['cutflow']['all events'] += len(jets)
        output['cutflow']['number of jets'] += ak.sum(ak.num(jets))

        # We want jets with abs(eta) < 1. Comparisons act element-wise on every
        # value in the array, so this yields one Boolean per jet.
        eta_max = (np.absolute(jets.eta) < 1)
        # True counts as 1 when summed: sum within each event's sublist, then
        # sum over events, to get the number of jets with abs(eta) < 1.
        output['cutflow']['abs(eta) < 1'] += ak.sum(ak.sum(eta_max, axis=1))

        # good_jets are the jets we actually want to plot: those passing the mask.
        good_jets = jets[eta_max]
        # good_jets holds jets, not Booleans, so count the jets per event and
        # sum those counts instead of summing Trues.
        output['cutflow']['final good jets'] += ak.sum(ak.num(good_jets, axis=1))

        # The histogram needs a flat list of values, so drop the per-event nesting.
        output['Jet_pt'].fill(dataset=dataset, Jet_pt=ak.flatten(good_jets.pt))
        return output

    def postprocess(self, accumulator):
        """Return the merged accumulator unchanged; no post-processing is needed."""
        return accumulator

In [None]:
from dask.distributed import Client
import time

# Attach to the Dask scheduler that will distribute the processing.
client = Client("tls://localhost:8786")


def fix():
    """Worker-side setup: have NanoAODSchema warn about missing cross-references."""
    from coffea.nanoevents import NanoAODSchema
    NanoAODSchema.warn_missing_crossrefs = True


# The public NanoAOD file used here lacks some cross-references. That is
# harmless for this analysis, so the schema is told to warn rather than fail:
# once in this local process, and once on every Dask worker via the callback.
processor.NanoAODSchema.warn_missing_crossrefs = True
client.register_worker_callbacks(fix)

fileset = {
    'SingleMu': [
        "root://eospublic.cern.ch//eos/root-eos/benchmark/Run2012B_SingleMu.root",
    ],
}

output = processor.run_uproot_job(
    fileset,
    treename='Events',
    processor_instance=Processor(),
    executor=processor.dask_executor,
    executor_args={'schema': processor.NanoAODSchema, 'client': client},
)

[####################################### ] | 99% Completed |  1min 50.9s

In [None]:
# Draw the filled Jet_pt histogram, overlaying one curve per dataset.
hist.plot1d(
    output['Jet_pt'],
    overlay='dataset',
    fill_opts=dict(edgecolor=(0, 0, 0, 0.3), alpha=0.8),
)

In [None]:
# Show every cutflow step next to its accumulated count.
for step, count in output['cutflow'].items():
    print(step, count)