In [1]:
import os
import numpy
import json
from coffea import processor, hist, util
from coffea.util import save, load
from optparse import OptionParser
from coffea.lookup_tools.dense_lookup import dense_lookup
import awkward as ak


In [2]:
class BTagEfficiency(processor.ProcessorABC):
    """Coffea processor that histograms jets passing / failing b-tag working points.

    For each tagger ('deepflav', 'deepcsv') one histogram is filled with the
    hadron flavour, pT and |eta| of the selected jets, categorised by dataset,
    working point ('loose', 'medium', 'tight') and whether the jet passes or
    fails that working point.

    Parameters
    ----------
    year : hashable
        Key used to pick the thresholds for a given data-taking period.
    wp : mapping
        Working-point thresholds, indexed as ``wp[tagger][year][wp_name]``.
    """

    def __init__(self, year,wp):
        self._year = year
        self._btagWPs = wp

        def efficiency_hist():
            # Both taggers use exactly the same axes, so build them from one place.
            return hist.Hist(
                'Events',
                hist.Cat('dataset', 'Dataset'),
                hist.Cat('wp', 'Working point'),
                hist.Cat('btag', 'BTag WP pass/fail'),
                hist.Bin('flavor', 'Jet hadronFlavour', [0, 4, 5, 6]),
                hist.Bin('pt', 'Jet pT', [20, 30, 50, 70, 100, 140, 200, 300, 600, 1000]),
                hist.Bin('abseta', 'Jet abseta', [0, 1.4, 2.0, 2.5]),
            )

        self._accumulator = processor.dict_accumulator({
            'deepflav': efficiency_hist(),
            'deepcsv': efficiency_hist(),
        })

    @property
    def accumulator(self):
        """Accumulator template holding one histogram per tagger."""
        return self._accumulator

    def process(self, events):
        """Fill the pass/fail histograms for one chunk of events.

        Reads ``events.metadata['dataset']`` and ``events.Jet`` and returns a
        fresh accumulator (``self.accumulator.identity()``) with the fills
        from this chunk.
        """
        dataset = events.metadata['dataset']

        # Turn the jetId bit test into a real boolean before combining it with
        # the other masks: `bool_mask & (jetId & 2)` promotes the boolean to an
        # integer, so True & 2 evaluates as 1 & 2 == 0 and the result is an
        # all-zero integer array instead of a selection mask.
        tight_id = (events.Jet.jetId & 2) != 0  # tight id
        jets = events.Jet[
            (events.Jet.pt > 30.)
            & (abs(events.Jet.eta) < 2.5)
            & tight_id
        ]

        # Histogram key of each tagger -> jet field holding its discriminator.
        name = {
            'deepflav': 'btagDeepFlavB',
            'deepcsv': 'btagDeepB',
        }

        out = self.accumulator.identity()

        for wp in ['loose','medium','tight']:
            for tagger in ['deepflav','deepcsv']:
                passbtag = jets[name[tagger]] > self._btagWPs[tagger][self._year][wp]
                # Apply each selection once and reuse it for all three axes.
                for label, selected in (('pass', jets[passbtag]), ('fail', jets[~passbtag])):
                    out[tagger].fill(
                        dataset=dataset,
                        wp=wp,
                        btag=label,
                        flavor=ak.flatten(selected.hadronFlavour),
                        pt=ak.flatten(selected.pt),
                        abseta=ak.flatten(abs(selected.eta)),
                    )
        return out

    def postprocess(self, a):
        """Return the merged accumulator unchanged."""
        return a



In [3]:

# Run configuration: the year key used to select b-tag thresholds and the
# JSON file (under metadata/) that is loaded into `samplefiles`.
year = '2018'
jsonfile = 'KIT_UL_2018_v3.json'

metadata_path = 'metadata/'+jsonfile
with open(metadata_path) as metadata_file:
    samplefiles = json.load(metadata_file)

# Build the processor from the working points stored in the common
# configuration file and persist it so it can be reloaded by name later.
common = load('data/common.coffea')
btag_working_points = common['btagWPs']
processor_instance = BTagEfficiency(year=year, wp=btag_working_points)

processor_path = 'data/btagUL'+year+'.processor'
save(processor_instance, processor_path)

In [4]:
import lz4.frame as lz4f
import pickle
import json
import time
import cloudpickle
import gzip
import os
from optparse import OptionParser
from coffea.nanoevents import NanoAODSchema, NanoEventsFactory
from coffea.nanoevents.methods import nanoaod

NanoAODSchema.warn_missing_crossrefs = False
import uproot
import numpy as np
from coffea import hist, processor
from coffea.util import load, save


In [5]:

# Derive the processor name from `year` so it always matches the file written
# as 'data/btagUL'+year+'.processor' when the processor was saved.
processor_name = 'btagUL' + year
# splitext only strips the final extension, so a metadata file name that
# itself contains dots is not truncated.
metadata = os.path.splitext(jsonfile)[0]
print(metadata)
datasets = False  # falsy: no substring filter on dataset names
workers = 8

processor_instance = load('data/'+processor_name+'.processor')

fileslice = slice(None)  # slice applied to each dataset's file list; None keeps all
with open("metadata/"+metadata+".json") as fin:
    samplefiles = json.load(fin)


KIT_UL_2018_v3


In [12]:

# Run the processor over each selected dataset and save one output file per
# dataset under hists/<processor_name>/.
for dataset, info in samplefiles.items():
    # Only this one dataset is processed in this run.
    if not dataset == 'QCD_Pt_800to1000_TuneCP5_13TeV_pythia8____14_': continue
    # Optional substring filter; skipped entirely while `datasets` is falsy.
    if datasets and datasets not in dataset: continue
    print('Processing:',dataset)
    filelist = {dataset: list(info['files'][fileslice])}

    tstart = time.time()
    output = processor.run_uproot_job(filelist,
                                      "Events",
                                       processor_instance=processor_instance,
                                       executor=processor.futures_executor,
                                       executor_args={'schema': NanoAODSchema,'workers': workers},
                                       )

    #nbins = sum(sum(arr.size for arr in h._sumw.values()) for h in output.values() if isinstance(h, hist.Hist))
    #nfilled = sum(sum(np.sum(arr > 0) for arr in h._sumw.values()) for h in output.values() if isinstance(h, hist.Hist))
    #print("Filled %.1fM bins" % (nbins/1e6, ))
    #print("Nonzero bins: %.1f%%" % (100*nfilled/nbins, ))

    # Create the output directory without going through a shell.
    outdir = 'hists/'+processor_name
    os.makedirs(outdir, exist_ok=True)
    save(output, outdir+'/'+dataset+'.futures')
    # Wall time (including the save) scaled by the number of workers.
    dt = time.time() - tstart
    nworkers = workers
    print("%.2f us*cpu overall" % (1e6*dt*nworkers, ))

Processing: QCD_Pt_800to1000_TuneCP5_13TeV_pythia8____14_


Processing:   0%|          | 0/3 [00:00<?, ?chunk/s]

4526139322.28 us*cpu overall
