In [1]:
from __future__ import print_function, division
import gzip
import pickle

import matplotlib.pyplot as plt
import numpy as np
import uproot

%matplotlib inline
from fnal_column_analysis_tools import hist

In [2]:
# Cross section per dataset, labelled [fb] like the luminosity below.
# NOTE(review): these values look like the cross sections usually quoted in
# pb for these samples -- confirm that the units of `dataset_xs` and `lumi`
# are consistent with each other (only their product enters the scaling).
dataset_xs = {
    "ZJetsToQQ_HT600to800_qc19_4j_TuneCP5_13TeV": 34.0,
    "ZJetsToQQ_HT_800toInf_qc19_4j_TuneCP5_13TeV": 18.67,
    "WJetsToQQ_HT600to800_qc19_3j_TuneCP5_13TeV": 68.57,
    "WJetsToQQ_HT_800toInf_qc19_3j_TuneCP5_13TeV": 34.9,
}

# Input files per dataset.  Every dataset currently consists of a single
# ROOT file named "<dataset>.root" in the working directory; replace an
# entry with an explicit list if a dataset is split over several files.
flist = dict((name, [name + ".root"]) for name in dataset_xs)

# Integrated luminosity the histograms are normalised to.
lumi = 1000.  # /fb

In [3]:
# Axes of the output histogram: one category axis for the dataset name and
# four binned axes for the leading AK8 jet quantities filled below.
dataset_axis = hist.Cat("dataset", "Dataset name")
gencat_axis = hist.Bin("gencat", "Gen. boson type", 4, 0, 4)
doubleb_axis = hist.Bin("doubleB", "DeepCSV double-B score", 40, 0, 1)
# 23 bins between 40 and 201, i.e. 7 GeV per bin.
msd_axis = hist.Bin("msd", "Soft-drop mass [GeV]", 23, 40, 201)
# Explicit (non-uniform) bin edges for the jet pT.
ptj_edges = [450, 500, 550, 600, 675, 800, 1000]
ptj_axis = hist.Bin("ptj", "Jet $p_T$ [GeV]", ptj_edges)

final = hist.Hist(
    "Events / bin",
    dataset_axis,
    gencat_axis,
    doubleb_axis,
    msd_axis,
    ptj_axis,
)

In [4]:
%%time

# Reset the histogram first, presumably so that re-running this cell does
# not accumulate on top of an earlier fill.
final.clear()

# Branches read from the "Events" tree of every input file.
fields = [
    "evtWeight", "kfactor", "kfactorNLO",
    "AK8Puppijet0_doublecsv", "AK8Puppijet0_msd", "AK8Puppijet0_pt",
    "AK8Puppijet0_isHadronicV",
]

# Total number of tree entries read per dataset, accumulated as a float;
# used afterwards to normalise each dataset.
nevents = {}
for dataset, files in flist.items():
    for fname in files:
        tree = uproot.open(fname)["Events"]
        nevents[dataset] = nevents.get(dataset, 0.) + tree.numentries

        # Load all requested branches at once, keyed by their (ascii) names.
        events = tree.arrays(fields, namedecode='ascii')

        # Per-event weight: generator weight times both k-factor branches.
        weight = events["evtWeight"] * events["kfactor"] * events["kfactorNLO"]

        # Keyword names match the axis names of `final`.
        final.fill(
            dataset=dataset,
            gencat=events["AK8Puppijet0_isHadronicV"],
            doubleB=events["AK8Puppijet0_doublecsv"],
            msd=events["AK8Puppijet0_msd"],
            ptj=events["AK8Puppijet0_pt"],
            weight=weight,
        )

CPU times: user 11.2 s, sys: 1.67 s, total: 12.8 s
Wall time: 18.5 s


In [5]:
# Per-dataset normalisation factor: lumi * cross section divided by the
# number of tree entries that were read for that dataset.
scale = {ds: lumi * xs / nevents[ds] for ds, xs in dataset_xs.items()}

# NOTE(review): the return value of scale() is not used, so it appears to
# modify `final` in place -- re-running this cell without re-running the
# fill cell would then apply the factors a second time.
final.scale(scale, axis="dataset")

print("Events processed:", sum(nevents.values()))

Events processed: 10227949.0


In [6]:
# Write the filled and scaled histogram to a gzip-compressed pickle so it can
# be reloaded without re-reading the ROOT files.  `pickle` is imported with
# the other modules in the first cell.
# NOTE: pickle files can execute arbitrary code when loaded; only load
# "final.pkl.gz" from a trusted source.
with gzip.open("final.pkl.gz", "wb") as fout:
    pickle.dump(final, fout)