In [None]:
import os
import sys
sys.path.append('../../')
import yaml
import uproot
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import mplhep 
mplhep.style.use('CMS')
from coffea.nanoevents import NanoEventsFactory, BaseSchema, NanoAODSchema
from coffea import util
from btag_eff_processor import bTagEffProcessor
from btag_utils import *
from utils.sample_utils import *

In [None]:
# Build the path to the 2018 UL MC sample list and load it through the
# project helper `get_fileset`; the result is then shallow-copied into a
# plain dict (presumably dataset name -> list of files; verify in the helper).
sample_yaml = os.path.join('../../sample_lists/sample_yamls', "MC_UL_2018.yaml")
fileset = dict(get_fileset(sample_yaml).items())

In [None]:
# `processor` is not imported explicitly anywhere in this notebook (only
# `util` is imported from coffea), so import it here rather than relying on
# it leaking in through one of the `import *` lines.
from coffea import processor

# Run the b-tag efficiency processor over every dataset in `fileset`.
proc_instance = bTagEffProcessor()

out = processor.run_uproot_job(
    fileset,
    treename="Events",
    processor_instance=proc_instance,
    executor=processor.futures_executor,
    # Two local worker processes; events are interpreted with NanoAODSchema.
    executor_args={"schema": NanoAODSchema, 'workers': 2},
)

In [None]:
# Show the top-level keys of the processor output (it is indexed by sample
# name, e.g. 'TTToSemiLeptonic_2018', in the following cells).
out.keys()

In [None]:
# Pull the four per-entry accumulators of the ttbar sample out of the
# processor output as plain arrays (via each accumulator's `.value`).
mc = out['TTToSemiLeptonic_2018']
nbjets, nbtags, pt_bins, eta_bins = (
    mc[field].value for field in ('nbjets', 'nbtags', 'pt_bins', 'eta_bins')
)

In [None]:
# Print the tagging efficiency (sum of nbtags / sum of nbjets) for every
# combination of distinct pt-bin and eta-bin values found in the ttbar sample.
unique_pt_bins = np.unique(mc['pt_bins'].value)
unique_eta_bins = np.unique(mc['eta_bins'].value)
for pt_bin in unique_pt_bins:
    pt_mask = (pt_bins == pt_bin)
    for eta_bin in unique_eta_bins:
        in_bin = pt_mask & (eta_bins == eta_bin)
        num, denom = sum(nbtags[in_bin]), sum(nbjets[in_bin])
        # An empty bin is reported with efficiency 0 instead of dividing by zero.
        if denom != 0:
            eff = num / denom
        else:
            eff = 0
        print(pt_bin, eta_bin, eff)

In [None]:
# Load a saved coffea object from disk (presumably the output of an earlier
# run of the efficiency processor; confirm how the file was produced).
# NOTE: this rebinds `out`, replacing the result of the processor run above.
out = util.load('UL_2018/MC_UL_2018_btag_effs.coffea')
out

In [None]:
# Collect the distinct pt-bin and eta-bin values recorded for the ttbar
# sample; the following cells reuse them as the binning for every sample.
ttbar = out['TTToSemiLeptonic_2018']
pt_bins, eta_bins = ttbar['pt_bins'].value, ttbar['eta_bins'].value
unique_pt_bins, unique_eta_bins = np.unique(pt_bins), np.unique(eta_bins)
print(unique_pt_bins)
print(unique_eta_bins)

In [None]:
# Build, for every sample, a lookup table of b-tag efficiencies keyed by
# (pt-bin index, eta-bin index), where the indices enumerate
# `unique_pt_bins` / `unique_eta_bins`.
#
# NOTE(review): the table is later indexed with np.digitize(x, unique_*_bins),
# which returns k+1 for a value between edges k and k+1. Whether that matches
# the enumerate() indices used here depends on how the processor labels its
# bins; confirm.
tables = {}
mpt = len(unique_pt_bins)    # index of the pt overflow row
meta = len(unique_eta_bins)  # index of the eta overflow column
for sample in out.keys():
    data = out[sample]
    pt_bins = data['pt_bins'].value
    eta_bins = data['eta_bins'].value
    nbjets = data['nbjets'].value
    nbtags = data['nbtags'].value
    table = {}
    for i, pt_bin in enumerate(unique_pt_bins):
        for j, eta_bin in enumerate(unique_eta_bins):
            in_bin = ((pt_bins == pt_bin) & (eta_bins == eta_bin))
            num = np.sum(nbtags[in_bin])
            denom = np.sum(nbjets[in_bin])
            # Empty bins get efficiency 0 instead of a division by zero.
            eff = num / denom if denom != 0 else 0
            table[(i, j)] = eff
    if mpt > 0 and meta > 0:
        # pt overflow: np.digitize returns `mpt` for values at or above the
        # last edge, so mirror the last real pt row into that index.
        for j in range(meta):
            table[(mpt, j)] = table[(mpt - 1, j)]
        # eta overflow: mirror the last real eta column into index `meta`
        # for every pt row, including the pt overflow row filled just above,
        # so that any (pt, eta) index pair from np.digitize has an entry.
        for i in range(mpt + 1):
            table[(i, meta)] = table[(i, meta - 1)]

    tables[sample] = table

In [None]:
import pandas as pd

# Smoke test of the ttbar lookup table: digitize a few (pt, eta) test points
# against the bin values and read the matching efficiencies back.
sample = 'TTToSemiLeptonic_2018'
ttbar_lookup = tables[sample]

# NOTE(review): pt_test has five entries but eta_test only four, so zip()
# below silently drops the last pt value; check whether `59,29` was meant
# to be a single number.
pt_test = [0, 59,29, 250, 2]
eta_test = [2, 0.4, 0, 0.9]
p = np.digitize(pt_test, bins=unique_pt_bins)
e = np.digitize(eta_test, bins=unique_eta_bins)

pe = [(i_pt, i_eta) for i_pt, i_eta in zip(p, e)]
weights = list(map(ttbar_lookup.__getitem__, pe))
weights

In [None]:
# Fetch the efficiency tables and binning through the project helper
# (the meaning of the positional arguments '.', '2018', True is defined in
# btag_utils; presumably base directory, year and a UL flag; confirm there).
# NOTE: this rebinds `table`, `pt_bins` and `eta_bins` from the cells above.
table, pt_bins, eta_bins = get_btag_tables('.', '2018', True)
# Evaluate the helper on the same test points used in the previous cell.
get_btag_effs(table, pt_bins, eta_bins, 'TTToSemiLeptonic_2018', pt_test, eta_test)

In [None]:
import awkward as ak
import correctionlib

# Open one NanoAOD file and take its jet collection.
# NOTE(review): absolute, site-specific path; consider moving it to a
# configurable location so the notebook runs elsewhere.
base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/DY4JetsToLLM-50'
# Use os.path.join explicitly: a bare `join` is not imported in this notebook
# and would only resolve if one of the `import *` lines happens to export it.
file = os.path.join(base, 'all_DY4JetsToLLM-50_file001_part_1of3_Electrons.root')
events = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema).events()
jet = events.Jet

def get_baseline_jets(jet, cutflow, year='2018'):
    """Return the jets passing the baseline pt, |eta| and jet-ID cuts.

    Parameters
    ----------
    jet : array-like with `pt`, `eta` and `jetId` fields
        Jet collection; it must support boolean-mask indexing.
    cutflow : any
        Accepted for interface compatibility; not used here.
    year : str
        Data-taking period selecting the |eta| limit (2.5 for '2017'/'2018',
        2.4 for '2016preVFP'/'2016postVFP').

    Raises
    ------
    KeyError
        If `year` is not one of the four known periods.
    """
    max_abs_eta = {'2018': 2.5, '2017': 2.5,
                   '2016postVFP': 2.4, '2016preVFP': 2.4}
    passes_pt = jet.pt > 20
    passes_eta = np.abs(jet.eta) < max_abs_eta[year]
    passes_id = jet.jetId > 0
    # Applying one combined mask selects the same jets as cutting sequentially.
    return jet[passes_pt & passes_eta & passes_id]

def is_baseline_bjet(baseline_j, cutflow, year='2018'):
    """Return a boolean mask of jets whose `btagDeepFlavB` exceeds the cut.

    Parameters
    ----------
    baseline_j : object with a `btagDeepFlavB` field
        Jet collection to test.
    cutflow : any
        Accepted for interface compatibility; not used here.
    year : str
        Data-taking period selecting the discriminant threshold.

    Raises
    ------
    KeyError
        If `year` is not one of the four known periods.
    """
    # NOTE(review): the two 2016 periods share the same threshold here;
    # confirm both values against the official per-era working points.
    thresholds = {'2016preVFP': 0.2598, '2016postVFP': 0.2598,
                  '2017': 0.3040, '2018': 0.2783}
    discriminant = baseline_j.btagDeepFlavB
    cut = thresholds[year]
    return discriminant > cut

# Apply the baseline jet selection with the 2018 |eta| limit; no cutflow
# object is passed (the function does not use it).
jets = get_baseline_jets(jet, None, '2018')

def get_btag_weights(infile):
    """Load and return the correctionlib CorrectionSet stored in `infile`."""
    correction_set = correctionlib.CorrectionSet.from_file(infile)
    return correction_set

# Load the b-tagging scale-factor corrections and list what the file provides.
infile = 'UL_2018/btagging.json.gz'
SF_tool = get_btag_weights(infile)
for c in SF_tool.keys():
    print(c)

btag_SFs = SF_tool['deepJet_comb']
systematic = 'central'
working_point = 'M'

# Keep only jets with |partonFlavour| == 5 and flatten them so the
# per-jet quantities can be evaluated as 1-D arrays; `num_j` remembers
# the per-event multiplicity to restore the event structure at the end.
jets = jets[abs(jets.partonFlavour)==5]
flat_j, num_j = ak.flatten(jets), ak.num(jets)
pt, eta = flat_j.pt, flat_j.eta
is_tagged = ak.flatten(is_baseline_bjet(jets, None, '2018'))

# Scale factors come from the correction selected above. The original code
# called an undefined name `corr` here instead of `btag_SFs`.
SFs = btag_SFs.evaluate(systematic, working_point, 5,
                        abs(ak.to_numpy(eta)), ak.to_numpy(pt))

# MC efficiencies from the lookup tables. `table`, `pt_bins`, `eta_bins`
# and `sample` are taken from earlier cells.
btag = np.array(get_btag_effs(table, pt_bins, eta_bins, sample, pt, abs(eta)))

# Per-jet MC term: eff if the jet is tagged, (1 - eff) otherwise.
w_not_tagged = (1-btag) * ~is_tagged
w_MC = btag * is_tagged + w_not_tagged

# Per-jet scaled term: SF*eff if tagged, (1 - SF*eff) otherwise.
w_is_tagged = btag * is_tagged * SFs
w_is_not_tagged = (1-btag*SFs) * ~is_tagged

# Per-jet ratio, then the per-event product over jets.
# NOTE(review): a tagged jet with a looked-up efficiency of exactly 0 makes
# w_MC zero and the ratio undefined; confirm the tables never yield that.
w = (w_is_tagged + w_is_not_tagged) / w_MC
ak.prod(ak.unflatten(w, num_j), axis=1)

In [None]:
# Load the efficiency tables and binning under the names that
# `apply_btag_corrections` reads as globals.
btag_eff_tables, btag_pt_bins, btag_eta_bins = get_btag_tables('.', '2018', True)
# Sanity-check the lookup with the tables just loaded. The original call
# reused `table`, `pt_bins` and `eta_bins` left over from an earlier cell.
get_btag_effs(btag_eff_tables, btag_pt_bins, btag_eta_bins,
              'TTToSemiLeptonic_2018', pt_test, eta_test)

def apply_btag_corrections(jets, dataset, systematic):
    """Compute a per-event b-tag weight from per-jet efficiencies and SFs.

    For each jet with |partonFlavour| == 5 the MC term is `eff` if the jet
    is tagged and `1 - eff` otherwise; the scaled term is `SF * eff` if
    tagged and `1 - SF * eff` otherwise. The event weight is the product
    over jets of (scaled term / MC term).

    Parameters
    ----------
    jets : awkward array of jets
        Must provide `partonFlavour`, `pt`, `eta` and `btagDeepFlavB`.
    dataset : str
        Sample name; the text after the last '_' is used as the year key
        for the tagging threshold, and the full name is passed to
        `get_btag_effs`.
    systematic : str
        Systematic label forwarded to the scale-factor evaluation.

    Returns
    -------
    One weight per event, as returned by `ak.prod(..., axis=1)`.

    Notes
    -----
    Reads the module-level `btag_SFs`, `btag_eff_tables`, `btag_pt_bins`
    and `btag_eta_bins`.
    """
    jets = jets[abs(jets.partonFlavour)==5]
    # Flatten for 1-D evaluation; `num_j` restores the event structure later.
    flat_j, num_j = ak.flatten(jets), ak.num(jets)
    pt, eta = flat_j.pt, flat_j.eta
    delta = {'2016preVFP': 0.2598, '2016postVFP': 0.2598,
             '2017': 0.3040, '2018': 0.2783}
    year = dataset.split('_')[-1]
    is_tagged = (flat_j.btagDeepFlavB > delta[year])
    SFs = btag_SFs.evaluate(systematic, 'M', 5,
                            abs(ak.to_numpy(eta)), ak.to_numpy(pt))
    btag_effs = np.array(get_btag_effs(btag_eff_tables,
                                       btag_pt_bins,
                                       btag_eta_bins,
                                       dataset, pt, abs(eta)))
    # MC term per jet.
    w_MC = is_tagged * btag_effs + (1-btag_effs) * ~is_tagged
    # Scaled term per jet. The original used the global `btag` (efficiencies
    # left over from another cell) here instead of the local `btag_effs`.
    w_is_tagged = btag_effs * is_tagged * SFs
    w_is_not_tagged = (1-btag_effs*SFs) * ~is_tagged
    w = (w_is_tagged + w_is_not_tagged)/w_MC
    return ak.prod(ak.unflatten(w, num_j), axis=1)

# Evaluate the per-event weights for the jets selected above, using the
# sample name stored in `sample` and the central scale-factor variation.
apply_btag_corrections(jets, sample, 'central')
