# Applying corrections to columnar data

Here we will show how to use the lookup_tools package in coffea (formerly fnal-column-analysis-tools, FCAT).
It can read a variety of correction files and expose them as lookup tables.
We also cover the CMS-specific JET-MET tools that are provided with coffea.

# Testing Data Generators

In [None]:
from coffea.util import awkward
from coffea.util import numpy as np
import uproot_methods


def dummy_jagged_eta_pt():
    """Generate reproducible flat eta/pt test arrays plus per-row counts.

    Re-seeds NumPy's global random generator with 42 on every call, so
    repeated calls return identical values.

    Returns:
        A tuple ``(counts, test_eta, test_pt)`` where

        * ``counts`` holds 50 integers (exponential draws with scale 2,
          cast to int),
        * ``test_eta`` holds ``counts.sum()`` uniform draws between -3 and 3,
        * ``test_pt`` holds ``counts.sum()`` values, each the sum of two
          exponential draws with scale 10.
    """
    rng = np.random
    rng.seed(42)

    counts = rng.exponential(2, size=50).astype(int)
    n_total = counts.sum()

    test_eta = rng.uniform(-3.0, 3.0, size=n_total)
    # Two separate draws, taken in this order, are added together.
    pt_first = rng.exponential(10.0, size=n_total)
    pt_second = rng.exponential(10, size=n_total)

    return counts, test_eta, pt_first + pt_second

def dummy_four_momenta():
    """Generate reproducible flat four-momentum components plus per-row counts.

    Re-seeds NumPy's global random generator with 12345 on every call.

    Returns:
        A tuple ``(counts, px, py, pz, energy)``. ``counts`` has 1000
        integer entries, each capped at 20; the other four arrays each have
        ``counts.sum()`` entries. ``energy`` is computed from the momentum
        components and a fixed mass of 0.135.
    """
    np.random.seed(12345)

    n_rows = 1000
    raw_counts = np.random.exponential(0.5, size=n_rows).astype(int)
    counts = np.minimum(raw_counts, 20)
    n_particles = np.sum(counts)

    # Draw order (px, py, pz) matters for reproducibility.
    px = np.random.normal(loc=20.0, scale=5.0, size=n_particles)
    py = np.random.normal(loc=20.0, scale=5.0, size=n_particles)
    pz = np.random.normal(loc=0, scale=55, size=n_particles)

    mass = 0.135
    energy = np.sqrt(np.square(px) + np.square(py) + np.square(pz) + mass * mass)

    return counts, px, py, pz, energy

def dummy_events():
    """Wrap the dummy four-momenta in a minimal events-like object.

    Returns:
        An ``events`` instance whose single attribute ``thing`` is an
        ``obj`` instance exposing the arrays from ``dummy_four_momenta()``
        (``px``, ``py``, ``pz``, ``en``, ``count``), a stacked ``p4`` array
        with one (px, py, pz, energy) row per entry, and the derived
        quantities ``pt``, ``phi``, ``eta``, ``mass`` and ``blah``.
    """
    counts, px, py, pz, energy = dummy_four_momenta()
    # np.stack gives shape (4, n); transpose to one row per entry.
    p4_rows = np.stack((px, py, pz, energy)).T

    class obj(object):
        def __init__(self):
            # Raw inputs.
            self.count = counts
            self.px = px
            self.py = py
            self.pz = pz
            self.en = energy
            self.p4 = p4_rows

            # Derived kinematics.
            p_squared = px*px + py*py + pz*pz
            self.pt = np.hypot(px, py)
            self.phi = np.arctan2(py, px)
            self.eta = np.arctanh(pz / np.sqrt(p_squared))
            # abs() guards the square root against a slightly negative
            # difference from floating-point rounding.
            self.mass = np.sqrt(np.abs(energy*energy - p_squared))
            self.blah = energy * px

    class events(object):
        def __init__(self):
            self.thing = obj()

    return events()

def gen_reco_TLV():
    """Build a small fixed pair of jagged Lorentz-vector arrays.

    Both arrays cover three rows. The ``gen`` array has 3, 0 and 2 entries
    per row; the ``reco`` array has 4, 1 and 1. Every entry is given a mass
    of 0.2.

    Returns:
        A tuple ``(gen, reco)`` built with
        ``uproot_methods.TLorentzVectorArray.from_ptetaphim``.
    """
    jagged = awkward.JaggedArray.fromiter
    from_ptetaphim = uproot_methods.TLorentzVectorArray.from_ptetaphim
    mass = 0.2

    gen = from_ptetaphim(
        jagged([[10.0, 20.0, 30.0], [], [40.0, 50.0]]),   # pt
        jagged([[-3.0, -2.0, 2.0], [], [-1.0, 1.0]]),     # eta
        jagged([[-1.5, 0.0, 1.5], [], [0.78, -0.78]]),    # phi
        mass,
    )
    reco = from_ptetaphim(
        jagged([[20.2, 10.1, 30.3, 50.5], [50.5], [60]]),  # pt
        jagged([[-2.2, -3.3, 2.2, 0.0], [0.0], [1.1]]),    # eta
        jagged([[ 0.1, -1.4, 1.4, 0.78], [0.78], [-0.77]]),  # phi
        mass,
    )

    return (gen, reco)

In [None]:
from coffea.lookup_tools import extractor

# Opening a root file and using it as a lookup table

In [None]:
# Build a lookup-table evaluator from a ROOT histogram file.
# The weight-set string has three space-separated fields; the first
# ("testSF2d") is the key used to index the evaluator below.
# NOTE(review): the second field presumably names the histogram inside the
# file and the third is the file path -- confirm against the extractor docs.
ext = extractor()
ext.add_weight_sets(["testSF2d scalefactors_Tight_Electron ../tests/samples/testSF2d.histo.root"])
ext.finalize()

evaluator = ext.make_evaluator()

# Inspect what the evaluator exposes and what kind of object a lookup is.
print(dir(evaluator))
print()
print(evaluator['testSF2d'])
print()
print(type(evaluator['testSF2d']))
print()

counts, test_eta, test_pt = dummy_jagged_eta_pt()

# test flat eval: call the lookup directly on the flat numpy arrays
test_out = evaluator["testSF2d"](test_eta, test_pt)

# test structured eval: regroup the same flat values into jagged arrays
# using the per-row counts, then call the same lookup on them
test_eta_jagged = awkward.JaggedArray.fromcounts(counts, test_eta)
test_pt_jagged = awkward.JaggedArray.fromcounts(counts, test_pt)
test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged)

# Print the per-row counts of inputs and output side by side so their
# jagged structure can be compared.
print('JAGGED SHAPE COMPARISON')
print('eta      :',test_eta_jagged.counts)
print('pt     :',test_pt_jagged.counts)
print('scaling :',test_out_jagged.counts)

print('output scaling :',test_out_jagged)

# Plotting with weights

In [None]:
from coffea import hist

# One categorical axis separates the weighted and unweighted fills so both
# can live in the same histogram and be overlaid later.
weights_axis = hist.Cat("wgtname", "With/Without Weights")
# The eta label needs both the opening and the closing "$" to be treated as
# a math expression by matplotlib.
eta_axis = hist.Bin("eta", r"$\eta$", 20, -2.5, 2.5)
pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 20, 10, 30)

eta_hist = hist.Hist("Counts", weights_axis, eta_axis)
pt_hist = hist.Hist("Counts", weights_axis, pt_axis)

# Fill each histogram twice from the same flattened values: once weighted by
# the scale factors looked up earlier, once unweighted.
eta_hist.fill(wgtname='weights',eta=test_eta_jagged.flatten(),weight=test_out_jagged.flatten())
eta_hist.fill(wgtname='noweights',eta=test_eta_jagged.flatten())

pt_hist.fill(wgtname='weights',pt=test_pt_jagged.flatten(),weight=test_out_jagged.flatten())
pt_hist.fill(wgtname='noweights',pt=test_pt_jagged.flatten())

In [None]:
# Draw the eta histogram with the 'weights' and 'noweights' categories overlaid.
fig, ax, _ = hist.plot1d(eta_hist, overlay='wgtname')


In [None]:
# Draw the pt histogram with the 'weights' and 'noweights' categories overlaid.
fig, ax, _ = hist.plot1d(pt_hist, overlay='wgtname')


# Importing B-tagging Scale Factors

In [None]:
%%bash
# Show the first 5 lines of the b-tag scale-factor CSV used in the next cell.
head -5 ../tests/samples/testBTagSF.btag.csv

In [None]:
# Load every weight set from the b-tag CSV ("*" wildcard in the second field).
# NOTE(review): the keys printed below start with "testBTag", so the first
# field looks like a prefix prepended to each name found in the file --
# confirm against the extractor docs.
ext = extractor()
ext.add_weight_sets(["testBTag * ../tests/samples/testBTagSF.btag.csv"])
ext.finalize()

evaluator = ext.make_evaluator()

# Inspect the available keys and one example lookup object.
print(dir(evaluator))
print()
print(evaluator['testBTagCSVv2_1_comb_up_0'])
print()
print(type(evaluator['testBTagCSVv2_1_comb_up_0']))
print()

counts, test_eta, test_pt = dummy_jagged_eta_pt()
# discriminant used for reshaping, zero otherwise
test_discr = np.zeros_like(test_eta)

# This lookup takes three flat arrays: eta, pt and the discriminant.
sf_out = evaluator['testBTagCSVv2_1_comb_up_0'](test_eta, test_pt, test_discr)
print(sf_out)

# Importing JSON-encoded histograms

In [None]:
%%bash
# Show the first 10 lines of the JSON-encoded histogram file used in the next cell.
head -10 ../tests/samples/EIDISO_WH_out.histo.json

In [None]:
# Load every weight set from the JSON histogram file, using wildcards for
# the first two fields of the weight-set string.
ext = extractor()
ext.add_weight_sets(["* * ../tests/samples/EIDISO_WH_out.histo.json"])
ext.finalize()
    
evaluator = ext.make_evaluator()

# Inspect the available keys and one example lookup object.
print(dir(evaluator))
print()
print(evaluator['EIDISO_WH/eta_pt_ratio_value'])
print()
print(type(evaluator['EIDISO_WH/eta_pt_ratio_value']))
print()

counts, test_eta, test_pt = dummy_jagged_eta_pt()
    
# The "_value" and "_error" lookups are separate keys, each evaluated on the
# same flat (eta, pt) inputs.
sf_out = evaluator['EIDISO_WH/eta_pt_ratio_value'](test_eta, test_pt)
sf_err_out = evaluator['EIDISO_WH/eta_pt_ratio_error'](test_eta, test_pt)
print(sf_out)
print(sf_err_out)

# Importing Jet Energy Scales and Uncertainties

In [None]:
%%bash
# Show the first 5 lines of the L2Relative jet-energy-correction text file.
head -5 ../tests/samples/Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi.jec.txt

In [None]:
# Load two text-format correction files into one extractor: a ".jec.txt"
# file and a ".junc.txt" file.
ext = extractor()
ext.add_weight_sets([
    "* * ../tests/samples/Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi.jec.txt",
    "* * ../tests/samples/Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi.junc.txt",
])
ext.finalize()

evaluator = ext.make_evaluator()

# Inspect the available keys, then the lookup object and its type for each
# of the two files. The keys used here match the file names without their
# ".jec.txt" / ".junc.txt" suffixes.
print(dir(evaluator))
print()
print(evaluator['Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi'])
print()
print(evaluator['Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi'])
print()
print(type(evaluator['Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi']))
print()
print(type(evaluator['Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi']))
print()

counts, test_eta, test_pt = dummy_jagged_eta_pt()
    
# Evaluate the correction on the flat (eta, pt) test arrays.
jec_out = evaluator['Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi'](test_eta,test_pt)

print(jec_out)

# Evaluate the uncertainty on the same inputs.
junc_out = evaluator['Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi'](test_eta,test_pt)

print(junc_out)

In [None]:
%%bash
# Show the first 4 lines of the per-source jet uncertainty text file.
head -4 ../tests/samples/Autumn18_V8_MC_UncertaintySources_AK4PFchs.junc.txt

In [None]:
# Load the uncertainty-sources file with wildcards for the first two fields.
# The key used below is the file name (without ".junc.txt") followed by
# "_AbsoluteScale".
# NOTE(review): the suffix presumably selects one uncertainty source from
# the file -- confirm against the keys printed by dir(evaluator).
ext = extractor()
ext.add_weight_sets([
    "* * ../tests/samples/Autumn18_V8_MC_UncertaintySources_AK4PFchs.junc.txt",
])
ext.finalize()

evaluator = ext.make_evaluator()

print(dir(evaluator))
print()
print(evaluator['Autumn18_V8_MC_UncertaintySources_AK4PFchs_AbsoluteScale'])
print()
print(type(evaluator['Autumn18_V8_MC_UncertaintySources_AK4PFchs_AbsoluteScale']))
print()

# test_eta and test_pt are not defined in this cell; it relies on the values
# left in the session by an earlier cell that called dummy_jagged_eta_pt().
bysource_junc_out = evaluator['Autumn18_V8_MC_UncertaintySources_AK4PFchs_AbsoluteScale'](test_eta,test_pt)
print(bysource_junc_out)

# Applying energy scale transformations to Jets

In [None]:
from coffea.analysis_objects import JaggedCandidateArray as CandArray
from coffea.jetmet_tools import FactorizedJetCorrector,JetCorrectionUncertainty
from coffea.jetmet_tools import JetTransformer

# Load one jet-energy-correction file and its matching uncertainty file.
ext = extractor()
ext.add_weight_sets([
    "* * ../tests/samples/Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi.jec.txt",
    "* * ../tests/samples/Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi.junc.txt",
])
ext.finalize()

evaluator = ext.make_evaluator()

print(dir(evaluator))
print()

counts, px, py, pz, E = dummy_four_momenta()

# Build a jagged candidate array from the flat dummy four-momenta.
jets = CandArray.candidatesfromcounts(counts,px=px,py=py,pz=pz,energy=E)
# Keep copies of the starting pt and mass under separate names so the
# ratios printed below can compare against them after the transform.
jets.add_attributes(ptRaw=jets.pt,
                    massRaw=jets.mass)

# Pass the named lookups to the corrector and uncertainty objects as keyword
# arguments, keyed by their evaluator names.
jec_names = ['Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi']
junc_names = ['Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi']
corrector = FactorizedJetCorrector(**{name: evaluator[name] for name in jec_names})
uncertainties = JetCorrectionUncertainty(**{name:evaluator[name] for name in junc_names})

transformer = JetTransformer(jec=corrector,junc=uncertainties)
### more possibilities are available if you send in more pieces of the JEC stack
# mc2016_ak8_jxform = JetTransformer(jec=MC_AK8JEC2016,junc=MC_AK8JUNC2016,
#                                    jer=MC_AK8JER2016,jersf=MC_AK8JERSF2016)

print()
print('starting columns:',jets.columns)
print()

# Before the transform, pt and mass have not been changed since ptRaw and
# massRaw were stored.
print('untransformed pt ratios',jets.pt/jets.ptRaw)
print('untransformed mass ratios',jets.mass/jets.massRaw)

# The return value is not used; the results are read back from `jets`.
transformer.transform(jets)

print('transformed pt ratios',jets.pt/jets.ptRaw)
print('transformed mass ratios',jets.mass/jets.massRaw)

print()
print('transformed columns:',jets.columns)
print()

# Up/down uncertainty variations of pt, read from the columns that are
# available on `jets` after the transform.
print('JES UP pt ratio',jets.pt_AK4PFPuppi_up/jets.ptRaw)
print('JES DOWN pt ratio',jets.pt_AK4PFPuppi_down/jets.ptRaw)