In [None]:
%load_ext autoreload
%autoreload 2

import argparse
import logging
import os
import sys
import time
import warnings
from os.path import join

# extend the module search path before the azh_analysis imports below
# (presumably so the local package resolves when running from this directory)
sys.path.append('../')
warnings.filterwarnings("ignore")

import awkward as ak
import numpy as np
import uproot
from coffea import processor, util
from coffea.lumi_tools import LumiMask
from coffea.nanoevents import NanoAODSchema, NanoEventsFactory

from azh_analysis.processors.analysis_processor import AnalysisProcessor
from azh_analysis.utils.btag import get_btag_SFs, get_btag_tables
from azh_analysis.utils.corrections import (
    dyjets_stitch_weights,
    get_electron_ES_weights,
    get_electron_ID_weights,
    get_electron_trigger_SFs,
    get_fake_rates,
    get_muon_ES_weights,
    get_muon_ID_weights,
    get_muon_trigger_SFs,
    get_pileup_weights,
    get_tau_ID_weights,
    tau_ID_weight,
)
from azh_analysis.utils.sample import get_fileset, get_nevts_dict, get_sample_info

# Configure INFO-level logging with a timestamped message format.
log_format = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=log_format, level=logging.INFO)
logging.info("Initializing")

# Year and sample type that every path below is built from.
year = '2017'
source = 'MC_UL'

# One golden JSON per era; both 2016 eras point at the same 2016 file.
golden_json_dir = "../samples/data_certification"
golden_jsons = {
    era: join(golden_json_dir, f"data_cert_{era[:4]}.json")
    for era in ("2018", "2017", "2016postVFP", "2016preVFP")
}
# The comprehension variable is named `era` so it cannot be mistaken for the
# notebook-level `year` defined above.
lumi_masks = {era: LumiMask(json_path) for era, json_path in golden_jsons.items()}

# Fake rates for the selected year.
fr_base = f"../corrections/fake_rates/UL_{year}"
fake_rates = get_fake_rates(fr_base, year)
logging.info(f"Using fake rates\n{fr_base}")

# Electron ID / isolation scale factors for the selected year.
eID_base = f"../corrections/electron_ID/UL_{year}"
eID_file = join(
    eID_base, f"Electron_RunUL{year}_IdIso_AZh_IsoLt0p15_IdFall17MVA90noIsov2.root"
)
eIDs = get_electron_ID_weights(eID_file)
logging.info(f"Using eID_SFs:\n{eID_file}")

# Electron energy-scale weights. The log message names the directory that is
# actually passed to the loader (electron_ES), mirroring the muon message below.
eES_SFs = get_electron_ES_weights("../corrections/electron_ES/", year)
logging.info(f"Using eES_SFs from corrections/electron_ES/UL_{year}")

# Muon energy-scale weights.
mES_SFs = get_muon_ES_weights("../corrections/muon_ES/", year)
logging.info(f"Using mES_SFs from corrections/muon_ES/UL_{year}")

# Muon ID / isolation scale factors.
mID_base = f"../corrections/muon_ID/UL_{year}"
mID_file = join(mID_base, f"Muon_RunUL{year}_IdIso_AZh_IsoLt0p15_IdLoose.root")
mIDs = get_muon_ID_weights(mID_file)
logging.info(f"Using mID_SFs:\n{mID_file}")

# Tau ID corrections (JSON correction file).
tID_base = f"../corrections/tau_ID/UL_{year}"
tID_file = join(tID_base, "tau.corr.json")
tIDs = get_tau_ID_weights(tID_file)
logging.info(f"Using tID_SFs:\n{tID_file}")

# Trigger label per era; the label is part of the scale-factor file name.
e_trigs = {
    "2016preVFP": "Ele25_EtaLt2p1",
    "2016postVFP": "Ele25_EtaLt2p1",
    "2017": "Ele35",
    "2018": "Ele35",
}
m_trigs = {
    "2016preVFP": "IsoMu24orIsoTkMu24",
    "2016postVFP": "IsoMu24orIsoTkMu24",
    "2017": "IsoMu27",
    "2018": "IsoMu27",
}

# Electron trigger scale factors.
e_trig_base = f"../corrections/electron_trigger/UL_{year}"
e_trig_name = f"Electron_RunUL{year}_{e_trigs[year]}.root"
e_trig_file = join(e_trig_base, e_trig_name)
e_trig_SFs = get_electron_trigger_SFs(e_trig_file)

# Muon trigger scale factors.
m_trig_base = f"../corrections/muon_trigger/UL_{year}"
m_trig_name = f"Muon_RunUL{year}_{m_trigs[year]}.root"
m_trig_file = join(m_trig_base, m_trig_name)
m_trig_SFs = get_muon_trigger_SFs(m_trig_file)

# b-tagging tables and scale factors; btag_tables is indexed [0], [1], [2]
# when the processor is built.
btag_root = "../corrections/btag/"
btag_tables = get_btag_tables(btag_root, f"{year}", UL=True)
btag_SFs = get_btag_SFs(btag_root, f"{year}", UL=True)

# Sample metadata (csv) and per-sample file lists (yaml) for this source/year.
fset_string = f"{source}_{year}"
sample_info = get_sample_info(join("../samples", fset_string + ".csv"))
fileset = get_fileset(join("../samples/filesets", fset_string + ".yaml"))

# Pileup weights are only loaded when the source string mentions MC or signal.
needs_pileup = ("MC" in source) or ("signal" in source)
pileup_weights = (
    get_pileup_weights("../corrections/pileup/", year=year) if needs_pileup else None
)

# Keep only entries whose extension is "root"; the dict is updated in place.
for sample in list(fileset):
    fileset[sample] = [
        path for path in fileset[sample] if path.rsplit(".", 1)[-1] == "root"
    ]
logging.info(f"running on\n {fileset.keys()}")

# Sum of weights (and DY stitching weights) are built from the full fileset,
# i.e. before the sample filter applied further down.
nevts_dict = None
dyjets_weights = None
if "MC" in source:
    nevts_dict = get_nevts_dict(fileset, year)
    print("fileset keys", fileset.keys())
    if f"DYJetsToLLM-50_{year}" in fileset:
        dyjets_weights = dyjets_stitch_weights(sample_info, nevts_dict, year)

# Restrict the job to samples whose name contains "ZHHToWW".
fileset = {name: paths for name, paths in fileset.items() if "ZHHToWW" in name}

logging.info(f"Successfully built sum_of_weights dict:\n {nevts_dict}")
logging.info(f"Successfully built dyjets stitch weights:\n {dyjets_weights}")

# Record the wall-clock start time.
tic = time.time()

# Collect the processor arguments by theme, then build the processor once.
processor_kwargs = dict(
    # what is being processed
    source=source,
    year=year,
    sample_info=sample_info,
    fileset=fileset,
    # event-level inputs
    pileup_weights=pileup_weights,
    lumi_masks=lumi_masks,
    nevts_dict=nevts_dict,
    dyjets_weights=dyjets_weights,
    fake_rates=fake_rates,
    # lepton ID / energy-scale / trigger corrections
    eleID_SFs=eIDs,
    eleES_SFs=eES_SFs,
    muID_SFs=mIDs,
    muES_SFs=mES_SFs,
    tauID_SFs=tIDs,
    e_trig_SFs=e_trig_SFs,
    m_trig_SFs=m_trig_SFs,
    # b-tagging: element 0 = efficiency tables, 1 = pt bins, 2 = eta bins
    btag_eff_tables=btag_tables[0],
    btag_SFs=btag_SFs,
    btag_pt_bins=btag_tables[1],
    btag_eta_bins=btag_tables[2],
    # run options
    run_fastmtt=True,
    systematic="all",
    same_sign=False,
    blind=False,
)
proc_instance = AnalysisProcessor(**processor_kwargs)

In [None]:
# Run the processor locally: one worker, no compression of the results.
executor = processor.FuturesExecutor(compression=None, workers=1)
futures_run = processor.Runner(executor=executor, schema=NanoAODSchema)

# Process the "Events" tree of every file in the fileset.
out = futures_run(fileset, "Events", processor_instance=proc_instance)

In [None]:
# Inspect the "m4l" histogram of the GluGluZHHToWW sample: sum over the first
# three axes, select the "tauID_11_down" and "cons" entries, keep the last axis.
m4l_hist = out["m4l"]["GluGluZHHToWW"]
m4l_hist[::sum, ::sum, ::sum, "tauID_11_down", "cons", :].values()

In [None]:
# NOTE(review): this cell looks like a leftover from an older processor API and
# does not run on a fresh kernel:
#   * pileup_tables, btag_eff_tables, btag_pt_bins, btag_eta_bins and
#     SS4lFakeRateProcessor are not defined or imported anywhere in the visible
#     notebook;
#   * the keyword set differs from the AnalysisProcessor call in the first cell
#     (e.g. pileup_tables vs pileup_weights, no source/year/fileset);
#   * the bare `to` inside the `if fake_rate:` branch is a stray token.
# Confirm whether the cell is still needed before fixing or deleting it.
fake_rate = False
proc_instance = AnalysisProcessor(sample_info=sample_info,
                                  pileup_tables=pileup_tables,
                                  lumi_masks=lumi_masks,
                                  nevts_dict=nevts_dict,
                                  high_stats=True,
                                  eleID_SFs=eIDs, muID_SFs=mIDs, tauID_SFs=tIDs,
                                  fake_rates=fake_rates,
                                  dyjets_weights=dyjets_weights,
                                  e_trig_SFs=e_trig_SFs, m_trig_SFs=m_trig_SFs,
                                  btag_eff_tables=btag_eff_tables, btag_SFs=btag_SFs,
                                  btag_pt_bins=btag_pt_bins, btag_eta_bins=btag_eta_bins,
                                  run_fastmtt=True, fill_hists=True)

# Optionally swap in the fake-rate processor (disabled by the flag above).
if fake_rate: 
    proc_instance = SS4lFakeRateProcessor(sample_info=sample_info,
                                          pileup_tables=pileup_tables,
                                          mode='tt',
                                          nevts_dict=nevts_dict,
                                          lumi_masks=lumi_masks,
                                          high_stats=True,
                                          eleID_SFs=eIDs,
                                          muID_SFs=mIDs,
                                          tauID_SFs=tIDs,
                                          dyjets_weights=dyjets_weights,
                                          e_trig_SFs=e_trig_SFs, m_trig_SFs=m_trig_SFs)
    to
# Runs the job through run_uproot_job; the earlier cell uses processor.Runner
# for the same purpose and rebinds the same `out` name.
out = processor.run_uproot_job(
        fileset ,
        treename="Events",
        processor_instance=proc_instance,
        executor=processor.futures_executor,
        executor_args={"schema": NanoAODSchema, 'workers': 1},
        chunksize=25000
)

In [None]:
import hist

# Open one DY4Jets ntuple and grab the object collections used below.
base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/DY4JetsToLLM-50'
file = join(base, 'all_DY4JetsToLLM-50_file001_part_1of3_Electrons.root')
events = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema).events()
met, tau = events.MET, events.Tau
ele, mu = events.Electron, events.Muon

# Histogram with growable string axes for dataset / category / dummy and a
# regular pt axis (25 bins from 0 to 250).
dataset_axis = hist.axis.StrCategory(
    name="dataset", label="", categories=[], growth=True
)
pt_axis = hist.axis.Regular(
    name="pt", label=r"$p_T$ [GeV]", bins=25, start=0, stop=250
)
cat_axis = hist.axis.StrCategory(
    name="category", label="", categories=[], growth=True
)
dummy_axis = hist.axis.StrCategory(
    name="dummy", label="", categories=[], growth=True
)
output = hist.Hist(dataset_axis, pt_axis, cat_axis, dummy_axis)

# Build one (ll, tt) candidate per event for each category from the first N
# events: pair t1 x tau, pair that with every electron pair, then keep the
# combination whose t1 has the highest pt.
N = 1000
ll = ak.combinations(ele[:N], 2, axis=1, fields=['l1', 'l2'])
cands = {}
for category, t1_leptons, flat_weight in (('eeet', ele, 0.8), ('eemt', mu, 0.9)):
    tt = ak.cartesian({'t1': t1_leptons[:N], 't2': tau[:N]}, axis=1)
    quad = ak.cartesian({'tt': tt, 'll': ll}, axis=1)
    quad = quad[ak.argmax(quad.tt.t1.pt, axis=1, keepdims=True)]
    quad = quad[~ak.is_none(quad, axis=1)]
    # tag each candidate with its category and a constant per-event weight
    quad['category'] = category
    quad['weights'] = flat_weight * np.ones(len(quad))
    cands[category] = quad
eeet, eemt = cands['eeet'], cands['eemt']

# Merge the categories per event and keep events with exactly one candidate.
lltt = ak.concatenate(list(cands.values()), axis=1)
lltt = lltt[ak.num(lltt) == 1]

# Fill with the leading-pair l1 pt, the category label and the weights.
l1_pt = np.array(ak.flatten(lltt.ll.l1.pt))
categories = ak.to_numpy(ak.flatten(lltt.category))
fill_weights = ak.to_numpy(ak.flatten(lltt.weights))
output.fill(dataset='test', pt=l1_pt, category=categories,
            dummy='nom', weight=fill_weights)
output.values()

In [None]:
from coffea import analysis_tools

# Open one DY4Jets ntuple and grab the object collections used below.
base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/DY4JetsToLLM-50'
file = join(base, 'all_DY4JetsToLLM-50_file001_part_1of3_Electrons.root')
events = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema).events()

ele, met = events.Electron, events.MET
tau, mu, jet = events.Tau, events.Muon, events.Jet

# For the first N events, pair (t1, tau) with every electron pair and keep the
# combination whose t1 has the highest pt (entries may be None when empty).
N = 100
ll = ak.combinations(ele[:N], 2, axis=1, fields=['l1', 'l2'])
leading = {}
for cat_name, t1_leptons in (('eeet', ele), ('eemt', mu)):
    tt = ak.cartesian({'t1': t1_leptons[:N], 't2': tau[:N]}, axis=1)
    quads = ak.cartesian({'tt': tt, 'll': ll}, axis=1)
    leading[cat_name] = quads[ak.argmax(quads.tt.t1.pt, axis=1, keepdims=True)]
eeet, eemt = leading['eeet'], leading['eemt']

# category name -> candidates (eemt first, then eeet)
lltt_cats = {'eemt': eemt, 'eeet': eeet}

def apply_lepton_ID_SFs(lltt, cat, is_data=False):
    """Return the product of the ID scale factors of the four legs of `lltt`.

    The category string names the flavor of each leg in order
    (l1, l2, t1, t2): 'e' and 'm' legs are weighted with `lepton_ID_weight`
    using the notebook-level `eIDs` / `mIDs`, 't' legs with `tau_ID_weight`
    using the notebook-level `tIDs`.

    Parameters
    ----------
    lltt : mapping-like with lltt['ll']['l1'], ['ll']['l2'], ['tt']['t1'], ['tt']['t2']
    cat : str
        Category whose first two characters are 'ee' or 'mm' and whose
        remainder is one of 'em', 'et', 'mt', 'tt'.
    is_data : bool
        Forwarded unchanged to `lepton_ID_weight`.

    Raises
    ------
    ValueError
        If `cat` is not one of the supported categories. Without this check an
        unsupported category left a leg weight unassigned and the function
        failed later with an UnboundLocalError.

    NOTE(review): `lepton_ID_weight` is not imported anywhere in the visible
    notebook; confirm where it should come from.
    """
    if cat[:2] not in ('ee', 'mm') or cat[2:] not in ('em', 'et', 'mt', 'tt'):
        raise ValueError(f"unsupported category for lepton ID SFs: {cat!r}")

    l1, l2 = lltt['ll']['l1'], lltt['ll']['l2']
    t1, t2 = lltt['tt']['t1'], lltt['tt']['t2']

    def leg_weight(leg, flavor):
        # hadronic taus take the full category string, light leptons do not
        if flavor == 't':
            return tau_ID_weight(leg, tIDs, cat)
        id_tables = eIDs if flavor == 'e' else mIDs
        return lepton_ID_weight(leg, flavor, id_tables, is_data)

    # legs are evaluated in the order l1, l2, t1, t2
    l1_w = leg_weight(l1, cat[0])
    l2_w = leg_weight(l2, cat[1])
    t1_w = leg_weight(t1, cat[2])
    t2_w = leg_weight(t2, cat[3])

    # apply ID scale factors
    return l1_w * l2_w * t1_w * t2_w

# Weights container sized to the full event count, keeping individual weights.
weights = analysis_tools.Weights(len(events), storeIndividual=True)
# For each category: drop None candidates, compute the ID scale factors on the
# flattened candidates, then restore the per-event structure.
# NOTE(review): `w` is recomputed on every iteration but never stored or added
# to `weights`, so the `weights` displayed below is still empty - confirm what
# was intended here.
# NOTE(review): the candidates were built from the first N events only, while
# `weights` is sized with len(events) - confirm the lengths are meant to differ.
for cat, lltt in lltt_cats.items(): 
    lltt = lltt[~ak.is_none(lltt, axis=1)]
    lltt = ak.fill_none(lltt, [], axis=0)
    flat, num = ak.flatten(lltt), ak.num(lltt)
    w = apply_lepton_ID_SFs(flat, cat)
    w = ak.unflatten(w, num)
    
weights

In [None]:
# Open one DY4Jets ntuple with the NanoAOD schema.
base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/DY4JetsToLLM-50'
file = join(base, 'all_DY4JetsToLLM-50_file001_part_1of3_Electrons.root')
factory = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema)
events = factory.events()
#base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/SingleMuon_Run2018A'
#file = join(base, 'all_SingleMuon_Run2018A_file038_part_2of3_Muons.root')
#events = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema).events()
#tag0 = ak.Array(t1.layout.content.project(1)
    
#l1 = cands.ll.l1
#l1_tags = np.asarray(l1.layout.content.tags)
#for tag in np.unique(l1_tags):
##    tags = l1.layout.content.project(tag)
 #   ID = tags.content.parameter("__record__")

#for cat in ['eemt', 'eeet', 'mmet']:
#    l1 = cands.ll.l1[cands.category==cat]
#    print(ak.type(l1))
#    l1 = l1.layout.content.project(0)
#    print(l1)
    
#cands = ak.concatenate([lltt_eemt, lltt_eeet], axis=1)
#print('cands', ak.num(cands))
#tags = np.asarray(cands.layout.content.tags)
#print(tags)
#for tag in np.unique(tags):
#    mask = (tags==tag)
#    print(np.asarray(cands.layout.content.project(tag)))
#    
#ele = ak.Array(leptons.layout.content.project(0))
#print(ele.dEsigmaUp)

ele, met = events.Electron, events.MET
tau, mu, jet = events.Tau, events.Muon, events.Jet

# For each category, pair (t1, tau) with every same-flavor lepton pair and keep
# the combination whose t1 has the highest pt.
# Tuple layout: (category, t1 collection, collection the ll pair is built from).
N = 10**6
category_inputs = (
    ('eeet', ele, ele),
    ('eemt', mu, ele),
    ('mmet', ele, mu),
)
leading = {}
for cat_name, t1_leptons, ll_leptons in category_inputs:
    tt = ak.cartesian({'t1': t1_leptons[:N], 't2': tau[:N]}, axis=1)
    ll = ak.combinations(ll_leptons[:N], 2, axis=1, fields=['l1', 'l2'])
    quads = ak.cartesian({'tt': tt, 'll': ll}, axis=1)
    leading[cat_name] = quads[ak.argmax(quads.tt.t1.pt, axis=1, keepdims=True)]
eeet, eemt, mmet = leading['eeet'], leading['eemt'], leading['mmet']

# Report the candidate count of each category.
for cat_name in ('eeet', 'eemt', 'mmet'):
    print(cat_name, ak.sum(ak.num(leading[cat_name])))

# Tag every candidate with its category name.
eemt["category"] = 'eemt'
eeet["category"] = 'eeet'
mmet["category"] = 'mmet'

#cands = ak.concatenate([eemt, eeet, mmet], axis=1)
# Only eemt is used for now. `cands` is the same object as `eemt`, so the
# 'met' field added next also appears on `eemt`.
cands = eemt
cands['met'] = met[:N]
# Drop None candidates, then events left without any candidate.
cands = cands[~ak.is_none(cands, axis=1)]
cands = cands[ak.num(cands) > 0]
print(cands)

In [None]:
from time import time

def apply_eleES(ele, met, syst='nom'):
    """Apply an electron energy-scale shift and propagate it to the MET.

    Parameters
    ----------
    ele : array with `pt`, `eta`, `phi`, `mass` fields
    met : array with `pt`, `phi` fields, broadcastable against `ele`
    syst : {'nom', 'up', 'down'}
        'nom' returns the inputs untouched.

    Returns
    -------
    (ele_shifted, met_shifted)
        For 'up' / 'down', `ele_shifted` is a PtEtaPhiMLorentzVector scaled by
        a factor chosen from |eta| (3% below 1.479, 4% from 1.479 up to 1.653,
        5% from 1.653), and `met_shifted` is a copy of `met` with `pt` / `phi`
        recomputed after adding (1 - scale) * electron pT to its x and y
        components. The input `met` is not modified.

    Raises
    ------
    KeyError
        If `syst` is not 'nom', 'up' or 'down'.
    """
    if syst == 'nom':
        return ele, met

    # (barrel, crossover, endcap) scale for each shift direction
    scales = {'up': (1.03, 1.04, 1.05), 'down': (0.97, 0.96, 0.95)}
    barrel_scale, crossover_scale, endcap_scale = scales[syst]

    # The lower edge of each region is inclusive so that an electron sitting
    # exactly on a boundary still gets a scale instead of a weight of 0.
    abs_eta = abs(ele.eta)
    in_barrel = abs_eta < 1.479
    in_crossover = (abs_eta >= 1.479) & (abs_eta < 1.653)
    in_endcap = abs_eta >= 1.653
    weights = (in_barrel * barrel_scale +
               in_crossover * crossover_scale +
               in_endcap * endcap_scale)

    ele_p4 = ak.zip({'pt': ele.pt, 'eta': ele.eta,
                     'phi': ele.phi, 'mass': ele.mass},
                    with_name='PtEtaPhiMLorentzVector')
    ele_p4_shift = weights * ele_p4

    # momentum removed from (or added to) the electron goes into the MET
    ele_x_diff = (1 - weights) * ele.pt * np.cos(ele.phi)
    ele_y_diff = (1 - weights) * ele.pt * np.sin(ele.phi)
    met_x = met.pt * np.cos(met.phi) + ele_x_diff
    met_y = met.pt * np.sin(met.phi) + ele_y_diff
    met_p4 = ak.zip({'x': met_x, 'y': met_y,
                     'z': 0, 't': 0}, with_name='LorentzVector')

    # build the shifted MET as a new array rather than assigning into `met`
    met_shift = ak.with_field(met, met_p4.pt, 'pt')
    met_shift = ak.with_field(met_shift, met_p4.phi, 'phi')
    return ele_p4_shift, met_shift

def apply_eleSmear(ele, met, syst='nom'):
    """Apply an electron smearing shift and propagate it to the MET.

    Parameters
    ----------
    ele : array with `pt`, `eta`, `phi`, `mass`, `dEsigmaUp`, `dEsigmaDown` fields
    met : array with `pt`, `phi` fields, broadcastable against `ele`
    syst : {'nom', 'up', 'down'}
        'nom' returns the inputs untouched.

    Returns
    -------
    (ele_shifted, met_p4)
        For 'up' / 'down', `ele_shifted` is a PtEtaPhiMLorentzVector scaled by
        (1 + dEsigmaUp) or (1 + dEsigmaDown), and `met_p4` is a LorentzVector
        (x, y, z=0, t=0) holding the MET after adding (1 - scale) * electron pT
        to its x and y components. The input `met` is not modified.

    Raises
    ------
    ValueError
        If `syst` is not 'nom', 'up' or 'down'. Any value other than 'up' used
        to be treated silently as the down shift.

    NOTE(review): the 'nom' branch returns `met` itself while the shifted
    branches return the rebuilt `met_p4`; confirm which type callers expect.
    """
    if syst not in ('nom', 'up', 'down'):
        raise ValueError(f"syst must be 'nom', 'up' or 'down', got {syst!r}")
    if syst == 'nom':
        return ele, met

    shift = ele.dEsigmaUp if syst == 'up' else ele.dEsigmaDown
    weights = shift + 1.0

    ele_p4 = ak.zip({'pt': ele.pt, 'eta': ele.eta,
                     'phi': ele.phi, 'mass': ele.mass},
                    with_name='PtEtaPhiMLorentzVector')
    ele_p4_shift = weights * ele_p4

    # momentum removed from (or added to) the electron goes into the MET
    ele_x_diff = (1 - weights) * ele.pt * np.cos(ele.phi)
    ele_y_diff = (1 - weights) * ele.pt * np.sin(ele.phi)
    met_x = met.pt * np.cos(met.phi) + ele_x_diff
    met_y = met.pt * np.sin(met.phi) + ele_y_diff
    met_p4 = ak.zip({'x': met_x, 'y': met_y,
                     'z': 0, 't': 0}, with_name='LorentzVector')
    return ele_p4_shift, met_p4

# Print the electron pt of all events, then the pt of the leading-pair l1
# candidates after the 'up' smearing shift.
print(ele.pt)
l1_candidates, met_candidates = cands.ll.l1, cands.met
ele_new, met_new = apply_eleSmear(l1_candidates, met_candidates, 'up')
print(ele_new.pt)

In [None]:
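# NOTE(review): the lines below appear to be text output pasted from an earlier
# run (array printouts and timings). They are kept as comments so that this
# cell is valid Python.
# [[55.8], [71.7], [44.4, 41.4], [68.5, ... 62.7, 20.5], [85.3, 28.4], [99.1, 22.1]]
# 1: 0.03943324089050293
# 2: 0.10126161575317383
# 3: 0.12982583045959473
# 4: 0.17160296440124512
# [[42.2], [65.1], [53.5], [174], [43.9], ... 48.9], [105, 105], [147], [82.7], [96.1]]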

# 

In [None]:
def apply_eleES(ele, met, eleES_shift='nom', eleSmear_shift='nom'):
    """Apply combined electron energy-scale and smearing shifts.

    NOTE(review): this redefines the `apply_eleES` from the earlier cell with a
    different signature and return value; whichever cell ran last wins.

    Parameters
    ----------
    ele : array with `pt`, `eta`, `phi`, `mass` (and `dEsigmaUp` /
        `dEsigmaDown` when a smearing shift is requested)
    met : unused here; kept so the call signature stays the same
    eleES_shift, eleSmear_shift : {'nom', 'up', 'down'}

    Returns
    -------
    (ele_shifted, diffs)
        `ele_shifted` is a PtEtaPhiMLorentzVector scaled by the product of the
        energy-scale and smearing factors; `diffs` is {'x': ..., 'y': ...} with
        (1 - factor) * electron pT projected on x and y, to be added to the MET
        (see `apply_met_shift`).

    Raises
    ------
    ValueError
        If either shift is not 'nom', 'up' or 'down'.
    """
    for label, value in (('eleES_shift', eleES_shift),
                         ('eleSmear_shift', eleSmear_shift)):
        if value not in ('nom', 'up', 'down'):
            raise ValueError(f"{label} must be 'nom', 'up' or 'down', got {value!r}")

    # decide ES weights by region of the detector; the lower edge of each
    # region is inclusive so that an electron exactly on a boundary is not
    # given a weight of 0
    abs_eta = abs(ele.eta)
    in_barrel = abs_eta < 1.479
    in_crossover = (abs_eta >= 1.479) & (abs_eta < 1.653)
    in_endcap = abs_eta >= 1.653
    barrel_shifts = {'up': 1.03, 'nom': 1.0, 'down': 0.97}
    crossover_shifts = {'up': 1.04, 'nom': 1.0, 'down': 0.96}
    endcap_shifts = {'up': 1.05, 'nom': 1.0, 'down': 0.95}
    eleES_weights = (in_barrel * barrel_shifts[eleES_shift] +
                     in_crossover * crossover_shifts[eleES_shift] +
                     in_endcap * endcap_shifts[eleES_shift])

    # get smearing weights
    if eleSmear_shift == 'nom':
        shift = 0
    else:
        shift = ele.dEsigmaUp if eleSmear_shift == 'up' else ele.dEsigmaDown
    eleSmear_weights = shift + 1.0

    ele_p4 = ak.zip({'pt': ele.pt, 'eta': ele.eta,
                     'phi': ele.phi, 'mass': ele.mass},
                    with_name='PtEtaPhiMLorentzVector')

    # apply weights
    weights = eleES_weights * eleSmear_weights
    ele_p4_shift = weights * ele_p4
    ele_x_diff = (1 - weights) * ele.pt * np.cos(ele.phi)
    ele_y_diff = (1 - weights) * ele.pt * np.sin(ele.phi)
    diffs = {'x': ele_x_diff, 'y': ele_y_diff}
    return ele_p4_shift, diffs


def apply_met_shift(met, diffs):
    """Return a copy of `met` with `pt` / `phi` recomputed after adding `diffs`.

    This wraps the MET-propagation statements that were left dangling below
    `apply_eleES` with inconsistent indentation, which stopped the cell from
    compiling.

    Parameters
    ----------
    met : array with `pt`, `phi` fields
    diffs : dict with 'x' and 'y' entries, as returned by `apply_eleES`
    """
    met_x = met.pt * np.cos(met.phi) + diffs['x']
    met_y = met.pt * np.sin(met.phi) + diffs['y']
    met_p4 = ak.zip({'x': met_x, 'y': met_y,
                     'z': 0, 't': 0}, with_name='LorentzVector')
    # build a new array rather than assigning into the caller's `met`
    met_shift = ak.with_field(met, met_p4.pt, 'pt')
    met_shift = ak.with_field(met_shift, met_p4.phi, 'phi')
    return met_shift

In [None]:
# NOTE(review): `a` is not defined anywhere in the visible notebook, so this
# cell fails on a fresh kernel - confirm what it was meant to inspect.
a.keys()

In [None]:
# Open one BBA->Zh->LLTauTau (M325) ntuple and flatten its taus into one list.
base = '/eos/uscms/store/group/lpcsusyhiggs/ntuples/AZh/nAODv9/2018/BBAToZhToLLTauTauM325'
file = join(base, 'all_BBAToZhToLLTauTauM325_file006_part_1of3_Electrons.root')
factory = NanoEventsFactory.from_root(file, schemaclass=NanoAODSchema)
events = factory.events()
taus = ak.flatten(events.Tau)

In [None]:
import awkward as ak
from azh_analysis.utils.corrections import get_tau_ID_weights, tau_ID_weight

# Reload the tau ID corrections for the current `year` and evaluate the
# nominal weights of the flattened taus in the "eett" category.
tID_base = f"../corrections/tau_ID/UL_{year}"
tID_file = join(tID_base, "tau.corr.json")
tIDs = get_tau_ID_weights(tID_file)
nominal_tau_weights = tau_ID_weight(taus, tIDs, "eett", syst="nom")
nominal_tau_weights

In [None]:
# Walk the ROOT files in the temp corrections directory and split each key
# name on "_" into its leading fields.
# NOTE(review): this loop is unfinished - the parsed values are not used yet.
base = "/uscms_data/d3/jdezoort/AZh_columnar/CMSSW_10_2_9/src/azh_coffea/src/corrections/temp"
for fname in os.listdir(base):
    # second-to-last and third-to-last "_"-separated pieces of the file name
    name_parts = fname.split("_")
    vs_e, vs_j = name_parts[-2], name_parts[-3]
    # `f` stays bound to the most recently opened file after the loop
    f = uproot.open(join(base, fname))
    for k in f.keys():
        ks = k.split("_")
        # named `era` so the notebook-level `year` is not overwritten
        dm, era = ks[0], ks[1]
        if "2016" in era:
            # for 2016 the next field is appended to the year string
            era = era + ks[2]
        # TODO: the original `else:` branch was empty; fill in what should
        # happen with (dm, era, vs_e, vs_j)

In [None]:
# Lists the keys of `f`, which is whatever the loop in the previous cell bound
# last. NOTE(review): relies on a variable leaked from that loop.
f.keys()

In [None]:
sys.path.append('../')
from azh_analysis.utils.corrections import get_tau_ID_weights
# Reload the tau ID corrections and show the available keys, followed by the
# "DeepTau2017v2p1VSe" entry.
tID_base = f"../corrections/tau_ID/UL_{year}"
tID_file = join(tID_base, "tau.corr.json")
tIDs = get_tau_ID_weights(tID_file)
tID_keys = list(tIDs.keys())
print(tID_keys)
print(tIDs["DeepTau2017v2p1VSe"])

In [None]:
from azh_analysis.utils.btag import get_btag_SFs, get_btag_tables
# Reload the b-tagging inputs and list the keys of the first table whose name
# contains "ZZ".
btag_root = "../corrections/btag/"
btag_tables = get_btag_tables(btag_root, f"{year}", UL=True)
btag_SFs = get_btag_SFs(btag_root, f"{year}", UL=True)
zz_keys = [name for name in btag_tables[0].keys() if "ZZ" in name]
print(zz_keys)