# NanoEventsFactory example

Load a single file from x-cache (using redirector_ucsd) or the grid (using redirector_fnal), and play with the events.


In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

from Tools.config_helpers import redirector_fnal, redirector_ucsd
from Tools.nano_mapping import make_fileset
from Tools.helpers import get_samples
from Tools.gen import get_charge_parent, find_first_parent

import awkward as ak
import numpy as np

#samples = get_samples(2018)

# Request a fileset for the 'DY' sample key (2018) through redirector_ucsd.
# NOTE(review): small=True presumably limits the number of files -- confirm in Tools.nano_mapping.
fileset = make_fileset(['DY'], 2018, redirector=redirector_ucsd, small=True)

#f_in = '/store/mc/RunIIAutumn18NanoAODv7/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/RunIIAutumn18NanoAODv7-Nano02Apr2020_102X_upgrade2018_realistic_v21-v1/60000/022107FA-F567-1B44-B139-A18ADC996FCF.root'

# n_max is the intended cap on the number of events, but the entry_stop
# argument below is commented out, so n_max is currently unused in this call.
n_max = 500000
# Open the first file listed under the first key of the fileset.
events = NanoEventsFactory.from_root(
    fileset[list(fileset.keys())[0]][0],
    schemaclass = NanoAODSchema,
    #entry_stop = n_max,
).events()

In [None]:
#fileset[list(fileset.keys())[0]][1]

In [None]:
#events.event

In [None]:
#ev = events[events.event==33539380]

In [None]:
#print(events[events.event==33539380].Electron.pt, events[events.event==33539380].Electron.eta, events[events.event==33539380].Electron.phi, events[events.event==33539380].Electron.charge,)

In [None]:
import time

from Tools.objects import Collections

# Build the electron ('tightFCNC') and muon ('tight') collections from the
# loaded events and report the wall-clock time the two calls took.
start_time = time.time()
electron = Collections(events, 'Electron', 'tightFCNC', verbose=1).get()
muon = Collections(events, 'Muon', 'tight', verbose=1).get()

delta_time = time.time()-start_time

print ("\nTook %s seconds"%delta_time)

In [None]:
from Tools.objects import *
# Successively tighten the electron collection; each line filters the result of the previous one.
# Kinematic acceptance: pt > 25 and |eta| < 2.4.
electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]
# Keep only electrons with a valid gen-particle index.
electron = electron[(electron.genPartIdx >= 0)]
# The matched gen particle must itself be an electron (|pdgId| == 11).
electron = electron[(np.abs(electron.matched_gen.pdgId)==11)]  #from here on all leptons are gen-matched
# genPartFlav 1 or 15 (NanoAOD: prompt electron, or electron from a prompt tau).
electron = electron[( (electron.genPartFlav==1) | (electron.genPartFlav==15) )] #and now they are all prompt

In [None]:
# Same chain as for the electrons, with a looser pt threshold (15) and |pdgId| == 13.
muon = muon[(muon.pt > 15) & (np.abs(muon.eta) < 2.4)]
# Keep only muons with a valid gen-particle index.
muon = muon[(muon.genPartIdx >= 0)]
muon = muon[(np.abs(muon.matched_gen.pdgId)==13)] #from here, all muons are gen-matched
# genPartFlav 1 or 15 (NanoAOD: prompt muon, or muon from a prompt tau).
muon = muon[( (muon.genPartFlav==1) | (muon.genPartFlav==15) )] #and now they are all prompt

In [None]:
# Per-event flag: exactly two selected electrons whose charges sum to zero.
OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron)==2)

# Electron pairs per event; fields '0' and '1' hold the two members of each pair.
# NOTE(review): `choose` comes from the wildcard import of Tools.objects -- confirm it returns all 2-combinations.
dielectron = choose(electron, 2)
dielectron_mass = (dielectron['0']+dielectron['1']).mass
dielectron_pt = (dielectron['0']+dielectron['1']).pt

# Highest-pt electron per event. ak.singletons wraps each argmax result in a
# list (empty when the event has no electron), so the indexing below yields a
# jagged array with zero or one electron per event.
leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
leading_electron = electron[leading_electron_idx]
# Drop the leading electron again if it fails pt > 30.
leading_electron = leading_electron[(leading_electron.pt > 30)]

# Lowest-pt electron per event, built the same way (no additional pt cut).
# In events with a single electron this is the same object as the leading one.
trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
trailing_electron = electron[trailing_electron_idx]

In [None]:
from Tools.basic_objects import *

# Jet selection followed by lepton cleaning. getJets and match come from the
# wildcard imports of Tools.objects / Tools.basic_objects.
jet       = getJets(events, minPt=40, maxEta=2.4, pt_var='pt')
# Sort each event's jets by descending jet.pt (pt_var='pt' is used above, so this is the plain pt field).
jet       = jet[ak.argsort(jet.pt, ascending=False)] # need to sort wrt smeared and recorrected jet pt
jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

In [None]:
# Per-event boolean requirements.
# Closest dielectron pair mass lies within 15 of 91.2.
mass = (ak.min(np.abs(dielectron_mass-91.2), axis = 1) < 15)
# Leading electron pt above 30 (the leading_electron collection was already filtered with the same cut).
lead_electron = (ak.min(leading_electron.pt, axis = 1) > 30)
# NOTE(review): jet1 is computed but never registered in the selection below.
jet1 = (ak.num(jet) >= 1)
jet2 = (ak.num(jet) >= 2)
# NOTE(review): the name `os` would shadow the standard-library module if that is imported in this notebook.
os = (OSelectron)


# Register the named cuts.
selection = PackedSelection()
selection.add('os',          os )
selection.add('mass',        mass)
selection.add('leading',     lead_electron)
selection.add('two jets',    jet2)

# Opposite-sign, leading-pt and mass-window requirements combined.
o_reqs =  ['os'] + ['leading'] + ['mass']
o_reqs_d = {sel: True for sel in o_reqs }
os_sel = selection.require(**o_reqs_d)

# Same requirements plus at least two jets.
j2o_reqs = o_reqs + ['two jets']
j2o_reqs_d = {sel: True for sel in j2o_reqs }
j2os_sel = selection.require(**j2o_reqs_d)

In [None]:
leading_electron = leading_electron[j2os_sel]
trailing_electron = trailing_electron[j2os_sel]

In [None]:
from Tools.lepton_scalefactors import *
from Tools.lepton_scalefactors2 import *

# Instantiate both lepton scale-factor implementations for 2018.
# Only leptonSF2 is used in this cell.
leptonSF = LeptonSF(year=2018)
leptonSF2 = LeptonSF2(year=2018)

# Weights container with one entry per event.
weight2 = Weights( len(events) )

# Lepton scale-factor weight (from LeptonSF2), registered under the name "lepton".
weight2.add("lepton", leptonSF2.get(electron, muon))

# A second, still empty, Weights container; nothing is added to it in this cell.
weight3 = Weights( len(events) )

In [None]:
print(leptonSF2.get(electron, muon))
print(electron)
print(muon)

In [None]:
my_event = events[events.event==6547960]

In [None]:
from Tools.objects import Collections
electron = Collections(my_event, 'Electron', 'tightFCNC', verbose=0).get()
muon = Collections(my_event, 'Muon', 'tight', verbose=0).get()

In [None]:
electron.pt[:,1:2]

In [None]:
electron.eta[:,1:2]

In [None]:
#leptonSF.get(electron[:,0:1],muon)

In [None]:
from Tools.lepton_scalefactors2 import LeptonSF2

leptonSF2 = LeptonSF2(year=2018)
leptonSF2.get(electron, muon)

In [None]:
print(leptonSF2.get(electron, muon))

In [None]:
from yahist import Hist1D, Hist2D
# Hard-coded muon scale-factor table: 4 rows x 5 columns, matching the
# 4 |eta| bins and 5 pt bins defined by the edges below.
muonScaleFactor_RunABCD_counts = np.array([[0.9824, 1.0271, 0.9948, 0.9960, 0.9990],
                                           [0.9784, 1.0110, 0.9906, 0.9949, 0.9970],
                                           [1.0153, 0.9855, 1.0042, 1.0010, 1.0010],
                                           [1.0511, 0.9913, 1.0103, 1.0041, 1.0030]])
# Bin edges: first the pt axis (20 to 60), then the |eta| axis (0 to 2.4).
muonScaleFactor_RunABCD_bins = [
        np.array([20.,25.,30.,40.,50.,60.]),
        np.array([0,0.9,1.2,2.1,2.4]),
    ]
# Wrap the table in a 2D histogram so it can be used as a lookup.
h = Hist2D.from_bincounts(muonScaleFactor_RunABCD_counts, muonScaleFactor_RunABCD_bins)
from Tools.helpers import yahist_2D_lookup
# Look up one scale factor per muon from its pt and |eta| ...
muonScaleFactor_RunABCD = yahist_2D_lookup(h, muon.pt, np.abs(muon.eta))
# ... and multiply them within each event (an event without muons gives 1).
print(ak.prod(muonScaleFactor_RunABCD, axis=1))


In [None]:
h.plot(show_counts=True)

You can do anything here that you would usually do inside the processor, which makes this very useful for debugging.

In [None]:
# Count the electrons with a valid gen-particle index, summed over all events.
# NOTE(review): `electrons` (plural) is never assigned in the visible notebook;
# only `electron` is. Running the cells top to bottom would raise a NameError
# here -- confirm which collection was intended.
matched_electrons = electrons[electrons.genPartIdx>=0]
sum(ak.num(matched_electrons, axis=1))

In [None]:
# Muon-enriched QCD sample, read through the UCSD redirector.
f_in = '/store/mc/RunIIAutumn18NanoAODv7/QCD_Pt-120to170_MuEnrichedPt5_TuneCP5_13TeV_pythia8/NANOAODSIM/Nano02Apr2020_102X_upgrade2018_realistic_v21-v1/70000/DE335891-829A-B943-99BE-E5A179F5F3EB.root'

# NOTE: this rebinds `events`; cells further down operate on this QCD file.
events = NanoEventsFactory.from_root(
    redirector_ucsd + f_in,
    schemaclass = NanoAODSchema,
    entry_stop = 9999999).events()

# Tight muon collection for the new events.
muons = Collections(events, 'Muon', 'tight', verbose=1).get()

# Events that contain at least one tight muon.
single_mu_ev = events[ak.num(muons)>0]

# Event numbers of those events (displayed as the cell output).
event_list_tight = single_mu_ev.event
event_list_tight

In [None]:
import numpy as np
# All-False boolean mask shaped like events.MET.pt (zeros compared with 1);
# it is OR-ed with event-number matches in a later cell.
event_selector = (np.zeros_like(events.MET.pt) == 1)

In [None]:
my_events = [29552, 12024433]

In [None]:
# Switch the mask on for every event whose event number is listed in my_events.
for ev in my_events:
    event_selector = (event_selector | (events.event == ev))

In [None]:
def get_charge_parent(particle):
    """Return the electric charge of each particle's first parent.

    The parent identifiers returned by ``find_first_parent(particle)`` are
    compared against fixed tables of PDG IDs and mapped to the matching
    charge in units of e. Identifiers that appear in no table keep the
    initial value of 0.

    Parameters
    ----------
    particle
        Passed unchanged to ``find_first_parent``.
        NOTE(review): presumably an awkward array of gen particles --
        confirm against Tools.gen.

    Returns
    -------
    An array with the structure of the ``find_first_parent`` output, holding
    the charge assigned to each parent.
    """
    parent = find_first_parent(particle)
    charge = ak.zeros_like(parent)

    # Each entry is [list of PDG IDs, charge assigned to those IDs].
    one = [[-11, -13, -15, -17, 24, 37], 1]
    minus_one = [[11, 13, 15, 17, -24, -37], -1]
    two_thirds = [[2, 4, 6, 8], 2/3]
    minus_two_thirds = [[-2, -4, -6, -8], -2/3]
    minus_one_third = [[1, 3, 5, 7], -1/3]
    one_third = [[-1, -3, -5, -7], 1/3]
    zero = [[12, 14, 16, 18, 9, 21, 22, 23, 25], 0]

    # Every table above has to be listed here. `one_third` is included so
    # that anti-down-type quarks get +1/3 instead of falling back to 0.
    charge_pairs = [
        one,
        minus_one,
        two_thirds,
        minus_two_thirds,
        minus_one_third,
        one_third,
        zero,
    ]

    for pdg_ids, value in charge_pairs:
        for ID in pdg_ids:
            is_id = (parent == ID)
            # Overwrite the charge where the parent matches this ID and keep
            # the previously assigned value everywhere else.
            charge = is_id*ak.ones_like(parent)*value + (~is_id)*charge

    return charge

In [None]:
# Flatten the selected events' muons into a pandas DataFrame restricted to the chosen fields.
# NOTE(review): `fields_to_show` is never defined in the visible notebook;
# it has to be a list of muon field names set before this cell runs.
mu_df = ak.to_pandas(ak.flatten(muons[event_selector][fields_to_show]))
mu_df

In [None]:
ev_df = ak.to_pandas(events.MET[event_selector])
ev_df

In [None]:
import pandas as pd
pd.concat([mu_df, ev_df], axis=1, )

## Some charge flip work

In [None]:
!wget http://uaf-8.t2.ucsd.edu/~ewallace/chargeflipfull2016.pkl.gz

In [None]:
from Tools.helpers import yahist_2D_lookup
import gzip
import pickle
 
class charge_flip:
    """Charge-flip weights derived from a pickled lookup object.

    The gzip-compressed pickle at ``path`` is read once at construction and
    stored in ``self.ratio``. Both weight methods query it through
    ``yahist_2D_lookup`` as a function of lepton pt and |eta|.
    """

    def __init__(self, path):
        """Remember ``path`` and load the lookup object stored there."""
        self.path = path
        # NOTE(review): unpickling can execute arbitrary code -- only load
        # files that come from a trusted source.
        with gzip.open(self.path) as handle:
            self.ratio = pickle.load(handle)

    def flip_ratio(self, lepton1, lepton2):
        """Weight a dilepton event by the odds that one of the leptons
        has a charge flip.

        ``lepton1`` and ``lepton2`` need ``pt`` and ``eta`` fields; the
        result is reduced along axis 1.
        """
        rate_1 = yahist_2D_lookup(self.ratio, lepton1.pt, abs(lepton1.eta))
        rate_2 = yahist_2D_lookup(self.ratio, lepton2.pt, abs(lepton2.eta))

        # Term in which lepton1 is the flipped one ...
        first_flipped = (
            ak.prod(rate_1, axis=1)
            * ak.prod(1/(1-rate_1), axis=1)
            * ak.prod(1 - rate_2/(1-rate_2), axis=1)
        )
        # ... and the mirrored term for lepton2.
        second_flipped = (
            ak.prod(rate_2, axis=1)
            * ak.prod(1/(1-rate_2), axis=1)
            * ak.prod(1 - rate_1/(1-rate_1), axis=1)
        )

        return first_flipped + second_flipped

    def flip_weight(self, electron):
        """Per-event charge-flip weight built from the first two electrons.

        ``electron`` is a jagged collection with ``pt`` and ``eta`` fields.
        The slices ``[:, 0:1]`` and ``[:, 1:2]`` pick the first and second
        entry of every event and stay empty where that entry is missing.
        """
        first_pt, first_abs_eta = electron.pt[:, 0:1], abs(electron.eta[:, 0:1])
        second_pt, second_abs_eta = electron.pt[:, 1:2], abs(electron.eta[:, 1:2])

        f_first = yahist_2D_lookup(self.ratio, first_pt, first_abs_eta)
        f_second = yahist_2D_lookup(self.ratio, second_pt, second_abs_eta)

        odds_first = f_first/(1-f_first)
        odds_second = f_second/(1-f_second)

        # ak.sum turns an empty slot into 0 and ak.prod turns it into 1, so
        # a missing electron contributes the neutral element of each factor.
        return (
            ak.sum(odds_first, axis=1)*ak.prod(1-odds_second, axis=1)
            + ak.sum(odds_second, axis=1)*ak.prod(1-odds_first, axis=1)
        )

In [None]:
cf = charge_flip('chargeflipfull2016.pkl.gz')

In [None]:
electrons.matched_gen.parent.pdgId

In [None]:
# Electrons with a valid gen-particle index whose matched gen particle is an electron (|pdgId| == 11).
# NOTE(review): `electrons` (plural) is never assigned in the visible notebook -- confirm where it is defined.
gen_matched_electrons = electrons[( (electrons.genPartIdx >= 0) & (abs(electrons.matched_gen.pdgId)==11) )]

In [None]:
gen_matched_electrons.eta.tolist()

In [None]:
# True where the electron's own pdgId is the negative of its matched gen particle's pdgId
# (i.e. opposite sign) and the object itself is an electron (|pdgId| == 11).
is_flipped =( (gen_matched_electrons.matched_gen.pdgId*(-1) == gen_matched_electrons.pdgId) & (abs(gen_matched_electrons.pdgId) == 11) )

In [None]:
flipped_electrons = gen_matched_electrons[is_flipped]

In [None]:
# Replace missing pt values by 0 and keep only entries with pt > 0, which removes any None entries.
flipped_electrons = flipped_electrons[(ak.fill_none(flipped_electrons.pt, 0)>0)]

In [None]:
flipped_electrons.pt.tolist()

# Sample validation

In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

from Tools.config_helpers import redirector_fnal, redirector_ucsd

import awkward as ak

# WZTo3LNu file from the RunIISummer20UL17 campaign (per the dataset path).
f_in = '/store/mc/RunIISummer20UL17NanoAODv2/WZTo3LNu_mllmin01_NNPDF31_TuneCP5_13TeV_powheg_pythia8/NANOAODSIM/106X_mc2017_realistic_v8-v1/50000/D613B3F6-C909-9A46-BC99-59CC6FF21223.root'

# load a subset of events: reading stops at entry n_max
n_max = 500000
events_UL = NanoEventsFactory.from_root(
    redirector_ucsd+f_in,
    schemaclass = NanoAODSchema,
    entry_stop = n_max).events()



In [None]:
from Tools.objects import choose
from yahist import Hist1D

dimuon_UL = choose(events_UL.Muon, 2)

In [None]:
# Coarse binning: edges 0, 2, ..., 198.
bins = [ x for x in range(0,200,2)]
# Mass of all muon pairs, flattened over the events that have at least one pair.
Hist1D(ak.flatten(dimuon_UL[ak.num(dimuon_UL)>0].mass), bins=bins)

In [None]:
# Fine binning for the low-mass region: edges 0.0, 0.1, ..., 19.9.
bins = [ x/10. for x in range(0,200,1)]
# Same dimuon mass spectrum as in the previous cell, passing overflow=False.
h_UL = Hist1D(ak.flatten(dimuon_UL[ak.num(dimuon_UL)>0].mass), bins=bins, overflow=False)

In [None]:
# WZTo3LNu file from the RunIISummer16 campaign (per the dataset path), for comparison with events_UL.
f_in = '/store/mc/RunIISummer16NanoAODv7/WZTo3LNu_mllmin01_13TeV-powheg-pythia8/NANOAODSIM/PUMoriond17_Nano02Apr2020_102X_mcRun2_asymptotic_v8-v1/70000/5023ABC1-B4D7-D749-BD69-7EA689C7D3B6.root'

# load a subset of events: reading stops at entry n_max
# NOTE: this rebinds `events` once more.
n_max = 500000
events = NanoEventsFactory.from_root(
    redirector_ucsd+f_in,
    schemaclass = NanoAODSchema,
    entry_stop = n_max).events()

# Muon pairs and the same fine binning (edges 0.0 ... 19.9) used for h_UL.
dimuon = choose(events.Muon, 2)
bins = [ x/10. for x in range(0,200,1)]

In [None]:
import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use(hep.style.CMS)

# Dimuon mass spectrum of the Summer16 sample, using the same bins as h_UL.
h = Hist1D(ak.flatten(dimuon[ak.num(dimuon)>0].mass), bins=bins, overflow=False)

f, ax = plt.subplots()

# Overlay both spectra as step histograms. Raw counts are drawn; the
# normalised variant is the commented-out first argument.
hep.histplot(
    #[(h/sum(h.counts)).counts, (h_UL/sum(h_UL.counts)).counts],
    [h.counts, h_UL.counts],
    h.edges,
    #w2=[my_histos['tch'].errors/100, my_histos['tuh'].errors/100],
    histtype="step",
    stack=False,
    label=[r'Summer16', r'Summer20'],
    color=['blue', 'red'],
    #density=[True, True],
    ax=ax)

# Additionally draws h through yahist's own plot method.
h.plot()

# NOTE(review): the axis is labelled 'a.u.' although unnormalised counts are plotted above.
ax.set_ylabel(r'a.u.')
ax.set_xlabel(r'$M(\ell\ell)\ (GeV)$')

plt.legend()

# NOTE(review): hard-coded, user-specific output path -- adjust before running elsewhere.
f.savefig('/home/users/dspitzba/public_html/WZ_minmll.png')

In [None]:
sum(h.counts)

In [None]:
events_UL.Jet.jetId

In [None]:
events.Flag.ecalBadCalibFilter

In [None]:
events_UL.HLT.Ele32_WPTight_Gsf