# NanoEventsFactory example

Load a single file from x-cache (using redirector_ucsd) or the grid (using redirector_fnal), and play with the events.


In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

from Tools.config_helpers import redirector_fnal, redirector_ucsd
from Tools.nano_mapping import make_fileset
from Tools.helpers import get_samples
from Tools.gen import get_charge_parent, find_first_parent
from Tools.basic_objects import getJets, getBTagsDeepFlavB

import awkward as ak
import numpy as np

#samples = get_samples(2018)

# Year handed to make_fileset below and reused by later cells.
year = 2018

# File set for the 'hut' entry, resolved through the UCSD redirector.
fileset = make_fileset(['hut'], year, redirector=redirector_ucsd, small=True)

#f_in = '/store/mc/RunIIAutumn18NanoAODv7/TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8/RunIIAutumn18NanoAODv7-Nano02Apr2020_102X_upgrade2018_realistic_v21-v1/60000/022107FA-F567-1B44-B139-A18ADC996FCF.root'

# Open the first file of the first sample in the file set.
# To load only a subset of events, set n_max and re-enable entry_stop.
#n_max = 500000
first_sample = list(fileset.keys())[0]
first_file = fileset[first_sample][0]
events = NanoEventsFactory.from_root(
    first_file,
    schemaclass=NanoAODSchema,
    #entry_stop = n_max,
).events()

In [None]:
fileset[list(fileset.keys())[0]]

In [None]:
#events.event

In [None]:
#ev = events[events.event==33539380]

In [None]:
#print(events[events.event==33539380].Electron.pt, events[events.event==33539380].Electron.eta, events[events.event==33539380].Electron.phi, events[events.event==33539380].Electron.charge,)

In [None]:
import time

from Tools.objects import Collections

# Time how long building the loose lepton collections takes.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
#electron = Collections(events, 'Electron', 'tightFCNC', year=2018, verbose=1, ).get()
#muon = Collections(events, 'Muon', 'tightFCNC', year=2018, verbose=1, ).get()

# Use the notebook-wide `year` instead of repeating the literal 2018.
loose_electron = Collections(events, 'Electron', 'looseFCNC', year=year, verbose=1).get()
loose_muon = Collections(events, 'Muon', 'looseFCNC', year=year, verbose=1).get()

delta_time = time.perf_counter() - start_time

print(f"\nTook {delta_time} seconds")

In [None]:
gen = events.Generator
# x1 of the events where |id1| differs from 21 (PDG id 21 is the gluon).
id1_not_gluon = np.abs(gen.id1) != 21
list(gen[id1_not_gluon].x1)

In [None]:
list(gen.id1[(np.abs(gen.id1) != 21)])

In [None]:
list(gen.id2[np.abs(gen.id2) != 21])

In [None]:
# Collect x2 from events with |id2| != 21, followed by x1 from events with
# |id1| != 21, into a single array.
x2_non_gluon = gen[np.abs(gen.id2) != 21].x2
x1_non_gluon = gen[np.abs(gen.id1) != 21].x1
X = ak.concatenate([x2_non_gluon, x1_non_gluon])
list(X)

In [None]:
gen_particles = events.GenPart
# Keep the generator particles with pdgId 25 and take the last one listed in
# each event.
# NOTE(review): the [:,-1] index presumably fails for an event with no such
# particle — confirm every event in this sample has one.
is_pdg25 = (gen_particles.pdgId == 25)
gen_Higgs = gen_particles[is_pdg25][:,-1]

In [None]:
list(np.sort(gen_Higgs.pvec.absolute())[::-1])

In [None]:
gen_t = gen_particles[(np.abs(gen_particles.pdgId) == 6) & (gen_particles.status == 62)] #

In [None]:
gen_lep = events.GenDressedLepton
gen_lep2 = gen_lep[np.abs(gen_lep.eta) > 2.4999999]

In [None]:
np.sum(np.abs(gen_lep2.eta))

In [None]:
from Tools.objects import Collections, choose

def _skim_kinematics(leptons):
    """Keep leptons with (pt > 20 or conePt > 20) and |eta| < 2.4 (matches skim)."""
    passes_pt = (leptons.pt > 20) | (leptons.conePt > 20)
    passes_eta = np.abs(leptons.eta) < 2.4
    return leptons[passes_pt & passes_eta]


def _pt_descending(collection):
    """Return the collection reordered by descending pt within each event."""
    return collection[ak.argsort(collection.pt, ascending=False)]


## Electrons
electron = _skim_kinematics(Collections(events, "Electron", "tightFCNC", year, 0).get())
loose_electron = _skim_kinematics(Collections(events, "Electron", "looseFCNC", year, 0).get())

# Exactly two tight electrons, split by whether their charges sum to zero.
electron_charge_sum = ak.sum(electron.charge, axis=1)
has_two_electrons = (ak.num(electron) == 2)
SSelectron = (electron_charge_sum != 0) & has_two_electrons
OSelectron = (electron_charge_sum == 0) & has_two_electrons

# Pairs are built before the pT sort below, as in the original ordering.
dielectron = choose(electron, 2)
dielectron_mass = dielectron.mass
dielectron_pt = dielectron.pt

# Length-0-or-1 slices keep the per-event list structure.
electron = _pt_descending(electron)
leading_electron = electron[:, 0:1]
trailing_electron = electron[:, 1:2]

loose_electron = _pt_descending(loose_electron)
leading_loose_electron = loose_electron[:, 0:1]
trailing_loose_electron = loose_electron[:, 1:2]

diloose_electron = choose(loose_electron, 2)
diloose_electron_OS = diloose_electron[diloose_electron.charge == 0]

## Muons
muon = _skim_kinematics(Collections(events, "Muon", "tightFCNC", year, 0).get())
loose_muon = _skim_kinematics(Collections(events, "Muon", "looseFCNC", year, 0).get())

muon = _pt_descending(muon)
leading_muon = muon[:, 0:1]
trailing_muon = muon[:, 1:2]

loose_muon = _pt_descending(loose_muon)
leading_loose_muon = loose_muon[:, 0:1]
trailing_loose_muon = loose_muon[:, 1:2]

diloose_muon = choose(loose_muon, 2)
diloose_muon_OS = diloose_muon[diloose_muon.charge == 0]

## Leptons
# tight leptons, matches skim
lepton = ak.concatenate([muon, electron], axis=1)

lepton_charge_sum_ne0 = (ak.sum(lepton.charge, axis=1) != 0)
SSlepton = (lepton_charge_sum_ne0 & (ak.num(lepton) == 2))
OSlepton = ((ak.sum(lepton.charge, axis=1) == 0) & (ak.num(lepton) == 2))

lepton = _pt_descending(lepton)
leading_lepton = lepton[:, 0:1]
subleading_lepton = lepton[:, 1:2]
subsubleading_lepton = lepton[:, 2:3]

dilepton = choose(lepton, 2)
dilepton_mass = dilepton.mass
dilepton_pt = dilepton.pt

# loose leptons, matches skim
loose_lepton = ak.concatenate([loose_muon, loose_electron], axis=1)

In [None]:
# An event passes if it has exactly two loose leptons whose charges do not
# sum to zero, or more than two loose leptons.
two_leptons_nonzero_charge = (ak.num(loose_lepton) == 2) & (ak.sum(loose_lepton.charge, axis=1) != 0)
more_than_two_leptons = (ak.num(loose_lepton) > 2)
skim = (two_leptons_nonzero_charge | more_than_two_leptons)


In [None]:
list(skim[(ak.num(loose_lepton) > 3)])

In [None]:
list(ak.num(loose_lepton[(ak.num(loose_lepton) > 3)]))

In [None]:
list(ak.sum(loose_lepton[(ak.num(loose_lepton) >= 2)].charge, axis=1))

In [None]:
# NOTE(review): loose_lepton3, diloose_electron3_OS and diloose_muon2_OS are
# not defined in any visible cell — confirm where they come from.
# Three-lepton events pass only if every pair mass in both OS collections is
# more than 15 away from 90; events without exactly three leptons always pass.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_off_peak = ak.all(np.abs(diloose_electron3_OS.mass-90) > 15, axis=1)
mumu_off_peak = ak.all(np.abs(diloose_muon2_OS.mass-90) > 15, axis=1)
not_three_leptons = (ak.num(loose_lepton3) != 3)
list((has_three_leptons & (ee_off_peak & mumu_off_peak)) | not_three_leptons)

In [None]:
# Charges of the diloose_muon2 pairs in events that FAIL the |m - 90| > 15
# requirement (three-lepton events only; all other events pass by construction).
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_off_peak = ak.all(np.abs(diloose_electron3_OS.mass-90) > 15, axis=1)
mumu_off_peak = ak.all(np.abs(diloose_muon2_OS.mass-90) > 15, axis=1)
passes = (has_three_leptons & (ee_off_peak & mumu_off_peak)) | (ak.num(loose_lepton3) != 3)
list(diloose_muon2[~passes].charge)

In [None]:
# Masses of the diloose_muon2 pairs in events that FAIL the |m - 90| > 15
# requirement (three-lepton events only; all other events pass by construction).
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_off_peak = ak.all(np.abs(diloose_electron3_OS.mass-90) > 15, axis=1)
mumu_off_peak = ak.all(np.abs(diloose_muon2_OS.mass-90) > 15, axis=1)
passes = (has_three_leptons & (ee_off_peak & mumu_off_peak)) | (ak.num(loose_lepton3) != 3)
list(diloose_muon2[~passes].mass)

In [None]:
# Lepton multiplicity of the events that FAIL the |m - 90| > 15 requirement.
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_off_peak = ak.all(np.abs(diloose_electron3_OS.mass-90) > 15, axis=1)
mumu_off_peak = ak.all(np.abs(diloose_muon2_OS.mass-90) > 15, axis=1)
passes = (has_three_leptons & (ee_off_peak & mumu_off_peak)) | (ak.num(loose_lepton3) != 3)
list(ak.num(loose_lepton3[~passes]))

In [None]:
# NOTE(review): loose_lepton3, diloose_electron3_OS and diloose_muon2_OS are
# not defined in any visible cell — confirm where they come from.
# Three-lepton events pass only if every pair mass in both OS collections has
# |mass| > 12; events without exactly three leptons always pass.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_above_cut = ak.all(np.abs(diloose_electron3_OS.mass) > 12, axis=1)
mumu_above_cut = ak.all(np.abs(diloose_muon2_OS.mass) > 12, axis=1)
not_three_leptons = (ak.num(loose_lepton3) != 3)
list((has_three_leptons & (ee_above_cut & mumu_above_cut)) | not_three_leptons)

In [None]:
# Charges of the diloose_electron3 pairs in events that FAIL the |mass| > 12
# requirement (three-lepton events only; all other events pass by construction).
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_above_cut = ak.all(np.abs(diloose_electron3_OS.mass) > 12, axis=1)
mumu_above_cut = ak.all(np.abs(diloose_muon2_OS.mass) > 12, axis=1)
passes = (has_three_leptons & (ee_above_cut & mumu_above_cut)) | (ak.num(loose_lepton3) != 3)
list(diloose_electron3[~passes].charge)

In [None]:
# Masses of the diloose_electron3 pairs in events that FAIL the |mass| > 12
# requirement (three-lepton events only; all other events pass by construction).
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_above_cut = ak.all(np.abs(diloose_electron3_OS.mass) > 12, axis=1)
mumu_above_cut = ak.all(np.abs(diloose_muon2_OS.mass) > 12, axis=1)
passes = (has_three_leptons & (ee_above_cut & mumu_above_cut)) | (ak.num(loose_lepton3) != 3)
list(diloose_electron3[~passes].mass)

In [None]:
# Lepton multiplicity of the events that FAIL the |mass| > 12 requirement.
# NOTE(review): the *3 / *2 collections are not defined in any visible cell.
has_three_leptons = (ak.num(loose_lepton3) == 3)
ee_above_cut = ak.all(np.abs(diloose_electron3_OS.mass) > 12, axis=1)
mumu_above_cut = ak.all(np.abs(diloose_muon2_OS.mass) > 12, axis=1)
passes = (has_three_leptons & (ee_above_cut & mumu_above_cut)) | (ak.num(loose_lepton3) != 3)
list(ak.num(loose_lepton3[~passes]))

In [None]:
test_array = ak.Array([[True, False], [False], [True], [True, True], [False, False, False]])
print( "all:", ak.all(test_array, axis=1))
print( "any:", ak.any(test_array, axis=1))

In [None]:
empty_array = [[], [], []]
print("all:", ak.all(empty_array, axis=1))
print("any:", ak.any(empty_array, axis=1))

In [None]:
ak.any([]) | ak.any([])

In [None]:
ak.to_numpy(ak.flatten(leading_muon.pt))

In [None]:
from Tools.basic_objects import *

def _remove_lepton_overlap(jets):
    """Drop jets matched (deltaRCut=0.4) to the tight muons, then to the tight electrons."""
    jets = jets[~match(jets, muon, deltaRCut=0.4)]  # remove jets that overlap with muons
    return jets[~match(jets, electron, deltaRCut=0.4)]  # remove jets that overlap with electrons


# Jets with minPt=40, cleaned against the tight leptons.
jet = _remove_lepton_overlap(
    getJets(events, minPt=40, maxEta=2.4, pt_var='pt', UL=False)
)

# Jets without a pT threshold: sorted by descending pt first, then cleaned.
jet2 = getJets(events, minPt=0, maxEta=2.4, pt_var='pt', UL=False)
jet2 = jet2[ak.argsort(jet2.pt, ascending=False)]
jet2 = _remove_lepton_overlap(jet2)

# b-tagged jets taken from the no-threshold collection, ordered by descending pt.
btag = getBTagsDeepFlavB(jet2, year=2018, UL=False)
btag = btag[ak.argsort(btag.pt, ascending=False)]
print(btag.pt)
leading_btag = btag[:, 0:1]
print(leading_btag.pt)
subleading_btag = btag[:, 1:2]
print(subleading_btag.pt)

In [None]:
events.HLT.Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL_DZ | events.HLT.Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL_DZ

In [None]:
ak.to_numpy(events.Pileup.nTrueInt)

In [None]:
from Tools.pileup import pileup
PU = pileup(year=2018, UL = False)
PU.reweight(events.Pileup.nTrueInt, to='down')

In [None]:
from Tools.lepton_scalefactors import *
from Tools.lepton_scalefactors2 import *

# Lepton scale-factor helper, evaluated on the electron/muon collections in the next cell.
# NOTE(review): this is built for 2017 while `year` is set to 2018 at the top
# of the notebook — confirm the mismatch is intentional.
leptonSF2 = LeptonSF2(year=2017)

In [None]:
print(leptonSF2.get(electron, muon))
print(electron)
print(muon)

In [None]:
my_event = events[events.event==24040097]

In [None]:
from yahist import Hist1D, Hist2D
from Tools.helpers import yahist_2D_lookup

# Scale-factor table: 4 rows x 5 columns, matching the 4 |eta| bins and 5 pT
# bins defined by the edges below.
# NOTE(review): presumably Hist2D.from_bincounts expects counts as (y bins, x bins) — confirm.
muonScaleFactor_RunABCD_counts = np.array([
    [0.9824, 1.0271, 0.9948, 0.9960, 0.9990],
    [0.9784, 1.0110, 0.9906, 0.9949, 0.9970],
    [1.0153, 0.9855, 1.0042, 1.0010, 1.0010],
    [1.0511, 0.9913, 1.0103, 1.0041, 1.0030],
])
pt_edges = np.array([20.,25.,30.,40.,50.,60.])
eta_edges = np.array([0,0.9,1.2,2.1,2.4])
muonScaleFactor_RunABCD_bins = [pt_edges, eta_edges]

h = Hist2D.from_bincounts(muonScaleFactor_RunABCD_counts, muonScaleFactor_RunABCD_bins)

# Look up one value per muon from (pt, |eta|) and multiply them within each event.
muonScaleFactor_RunABCD = yahist_2D_lookup(h, muon.pt, np.abs(muon.eta))
per_event_sf = ak.prod(muonScaleFactor_RunABCD, axis=1)
print(per_event_sf)


In [None]:
h.plot(show_counts=True)

Anything you would normally do inside the processor can also be done here, which makes this notebook very useful for debugging.

In [None]:
# Keep the electrons that have a generator-particle match (genPartIdx >= 0)
# and count them over all events.
# The name `electrons` is never assigned in this notebook; the tight electron
# collection built in the object-selection cell is called `electron`.
matched_electrons = electron[electron.genPartIdx >= 0]
# ak.sum reduces the per-event counts in one vectorized call, instead of the
# builtin sum() iterating over the array element by element.
ak.sum(ak.num(matched_electrons, axis=1))

In [None]:
f_in = '/store/mc/RunIIAutumn18NanoAODv7/QCD_Pt-120to170_MuEnrichedPt5_TuneCP5_13TeV_pythia8/NANOAODSIM/Nano02Apr2020_102X_upgrade2018_realistic_v21-v1/70000/DE335891-829A-B943-99BE-E5A179F5F3EB.root'

# Replace `events` with the contents of this file, read through the UCSD redirector.
qcd_factory = NanoEventsFactory.from_root(
    redirector_ucsd + f_in,
    schemaclass=NanoAODSchema,
    entry_stop=9999999,
)
events = qcd_factory.events()

muons = Collections(events, 'Muon', 'tight', verbose=1).get()

# Events with at least one muon in the 'tight' collection, and their event numbers.
has_tight_muon = ak.num(muons) > 0
single_mu_ev = events[has_tight_muon]

event_list_tight = single_mu_ev.event
event_list_tight

In [None]:
import numpy as np
event_selector = (np.zeros_like(events.MET.pt) == 1)

In [None]:
my_events = [29552, 12024433]

In [None]:
# OR the existing mask with a match on each event number in my_events, so the
# selector also becomes True for every event whose number is in that list.
for ev in my_events:
    event_selector = (event_selector | (events.event == ev))

In [None]:
def get_charge_parent(particle):
    """Return the electric charge associated with each particle's first parent.

    The parent identifiers come from ``find_first_parent(particle)`` and are
    compared against the ID lists below; entries that match none of the
    listed IDs keep a charge of 0.

    Note: this definition shadows the ``get_charge_parent`` imported from
    ``Tools.gen`` at the top of the notebook.

    Parameters
    ----------
    particle
        Passed unchanged to ``find_first_parent``.
        NOTE(review): presumably an awkward array of generator particles — confirm.

    Returns
    -------
    Array with the same structure as the output of ``find_first_parent``,
    holding the charge assigned to each parent ID.
    """
    parent = find_first_parent(particle)
    charge = ak.zeros_like(parent)

    # (list of parent IDs, charge assigned to them)
    charge_pairs = [
        ([-11, -13, -15, -17, 24, 37], 1),
        ([11, 13, 15, 17, -24, -37], -1),
        ([2, 4, 6, 8], 2/3),
        ([-2, -4, -6, -8], -2/3),
        ([1, 3, 5, 7], -1/3),
        # This entry was defined in the original but never added to the lookup,
        # so these IDs were silently left at charge 0.
        ([-1, -3, -5, -7], 1/3),
        ([12, 14, 16, 18, 9, 21, 22, 23, 25], 0),
    ]

    for pdg_ids, value in charge_pairs:
        for pdg_id in pdg_ids:
            is_match = (parent == pdg_id)
            # Overwrite matching entries with `value`, keep the others as they are.
            charge = is_match*ak.ones_like(parent)*value + (~is_match)*charge

    return charge

In [None]:
# Flatten the selected events' muons (restricted to the chosen fields) into a
# pandas DataFrame and display it.
# NOTE(review): fields_to_show is not defined in any visible cell — define the
# list of muon fields before running this.
mu_df = ak.to_pandas(ak.flatten(muons[event_selector][fields_to_show]))
mu_df

In [None]:
ev_df = ak.to_pandas(events.MET[event_selector])
ev_df

In [None]:
import pandas as pd
pd.concat([mu_df, ev_df], axis=1, )

## Some charge flip work

In [None]:
!wget http://uaf-8.t2.ucsd.edu/~ewallace/chargeflipfull2016.pkl.gz

In [None]:
from Tools.helpers import yahist_2D_lookup
import gzip
import pickle
 
class charge_flip:
    """Charge-flip weights from a 2D lookup object stored in a gzipped pickle.

    The unpickled object is kept in ``self.ratio`` and is queried through
    ``yahist_2D_lookup`` with (pt, |eta|).
    """

    def __init__(self, path):
        """Load the lookup object from the gzip-compressed pickle at ``path``."""
        self.path = path
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file. Only load files from a trusted source.
        with gzip.open(self.path) as pickled_ratio:
            self.ratio = pickle.load(pickled_ratio)

    def flip_ratio(self, lepton1, lepton2):
        """Weight dilepton events by the odds that one of the two leptons is charge-flipped.

        ``lepton1`` and ``lepton2`` must provide ``pt`` and ``eta``; the
        result is reduced along axis 1.
        """
        rate1 = yahist_2D_lookup(self.ratio, lepton1.pt, abs(lepton1.eta))
        rate2 = yahist_2D_lookup(self.ratio, lepton2.pt, abs(lepton2.eta))

        # NOTE(review): `1-x/(1-x)` parses as `1 - (x/(1-x))` — confirm intended.
        first_flipped = (
            ak.prod(rate1, axis=1)
            * ak.prod(1/(1-rate1), axis=1)
            * ak.prod(1-rate2/(1-rate2), axis=1)
        )
        second_flipped = (
            ak.prod(rate2, axis=1)
            * ak.prod(1/(1-rate2), axis=1)
            * ak.prod(1-rate1/(1-rate1), axis=1)
        )

        return first_flipped + second_flipped

    def flip_weight(self, electron):
        """Per-event charge-flip weight built from the first two electrons of each event."""
        # Length-0-or-1 slices, so events with fewer electrons give empty lists.
        f_1 = yahist_2D_lookup(self.ratio, electron.pt[:, 0:1], abs(electron.eta[:, 0:1]))
        f_2 = yahist_2D_lookup(self.ratio, electron.pt[:, 1:2], abs(electron.eta[:, 1:2]))

        odds_1 = f_1/(1-f_1)
        odds_2 = f_2/(1-f_2)

        # ak.sum turns an empty list into 0 and ak.prod turns it into 1, so
        # missing electrons contribute nothing to the weight.
        weight = (
            ak.sum(odds_1, axis=1)*ak.prod(1-odds_2, axis=1)
            + ak.sum(odds_2, axis=1)*ak.prod(1-odds_1, axis=1)
        )

        return weight

In [None]:
cf = charge_flip('chargeflipfull2016.pkl.gz')

In [None]:
electrons.matched_gen.parent.pdgId

In [None]:
gen_matched_electrons = electrons[( (electrons.genPartIdx >= 0) & (abs(electrons.matched_gen.pdgId)==11) )]

In [None]:
gen_matched_electrons.eta.tolist()

In [None]:
is_flipped =( (gen_matched_electrons.matched_gen.pdgId*(-1) == gen_matched_electrons.pdgId) & (abs(gen_matched_electrons.pdgId) == 11) )

In [None]:
flipped_electrons = gen_matched_electrons[is_flipped]

In [None]:
flipped_electrons = flipped_electrons[(ak.fill_none(flipped_electrons.pt, 0)>0)]

In [None]:
flipped_electrons.pt.tolist()

# Sample validation

In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea.analysis_tools import Weights, PackedSelection

from Tools.config_helpers import redirector_fnal, redirector_ucsd

import awkward as ak

f_in = '/store/mc/RunIISummer20UL17NanoAODv2/WZTo3LNu_mllmin01_NNPDF31_TuneCP5_13TeV_powheg_pythia8/NANOAODSIM/106X_mc2017_realistic_v8-v1/50000/D613B3F6-C909-9A46-BC99-59CC6FF21223.root'

# Read at most n_max entries of the file through the UCSD redirector.
n_max = 500000
ul_factory = NanoEventsFactory.from_root(
    redirector_ucsd+f_in,
    schemaclass=NanoAODSchema,
    entry_stop=n_max,
)
events_UL = ul_factory.events()



In [None]:
from Tools.objects import choose
from yahist import Hist1D

dimuon_UL = choose(events_UL.Muon, 2)

In [None]:
# Bin edges 0, 2, ..., 198.
bins = list(range(0, 200, 2))
# Pair masses from the events that have at least one muon pair.
dimuon_UL_mass = ak.flatten(dimuon_UL[ak.num(dimuon_UL)>0].mass)
Hist1D(dimuon_UL_mass, bins=bins)

In [None]:
# Bin edges 0.0, 0.1, ..., 19.9.
bins = [edge/10. for edge in range(200)]
# Pair masses from the events that have at least one muon pair.
dimuon_UL_mass = ak.flatten(dimuon_UL[ak.num(dimuon_UL)>0].mass)
h_UL = Hist1D(dimuon_UL_mass, bins=bins, overflow=False)

In [None]:
f_in = '/store/mc/RunIISummer16NanoAODv7/WZTo3LNu_mllmin01_13TeV-powheg-pythia8/NANOAODSIM/PUMoriond17_Nano02Apr2020_102X_mcRun2_asymptotic_v8-v1/70000/5023ABC1-B4D7-D749-BD69-7EA689C7D3B6.root'

# Read at most n_max entries of the file through the UCSD redirector;
# this replaces the `events` loaded earlier.
n_max = 500000
summer16_factory = NanoEventsFactory.from_root(
    redirector_ucsd+f_in,
    schemaclass=NanoAODSchema,
    entry_stop=n_max,
)
events = summer16_factory.events()

# All muon pairs per event, and bin edges 0.0, 0.1, ..., 19.9.
dimuon = choose(events.Muon, 2)
bins = [edge/10. for edge in range(200)]

In [None]:
import matplotlib.pyplot as plt
import mplhep as hep
# Overlay the pair-mass histogram built here (h) with h_UL from an earlier
# cell, and save the figure.
plt.style.use(hep.style.CMS)

# Mass histogram for the pairs in `dimuon`, using the same `bins` as h_UL.
h = Hist1D(ak.flatten(dimuon[ak.num(dimuon)>0].mass), bins=bins, overflow=False)

f, ax = plt.subplots()

# Raw bin counts are drawn; the normalised variant is left commented out below.
hep.histplot(
    #[(h/sum(h.counts)).counts, (h_UL/sum(h_UL.counts)).counts],
    [h.counts, h_UL.counts],
    h.edges,
    #w2=[my_histos['tch'].errors/100, my_histos['tuh'].errors/100],
    histtype="step",
    stack=False,
    label=[r'Summer16', r'Summer20'],
    color=['blue', 'red'],
    #density=[True, True],
    ax=ax)

# NOTE(review): this draws h a second time in addition to the histplot call
# above — confirm that is intended.
h.plot()

ax.set_ylabel(r'a.u.')
ax.set_xlabel(r'$M(\ell\ell)\ (GeV)$')

plt.legend()

# NOTE(review): hard-coded, user-specific output path — adjust before running elsewhere.
f.savefig('/home/users/dspitzba/public_html/WZ_minmll.png')

In [None]:
sum(h.counts)

In [None]:
events_UL.Jet.jetId

In [None]:
events.Flag.ecalBadCalibFilter

In [None]:
events_UL.HLT.Ele32_WPTight_Gsf