# Example for analysis of NanoAOD samples

In this example we don't need any pre-processing of NanoAOD samples and can still use several tools of the tW_scattering repository.

- Get the proper normalization for samples
- Categorize different samples into process categories
- Use coffea processors for the map-reduce step
- Make "nice" histograms


In [None]:
%load_ext autoreload
%autoreload 2

import os

import warnings
warnings.filterwarnings('ignore')

In [None]:
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea import processor, hist
import time

from processor.charge_flip_ss import charge_flip_ss
from Tools.config_helpers import loadConfig
from klepto.archives import dir_archive

In [None]:
from processor.default_accumulators import desired_output, add_processes_to_output

from Tools.helpers import get_samples
from Tools.config_helpers import redirector_ucsd, redirector_ucsd_mini, redirector_fnal
from Tools.nano_mapping import make_fileset, nano_mapping

from processor.meta_processor import get_sample_meta

# Steering flags:
#   overwrite -- if True the existing cache is not loaded and the processor is re-run
#   local     -- if True use the local futures executor, otherwise the dask cluster
overwrite = True
local = False

# Data-taking year. This has to be assigned before the cache name is built
# from it (it used to be assigned only afterwards, which raised a NameError on
# a fresh kernel).
year = 2017

# load the config and the cache
cfg = loadConfig()

cacheName = 'charge_flip_ss'+str(year)
cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cacheName), serialized=True)

# get a python dictionary of all NanoAOD samples
# The samples definitions can be found in data/samples.yaml
samples = get_samples(year)

# make a fileset, taking the definitions in Tools/nano_mapping.py
fileset = make_fileset(['DY', 'top',], year, redirector=redirector_ucsd, small=False)
# in order for cutflows to work we need to add every process to the output accumulator
add_processes_to_output(fileset, desired_output)

# sorted names of all output accumulators; compared against the cached list
# to decide whether the cached result can be reused
histograms = sorted(list(desired_output.keys()))

meta = get_sample_meta(fileset, samples)

# chunk size passed to processor.run_uproot_job
chunksize = 250000

# Select the coffea executor and its arguments.
if local:

    # run on this machine with a pool of local workers
    exe_args = {
        'workers': 16,
        'function_args': {'flatten': False},
        "schema": NanoAODSchema,
        "skipbadfiles": True,
    }
    exe = processor.futures_executor

else:
    from Tools.helpers import get_scheduler_address
    from dask.distributed import Client, progress

    # connect to the already running dask scheduler
    scheduler_address = get_scheduler_address()
    c = Client(scheduler_address)

    def unique(filename):
        """Return `filename`, or a variant with a counter inserted before the
        extension, such that the returned path does not exist yet."""
        stem, ext = os.path.splitext(filename)
        counter = 0
        while os.path.exists(filename):
            counter += 1
            filename = stem + str(counter) + ext
        return filename

    from dask.distributed import performance_report
    # file name for the dask performance report; never overwrites an older report
    fname = unique("dask/dask-report_chunksize=" + str(chunksize/1000) + "K.html")

    exe_args = {
        'client': c,
        'function_args': {'flatten': False},
        "schema": NanoAODSchema,
        "skipbadfiles": True,
        # also return the processing metrics alongside the accumulator
        'savemetrics': True
    }
    exe = processor.dask_executor

# Start of the wall-clock measurement used for the throughput printout.
# Set for both executors so that `tstart` exists regardless of `local`
# (it used to be defined in the dask branch only).
tstart = time.time()


from contextlib import ExitStack

# Only look at the on-disk cache if we are allowed to reuse it.
if not overwrite:
    cache.load()

# Reuse the cached result when the config and the list of histograms are
# unchanged and a previous output exists; otherwise (re-)run the processor.
if cfg == cache.get('cfg') and histograms == cache.get('histograms') and cache.get('simple_output'):
    output = cache.get('simple_output')

else:
    print ("I'm running now")
    with ExitStack() as stack:
        # `performance_report` and `fname` are only defined when running on
        # dask, so the report is attached in that case only. An empty
        # ExitStack acts as a no-op context manager for the local executor.
        if not local:
            stack.enter_context(performance_report(filename=fname))
        output = processor.run_uproot_job(
            fileset,
            "Events",
            charge_flip_ss(year=year, variations=[], accumulator=desired_output),
            exe,
            exe_args,
            chunksize=chunksize,
            )

    # store everything needed to validate and reuse the result next time
    cache['fileset']        = fileset
    cache['cfg']            = cfg
    cache['histograms']     = histograms
    cache['simple_output']  = output
    cache.dump()
    

In [None]:
import cloudpickle
import gzip
# wall-clock time since `tstart`
dt = time.time() - tstart

# Single definition of the output path, so the printed message and the file
# that is actually written cannot disagree (the message used to append the
# year a second time).
outname = 'charge_calc_closure_'+str(year)
outpath = "histos/" + outname + ".pkl.gz"
os.makedirs("histos/", exist_ok=True)

print('Saving output in %s...'%(outpath))
with gzip.open(outpath, "wb") as fout:
    cloudpickle.dump(output, fout)
print('Done!')

# output[1] holds the metrics returned next to the accumulator (savemetrics)
metrics = output[1]
print("Events / s / thread: {:,.0f}".format(metrics['entries'].value / metrics['processtime'].value))
print("Events / s: {:,.0f}".format(metrics['entries'].value / dt))

In [None]:
# List the available entries of output[1] and show its 'processtime' value.
print(
    output[1].keys(),
    output[1]['processtime'].value,
)

In [None]:
# 'all' entry of the totalEvents accumulator, divided by 1e6 (i.e. in millions)
print(
    output[0]['totalEvents']['all'] / 1e6
)

The full fileset contains 174M events; this includes only the DY and ttbar samples.

In [None]:
# import the plotting libraries: matplotlib and mplhep

import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use(hep.style.CMS)

import numpy as np


# load the functions to make a nice plot from the output histograms
# and the scale_and_merge function that scales the individual histograms
# to match the physical cross section

from plots.helpers import makePlot, scale_and_merge

# define a few axes that we can use to rebin our output histograms

# Regular multiplicity axes: 10 (resp. 5) bins between the given limits.
N_bins = hist.Bin('multiplicity', r'$N$', 10, lo=-0.5, hi=9.5)
N_bins_red = hist.Bin('multiplicity', r'$N$', 5, lo=-0.5, hi=4.5)

# pT axes: one with explicit variable-width edges, one with 12 regular bins.
pt_bins = hist.Bin(
    'pt',
    r'$p_{T}\ (GeV)$',
    np.array([15, 40, 60, 80, 100, 200, 300]),
)
pt_fine_bins = hist.Bin('pt', r'$p_{T}\ (GeV)$', 12, lo=0, hi=300)

# eta axis with explicit edges, regular phi axis.
eta_bins = hist.Bin(
    'eta',
    r'$\eta $',
    np.array([0, 0.8, 1.479, 2.5]),
)
phi_bins = hist.Bin('phi', r'$\phi $', 16, lo=-3.2, hi=3.2)

# NOTE(review): 26 bins between 0 and 25 give non-integer bin edges; if one bin
# per integer pdg ID is intended the limits probably need adjusting - confirm.
pdg_bins = hist.Bin('pdgID', r'$pdg ID$', 26, lo=0, hi=25)


# luminosity value per year, passed as `lumi` to scale_and_merge and the CMS
# label (presumably in fb^-1 - TODO confirm)
lumi = {
    2016: 36,
    2017: 42,
    2018: 60,
}
nano_mappings = nano_mapping(year)

# 1D Histograms

In [None]:
from yahist import Hist1D, Hist2D

In [None]:
def _rebinned_multiplicity(name):
    """Return a copy of output histogram `name`, rebinned onto N_bins_red."""
    return output[0][name].copy().rebin('multiplicity', N_bins_red)


def _multiplicity_hist1d(coffea_hist):
    """Sum `coffea_hist` over datasets and convert it into a yahist Hist1D."""
    return Hist1D.from_bincounts(
        coffea_hist.sum('dataset').values()[()].T,
        (coffea_hist.axis('multiplicity').edges()),
    )


# rebinned coffea histograms (tmp3 and tmp4 are printed again further down)
tmp1, tmp2, tmp3, tmp4, tmp5, tmp6 = [
    _rebinned_multiplicity(name)
    for name in (
        'N_ele',
        'N_ele2',
        'electron_flips',
        'electron_flips2',
        'electron_flips3',
        'electron_flips4',
    )
]

# dataset-summed yahist versions of the above
h1, h2, h3, h4, h5, h6 = [
    _multiplicity_hist1d(rebinned)
    for rebinned in (tmp1, tmp2, tmp3, tmp4, tmp5, tmp6)
]


# electron multiplicity with a ratio panel underneath
fig, (ax1,ax2) = plt.subplots(
    2,
    sharex=True,
    figsize=(10,10),
    gridspec_kw={'height_ratios': [3, 1]},
)
h2.plot(ax=ax1, alpha=1, color="C0")
h1.plot(ax=ax1, alpha=1, color="C3")

ax1.set_xlabel(r'$N_{lepton}\ $')
ax1.set_ylabel(r'Events')

fig.legend(["weighted OS", "SS"])
multiplicity_ratio = h1.divide(h2, binomial=True)
multiplicity_ratio.plot(ax=ax2, errors=True, label="ratio")


# number of flips, one figure per pair of histograms; the first histogram of
# each pair is drawn first, matching the order of the legend entries
for first_hist, second_hist, legend_entries in (
    (h4, h3, ["weighted OS (ee)", "SS (ee)",]),
    (h6, h5, ["weighted OS (emu)", "SS (emu)",]),
):
    fig, (ax1) = plt.subplots(1, figsize=(10,10))
    first_hist.plot(ax=ax1, alpha=1, color="C2")
    second_hist.plot(ax=ax1, alpha=1, color="C1")

    ax1.set_xlabel(r'$N_{flips}\ $')
    ax1.set_ylabel(r'Events')

    fig.legend(legend_entries)

In [None]:
# dataset-summed bin contents of the two rebinned flip histograms
for flip_hist in (tmp3, tmp4):
    print(flip_hist.sum('dataset').values()[()].T)

In [None]:
def _scaled_pt_eta(name):
    """Scale and merge output histogram `name`, then rebin its pt and eta axes.

    Uses `fileset` (the fileset that was actually processed); the previously
    referenced `mc_fileset` is not defined anywhere in this notebook.
    """
    merged = scale_and_merge(output[0][name], samples, fileset, nano_mappings, lumi=lumi[year])
    merged = merged.rebin('pt', pt_fine_bins)
    return merged.rebin('eta', eta_bins)


def _process_hist2d(coffea_hist, process):
    """Convert the `process` category of `coffea_hist` into a yahist Hist2D.

    Bin contents and errors (square root of the sum of squared weights) are
    both taken from the same histogram.
    """
    key = (process,)
    return Hist2D.from_bincounts(
        coffea_hist.values()[key].T,
        (coffea_hist.axis('pt').edges(), coffea_hist.axis('eta').edges()),
        errors = np.sqrt(coffea_hist.values(sumw2=True)[key][1].T),
    )


tmp1 = _scaled_pt_eta('electron')
tmp2 = _scaled_pt_eta('electron2')

h1_TT = _process_hist2d(tmp1, 'top')
# errors now come from tmp2 as well (they used to be copied from tmp1)
h2_TT = _process_hist2d(tmp2, 'top')

h1_DY = _process_hist2d(tmp1, 'DY')
h2_DY = _process_hist2d(tmp2, 'DY')

In [None]:
# x-axis projections of the 2D histograms, keyed by process and category
hists = {
    'pt_ttbar_SS': h1_TT.projection('x'),
    'pt_ttbar_OS': h2_TT.projection('x'),
    'pt_DY_SS': h1_DY.projection('x'),
    'pt_DY_OS': h2_DY.projection('x'),
}

# Legend label and fill color for the stacked OS contributions. Raw strings
# keep the LaTeX "\ " as-is instead of relying on an invalid escape sequence;
# the resulting label text is unchanged.
hists['pt_ttbar_OS'].label = r'$OS\ (ttbar)$'
hists['pt_ttbar_OS'].color = '#8AC926'

hists['pt_DY_OS'].label = r'$OS\ (DY)$'
hists['pt_DY_OS'].color = '#FFCA3A'

# histograms that enter the stack, in stacking order
keys = ['pt_ttbar_OS', 'pt_DY_OS']

#total_mc = get_total(my_histos, keys)

# total SS over total OS
ratio = (hists['pt_ttbar_SS']+hists['pt_DY_SS']).divide(hists['pt_ttbar_OS']+hists['pt_DY_OS'])

In [None]:
# Main figure: stacked OS contributions with the SS total overlaid, and the
# SS/OS ratio in the lower panel.
fig, (ax, rax) = plt.subplots(2,1,figsize=(10,10), gridspec_kw={"height_ratios": (3, 1), "hspace": 0.05}, sharex=True)
hep.cms.label(
    data=False,
    year=year,
    lumi=lumi[year],
    loc=0,
    ax=ax,
)

# mplhep's `w2` is the per-bin sum of squared weights (a variance), while the
# yahist `.errors` are standard deviations - hence the squares below, and the
# sum in quadrature for the combined SS histogram.
hep.histplot(
    [hists[x].counts for x in keys ],
    hists['pt_ttbar_OS'].edges,
    w2=[hists[x].errors**2 for x in keys ],
    histtype="fill",
    stack=True,
    label=['%s (%.0f)'%(hists[x].label, sum(hists[x].counts)) for x in keys],
    color=[ hists[x].color for x in keys ],
    ax=ax)

ss_counts = hists['pt_ttbar_SS'].counts+hists['pt_DY_SS'].counts
ss_w2 = hists['pt_ttbar_SS'].errors**2+hists['pt_DY_SS'].errors**2

hep.histplot(
    [ss_counts],
    hists['pt_ttbar_SS'].edges,
    w2=[ss_w2],
    histtype="step",
    stack=False,
    label=['%s (%.0f)'%('SS (total)', sum(ss_counts))],
    color=['#525B76'],
    ax=ax)

hep.histplot(
    [ratio.counts],
    ratio.edges,
    w2=[ratio.errors**2],
    histtype="errorbar",
    color=['black'],
    ax=rax)

rax.set_ylim(0,2)
rax.set_xlabel(r'$p_{T}\ (GeV)$')
rax.set_ylabel(r'SS/OS')
rax.axhline(y=1, color='r', linestyle='-')

ax.set_ylabel(r'Events')
ax.set_yscale('log')
#ax.set_ylim(0.1,1e5)

#add_uncertainty(hists['N_ele_SS'], ax, ratio=True)
#add_uncertainty(hists['N_ele_OS'], ax, ratio=True)

ax.legend()

plt.show()

# NOTE(review): user-specific absolute output directory - adjust when running elsewhere
for extension in ('.png', '.pdf'):
    fig.savefig('/home/users/ewallace/public_html/FCNC/pt2_'+str(year)+extension)


# Per-process comparison of the x projections with an SS/OS ratio panel.
for ss_2d, os_2d, legend_entries in (
    (h1_TT, h2_TT, ["SS (all) (ttbar)", "weighted OS (all) (ttbar)"]),
    (h1_DY, h2_DY, ["SS (all) (DY)", "weighted OS (all) (DY)"]),
):
    ss_proj = ss_2d.projection('x')
    os_proj = os_2d.projection('x')

    fig, (ax1,ax2) = plt.subplots(2, sharex=True, figsize=(10,10), gridspec_kw=dict(height_ratios=[3, 1]))
    # each histogram is drawn twice: once plain, once with its errors shown
    ss_proj.plot(ax=ax1, alpha=0.8, color="C2")
    os_proj.plot(ax=ax1, alpha=0.8, color="C1")
    ss_proj.plot(ax=ax1, alpha=0.8, color="C2", show_errors=True)
    os_proj.plot(ax=ax1, alpha=0.8, color="C1", show_errors=True)

    fig.legend(legend_entries)

    ax1.set_xlabel(r'$p_{T}\ (GeV) $')
    ax1.set_ylabel(r'Events')

    ss_proj.divide(os_proj, binomial=False).plot(ax=ax2, errors=True, label="SS/OS")
    ax2.axhline(y=1, color='r', linestyle='-')


In [None]:
# y-axis projections of the 2D histograms, keyed by process and category
hists = {
    'eta_ttbar_SS': h1_TT.projection('y'),
    'eta_ttbar_OS': h2_TT.projection('y'),
    'eta_DY_SS': h1_DY.projection('y'),
    'eta_DY_OS': h2_DY.projection('y'),
}

# Legend label and fill color for the stacked OS contributions. Raw strings
# keep the LaTeX "\ " as-is instead of relying on an invalid escape sequence;
# the resulting label text is unchanged.
hists['eta_ttbar_OS'].label = r'$OS\ (ttbar)$'
hists['eta_ttbar_OS'].color = '#8AC926'

hists['eta_DY_OS'].label = r'$OS\ (DY)$'
hists['eta_DY_OS'].color = '#FFCA3A'

# histograms that enter the stack, in stacking order
keys = ['eta_ttbar_OS', 'eta_DY_OS']

#total_mc = get_total(my_histos, keys)

# total SS over total OS
ratio = (hists['eta_ttbar_SS']+hists['eta_DY_SS']).divide(hists['eta_ttbar_OS']+hists['eta_DY_OS'] )

In [None]:
# Main figure: stacked OS contributions with the SS total overlaid, and the
# SS/OS ratio in the lower panel.
fig, (ax, rax) = plt.subplots(2,1,figsize=(10,10), gridspec_kw={"height_ratios": (3, 1), "hspace": 0.05}, sharex=True)
hep.cms.label(
    data=False,
    year=year,
    lumi=lumi[year],
    loc=0,
    ax=ax,
)

# mplhep's `w2` is the per-bin sum of squared weights (a variance), while the
# yahist `.errors` are standard deviations - hence the squares below, and the
# sum in quadrature for the combined SS histogram.
hep.histplot(
    [hists[x].counts for x in keys ],
    hists['eta_ttbar_OS'].edges,
    w2=[hists[x].errors**2 for x in keys ],
    histtype="fill",
    stack=True,
    label=['%s (%.0f)'%(hists[x].label, sum(hists[x].counts)) for x in keys],
    color=[ hists[x].color for x in keys ],
    ax=ax)

ss_counts = hists['eta_ttbar_SS'].counts+hists['eta_DY_SS'].counts
ss_w2 = hists['eta_ttbar_SS'].errors**2+hists['eta_DY_SS'].errors**2

hep.histplot(
    [ss_counts],
    hists['eta_ttbar_SS'].edges,
    w2=[ss_w2],
    histtype="step",
    stack=False,
    label=['%s (%.0f)'%('SS (total)', sum(ss_counts))],
    color=['#525B76'],
    ax=ax)

hep.histplot(
    [ratio.counts],
    ratio.edges,
    w2=[ratio.errors**2],
    histtype="errorbar",
    label = ['total'],
    color=['black'],
    ax=rax)

#rax.set_ylim(0.95,1.55)
rax.set_xlabel(r'$\eta$')
rax.set_ylabel(r'SS/OS')
rax.axhline(y=1, color='r', linestyle='-')
ax.set_ylabel(r'Events')
ax.set_yscale('log')
#ax.set_ylim(0.1,1e5)

#add_uncertainty(hists['N_ele_SS'], ax, ratio=True)
#add_uncertainty(hists['N_ele_OS'], ax, ratio=True)

ax.legend()
plt.show()

# NOTE(review): user-specific absolute output directory - adjust when running elsewhere
for extension in ('.png', '.pdf'):
    fig.savefig('/home/users/ewallace/public_html/FCNC/eta_'+str(year)+extension)


# Per-process comparison of the y projections with an SS/OS ratio panel.
for ss_2d, os_2d, legend_entries in (
    (h1_TT, h2_TT, ["SS (all) (ttbar)", "weighted OS (all) (ttbar)"]),
    (h1_DY, h2_DY, ["SS (all) (DY)", "weighted OS (all) (DY)"]),
):
    ss_proj = ss_2d.projection('y')
    os_proj = os_2d.projection('y')

    fig, (ax1,ax2) = plt.subplots(2, sharex=True, figsize=(10,10), gridspec_kw=dict(height_ratios=[3, 1]))
    # each histogram is drawn twice: once plain, once with its errors shown
    ss_proj.plot(ax=ax1, alpha=0.8, color="C2")
    os_proj.plot(ax=ax1, alpha=0.8, color="C1")
    ss_proj.plot(ax=ax1, alpha=0.8, color="C2", show_errors=True)
    os_proj.plot(ax=ax1, alpha=0.8, color="C1", show_errors=True)

    fig.legend(legend_entries)

    ax1.set_xlabel(r'$\eta$')
    ax1.set_ylabel(r'Events')

    ss_proj.divide(os_proj, binomial=False).plot(ax=ax2, errors=True, label="SS/OS")
    ax2.axhline(y=1, color='r', linestyle='-')

In [None]:
# Dataset keys of the two processed samples.
TT_DATASET = '/TTTo2L2Nu_TuneCP5_PSweights_13TeV-powheg-pythia8/RunIIFall17NanoAODv7-PU2017_12Apr2018_Nano02Apr2020_new_pmx_102X_mc2017_realistic_v8-v1/NANOAODSIM'
DY_DATASET = '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIFall17NanoAODv7-PU2017RECOSIMstep_12Apr2018_Nano02Apr2020_102X_mc2017_realistic_v8_ext1-v1/NANOAODSIM'

# Hard-coded normalization inputs: each histogram is multiplied by
# numerator * 1000 / denominator.
# NOTE(review): presumably cross section [pb] * 1000 / sum of generator
# weights - confirm, and consider taking these from the sample metadata.
TT_NORM = (87.315, 4960123622.722055)
DY_NORM = (6021.0, 48968256.0)


def _rebinned_pt_eta(name):
    """Return a copy of output histogram `name`, rebinned in pt and eta.

    The accumulator is the first element of `output`: with 'savemetrics'
    enabled the job returns an (accumulator, metrics) pair, so the pair
    itself cannot be indexed by histogram name.
    """
    return output[0][name].copy().rebin('pt', pt_bins).rebin('eta', eta_bins)


def _summed_hist2d(coffea_hist):
    """Sum `coffea_hist` over datasets and convert it into a yahist Hist2D."""
    return Hist2D.from_bincounts(
        coffea_hist.sum('dataset').values()[()].T,
        (coffea_hist.axis('pt').edges(), coffea_hist.axis('eta').edges()),
    )


def _dataset_hist2d(coffea_hist, dataset, norm):
    """Convert one dataset of `coffea_hist` into a normalized yahist Hist2D.

    Errors are the square root of the sum of squared weights; the result is
    scaled by norm[0] * 1000 / norm[1].
    """
    key = (dataset,)
    converted = Hist2D.from_bincounts(
        coffea_hist.values()[key].T,
        (coffea_hist.axis('pt').edges(), coffea_hist.axis('eta').edges()),
        errors = np.sqrt(coffea_hist.values(sumw2=True)[key][1].T),
    )
    return converted*norm[0]*1000/norm[1]


tmp1 = _rebinned_pt_eta('flipped_electron')
tmp2 = _rebinned_pt_eta('flipped_electron2')
tmp3 = _rebinned_pt_eta('flipped_electron3')
tmp4 = _rebinned_pt_eta('flipped_electron4')

# all datasets combined (unscaled)
h1 = _summed_hist2d(tmp1)
h2 = _summed_hist2d(tmp2)
h3 = _summed_hist2d(tmp3)
h4 = _summed_hist2d(tmp4)

# ttbar only
h1_TT = _dataset_hist2d(tmp1, TT_DATASET, TT_NORM)
h2_TT = _dataset_hist2d(tmp2, TT_DATASET, TT_NORM)
h3_TT = _dataset_hist2d(tmp3, TT_DATASET, TT_NORM)
h4_TT = _dataset_hist2d(tmp4, TT_DATASET, TT_NORM)

# DY only
h1_DY = _dataset_hist2d(tmp1, DY_DATASET, DY_NORM)
h2_DY = _dataset_hist2d(tmp2, DY_DATASET, DY_NORM)
h3_DY = _dataset_hist2d(tmp3, DY_DATASET, DY_NORM)
h4_DY = _dataset_hist2d(tmp4, DY_DATASET, DY_NORM)

In [None]:
# One figure per (process, channel): x projections of the two histograms in
# the upper panel, their ratio in the lower panel.
for upper_2d, lower_2d, legend_entries in (
    (h1_TT, h2_TT, ["SS with one flip (ee) (ttbar)", "weighted OS with no flips (ee) (ttbar)"]),
    (h3_TT, h4_TT, ["SS with one flip (emu) (ttbar)", "weighted OS with no flips (emu) (ttbar)"]),
    (h1_DY, h2_DY, ["SS with one flip (ee) (DY)", "weighted OS with no flips (ee) (DY)"]),
    (h3_DY, h4_DY, ["SS with one flip (emu) (DY)", "OS weighted with no flips (emu) (DY)"]),
):
    numerator = upper_2d.projection('x')
    denominator = lower_2d.projection('x')

    fig, (ax1,ax2) = plt.subplots(
        2,
        sharex=True,
        figsize=(10,10),
        gridspec_kw={'height_ratios': [3, 1]},
    )

    # first pass draws the plain histograms, second pass adds the errors
    for extra_options in ({}, {'show_errors': True}):
        numerator.plot(ax=ax1, alpha=0.8, color="C2", **extra_options)
        denominator.plot(ax=ax1, alpha=0.8, color="C1", **extra_options)

    fig.legend(legend_entries)

    ax1.set_xlabel(r'$p_{T}\ (GeV) $')
    ax1.set_ylabel(r'Events')

    flip_ratio = numerator.divide(denominator, binomial=False)
    flip_ratio.plot(ax=ax2, errors=True, label="SS/OS")
    ax2.axhline(y=1, color='r', linestyle='-')


In [None]:
# One figure per (process, channel): y projections of the two histograms in
# the upper panel, their ratio in the lower panel.
# The legend texts are aligned with the pt plots of the same histograms: the
# emu ttbar OS entry used to read "one flip", and the ee ttbar SS entry was
# missing a space.
for upper_2d, lower_2d, legend_entries in (
    (h1_TT, h2_TT, ["SS with one flip (ee) (ttbar)", "weighted OS with no flips (ee) (ttbar)"]),
    (h3_TT, h4_TT, ["SS with one flip (emu) (ttbar)", "weighted OS with no flips (emu) (ttbar)"]),
    (h1_DY, h2_DY, ["SS with one flip (ee) (DY)", "weighted OS with no flips (ee) (DY)"]),
    (h3_DY, h4_DY, ["SS with one flip (emu) (DY)", "OS weighted with no flips (emu) (DY)"]),
):
    numerator = upper_2d.projection('y')
    denominator = lower_2d.projection('y')

    fig, (ax1,ax2) = plt.subplots(2, sharex=True, figsize=(10,10), gridspec_kw=dict(height_ratios=[3, 1]))
    # each histogram is drawn twice: once plain, once with its errors shown
    numerator.plot(ax=ax1, alpha=0.8, color="C2")
    denominator.plot(ax=ax1, alpha=0.8, color="C1")
    numerator.plot(ax=ax1, alpha=0.8, color="C2", show_errors=True)
    denominator.plot(ax=ax1, alpha=0.8, color="C1", show_errors=True)

    fig.legend(legend_entries)

    ax1.set_xlabel(r'$\eta$')
    ax1.set_ylabel(r'Events')

    numerator.divide(denominator, binomial=False).plot(ax=ax2, errors=True, label="SS/OS")
    ax2.axhline(y=1, color='r', linestyle='-')


In [None]:
# The accumulator is the first element of `output`: with 'savemetrics' enabled
# the job returns an (accumulator, metrics) pair, so the pair itself cannot be
# indexed by histogram name.
tmp1 = output[0]['lepton_parent'].copy()
#tmp1 = tmp1.rebin('multiplicity', N_bins_red)


tmp2 = output[0]['lepton_parent2'].copy()
#tmp2 = tmp2.rebin('multiplicity', N_bins_red)


h1 = Hist1D.from_bincounts(
    tmp1.sum('dataset').values()[()].T,
    (tmp1.axis('pdgID').edges()),
)

# bin contents now come from tmp2 (they used to be copied from tmp1, which
# made both curves identical)
h2 = Hist1D.from_bincounts(
    tmp2.sum('dataset').values()[()].T,
    (tmp2.axis('pdgID').edges()),
)


fig, (ax1) = plt.subplots(1, figsize=(10,10))
h1.plot(ax=ax1, alpha=1, color="C3")
h2.plot(ax=ax1, alpha=1, color="C0")

ax1.set_yscale("log")
ax1.set_xlabel(r'$pdg\ ID\ (parent) $')
ax1.set_ylabel(r'Events')

fig.legend(["leading electron (SS)", "trailing electron (SS)"])


In [None]:
# Dataset keys of the 2017 samples. The 2018 (RunIIAutumn18) names used here
# before do not match `year = 2017`.
# NOTE(review): these must be updated together with `year` - confirm against the fileset keys.
TT_DATASET = '/TTTo2L2Nu_TuneCP5_PSweights_13TeV-powheg-pythia8/RunIIFall17NanoAODv7-PU2017_12Apr2018_Nano02Apr2020_new_pmx_102X_mc2017_realistic_v8-v1/NANOAODSIM'
DY_DATASET = '/DYJetsToLL_M-50_TuneCP5_13TeV-madgraphMLM-pythia8/RunIIFall17NanoAODv7-PU2017RECOSIMstep_12Apr2018_Nano02Apr2020_102X_mc2017_realistic_v8_ext1-v1/NANOAODSIM'

# The accumulator is the first element of `output`: with 'savemetrics' enabled
# the job returns an (accumulator, metrics) pair, so the pair itself cannot be
# indexed by histogram name.
tmp1 = output[0]['N_jet'].copy()
tmp1 = tmp1.rebin('multiplicity', N_bins_red)


# all datasets combined
h1 = Hist1D.from_bincounts(
    tmp1.sum('dataset').values()[()].T,
    (tmp1.axis('multiplicity').edges()),
)

# ttbar only
h1_TT = Hist1D.from_bincounts(
    tmp1.values()[(TT_DATASET,)].T,
    (tmp1.axis('multiplicity').edges()),
)

# DY only
h1_DY = Hist1D.from_bincounts(
    tmp1.values()[(DY_DATASET,)].T,
    (tmp1.axis('multiplicity').edges()),
)


# one figure each for: all datasets, DY only, ttbar only
for jet_hist in (h1, h1_DY, h1_TT):
    fig, (ax1) = plt.subplots(1, figsize=(10,10))
    jet_hist.plot(ax=ax1, alpha=1, color="C3")

    ax1.set_yscale("log")
    # this is the jet multiplicity, so label the axis accordingly
    ax1.set_xlabel(r'$N_{jet}\ $')
    ax1.set_ylabel(r'Events')

    fig.legend(["jets"])


In [None]:
# The accumulator is the first element of `output`: with 'savemetrics' enabled
# the job returns an (accumulator, metrics) pair, so the pair itself cannot be
# indexed by histogram name.
tmp1 = output[0]['electron'].copy()
tmp1 = tmp1.rebin('eta', eta_bins)
tmp1 = tmp1.rebin('pt', pt_bins)


tmp2 = output[0]['electron2'].copy()
tmp2 = tmp2.rebin('eta', eta_bins)
tmp2 = tmp2.rebin('pt', pt_bins)

# dataset-summed 2D (pt, eta) histograms as yahist objects
h1 = Hist2D.from_bincounts(
    tmp1.sum('dataset').values()[()].T,
    (tmp1.axis('pt').edges(), tmp1.axis('eta').edges()),
)


h2 = Hist2D.from_bincounts(
    tmp2.sum('dataset').values()[()].T,
    (tmp2.axis('pt').edges(), tmp2.axis('eta').edges()),
)