In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os

from Tools.config_helpers import loadConfig, make_small
from klepto.archives import dir_archive

from coffea.nanoevents import NanoEventsFactory, NanoAODSchema
from coffea import processor, hist

from processor.trilep_analysis import trilep_analysis
from processor.default_accumulators import desired_output, add_processes_to_output, variations

In [4]:
# an equivalent code can be run with ipython -i trilep_analysis.py within the processor directory

from Tools.samples import fileset_2018, fileset_2018_small

overwrite = False
year      = 2018
small     = True

# load the config and the cache
cfg = loadConfig()

cacheName = 'trilep_analysis'
if small: cacheName += '_small'

cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cacheName), serialized=True)

fileset = {
    'topW_v3': fileset_2018['topW_v3'],
    'topW_EFT_cp8': fileset_2018['topW_EFT_cp8'],
    'topW_EFT_mix': fileset_2018['topW_EFT_mix'],
    'TTW': fileset_2018['TTW'],
    'TTZ': fileset_2018['TTZ'],
    'TTH': fileset_2018['TTH'],
    'diboson': fileset_2018['diboson'],
    'ttbar': fileset_2018['top2l'],
    'DY': fileset_2018['DY'],
}

fileset = make_small(fileset, small, n_max=1)  # run over just one file each if small is set to true

# add all processes to output so that the cutflow doesn't fail
add_processes_to_output(fileset, desired_output)

# add some histograms that we defined in the processor
# everything else is taken the default_accumulators.py
from processor.default_accumulators import mass_axis, dataset_axis
desired_output.update({
    "dilep_mass": hist.Hist("Counts", dataset_axis, mass_axis),
})

histograms = sorted(list(desired_output.keys()))

exe_args = {
    'workers': 16,
    'function_args': {'flatten': False},
    "schema": NanoAODSchema,
}
exe = processor.futures_executor

if not overwrite:
    cache.load()

if cfg == cache.get('cfg') and histograms == cache.get('histograms') and cache.get('simple_output'):
    output = cache.get('simple_output')

else:
    print ("I'm running now")
    
    output = processor.run_uproot_job(
        fileset,
        "Events",
        trilep_analysis(year=year, variations=variations, accumulator=desired_output),
        exe,
        exe_args,
        chunksize=250000,
    )
    
    cache['fileset']        = fileset
    cache['cfg']            = cfg
    cache['histograms']     = histograms
    cache['simple_output']  = output
    cache.dump()

I'm running now


Preprocessing:   0%|          | 0/9 [00:00<?, ?file/s]

Processing:   0%|          | 0/12 [00:00<?, ?chunk/s]

In [5]:
# we can just take the process list from the fileset to avoid any confusions
processes = list(fileset.keys())

processes.remove('topW_EFT_cp8')
processes.remove('topW_EFT_mix')

# the following list is directly taken from Tools/selection.py
lines = ['entry']
lines += [
            'filter',
            'lepveto',
            'trilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            'offZ',
            'MET>50',
            'N_jet>2',
            'N_central>1',
            'N_btag>0',
            'N_fwd>0',
        ]

from Tools.helpers import getCutFlowTable
df = getCutFlowTable(output, processes=processes, lines=lines, significantFigures=4, signal='topW_v3')
df

Unnamed: 0,topW_v3,TTW,TTZ,TTH,diboson,ttbar,DY,S/B
entry,8.901 +/- 0.042,1604.0 +/- 4.0,519.9 +/- 1.3,168.5 +/- 0.5,665.5 +/- 3.0,17180.0 +/- 200.0,1759000.0 +/- 3000.0,0.0
filter,8.891 +/- 0.042,1602.0 +/- 4.0,519.1 +/- 1.3,168.4 +/- 0.5,665.4 +/- 3.0,17170.0 +/- 200.0,1759000.0 +/- 3000.0,0.0
lepveto,1.24 +/- 0.016,100.5 +/- 1.0,133.5 +/- 0.6,5.338 +/- 0.09,2.294 +/- 0.178,33.89 +/- 10.19,6483.0 +/- 162.0,0.0002
trilep,0.259 +/- 0.007,19.63 +/- 0.42,41.18 +/- 0.35,0.802 +/- 0.035,0.013 +/- 0.013,0 +/- 0.0,17.76 +/- 8.01,0.0033
p_T(lep0)>25,0.259 +/- 0.007,19.61 +/- 0.42,41.16 +/- 0.35,0.8 +/- 0.035,0.013 +/- 0.013,0 +/- 0.0,17.76 +/- 8.01,0.0033
p_T(lep1)>20,0.258 +/- 0.007,19.47 +/- 0.42,40.98 +/- 0.35,0.791 +/- 0.034,0.013 +/- 0.013,0 +/- 0.0,17.76 +/- 8.01,0.0033
trigger,0.249 +/- 0.007,18.72 +/- 0.41,39.69 +/- 0.35,0.735 +/- 0.033,0.013 +/- 0.013,0 +/- 0.0,17.76 +/- 8.01,0.0032
offZ,0.217 +/- 0.006,16.42 +/- 0.39,5.673 +/- 0.131,0.587 +/- 0.03,0.013 +/- 0.013,0 +/- 0.0,7.244 +/- 5.135,0.0072
MET>50,0.177 +/- 0.006,13.13 +/- 0.35,3.911 +/- 0.108,0.434 +/- 0.025,0.013 +/- 0.013,0 +/- 0.0,0 +/- 0.0,0.0101
N_jet>2,0.153 +/- 0.005,7.355 +/- 0.278,2.783 +/- 0.091,0.175 +/- 0.016,0 +/- 0.0,0 +/- 0.0,0 +/- 0.0,0.0148


In [6]:
df = getCutFlowTable(output, processes=processes, lines=lines, significantFigures=4, absolute=False, signal='topW_v3')
df

Unnamed: 0,topW_v3,TTW,TTZ,TTH,diboson,ttbar,DY
entry,1.0,1.0,1.0,1.0,1.0,1.0,1.0
filter,0.9989,0.9988,0.9985,0.9992,0.9997,0.9999,0.9998
lepveto,0.1394,0.0627,0.2571,0.0317,0.0034,0.002,0.0037
trilep,0.209,0.1954,0.3085,0.1502,0.0056,0.0,0.0027
p_T(lep0)>25,0.9997,0.9993,0.9996,0.9982,1.0,1.0,1.0
p_T(lep1)>20,0.9953,0.9928,0.9957,0.9877,1.0,1.0,1.0
trigger,0.9644,0.9614,0.9685,0.9302,1.0,1.0,1.0
offZ,0.8719,0.8773,0.1429,0.7979,1.0,1.0,0.4079
MET>50,0.8166,0.7994,0.6895,0.7392,1.0,1.0,0.0
N_jet>2,0.8623,0.5602,0.7115,0.4041,0.0,1.0,1.0


In [9]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import mplhep as hep
plt.style.use(hep.style.CMS)

from plots.helpers import makePlot

# defining some new axes for rebinning.
N_bins = hist.Bin('multiplicity', r'$N$', 10, -0.5, 9.5)
N_bins_red = hist.Bin('multiplicity', r'$N$', 5, -0.5, 4.5)
mass_bins = hist.Bin('mass', r'$M\ (GeV)$', 20, 0, 200)
pt_bins = hist.Bin('pt', r'$p_{T}\ (GeV)$', 30, 0, 300)
pt_bins_coarse = hist.Bin('pt', r'$p_{T}\ (GeV)$', 10, 0, 300)
eta_bins = hist.Bin('eta', r'$\eta $', 25, -5.0, 5.0)

my_labels = {
    'topW_v3': 'top-W scat.',
    'topW_EFT_cp8': 'EFT, cp8',
    'topW_EFT_mix': 'EFT mix',
    'TTZ': r'$t\bar{t}Z$',
    'TTW': r'$t\bar{t}W$',
    'TTH': r'$t\bar{t}H$',
    'diboson': 'VV/VVV',
    'ttbar': r'$t\bar{t}$',
    'DY': 'Drell-Yan',
}

my_colors = {
    'topW_v3': '#FF595E',
    'topW_EFT_cp8': '#000000',
    'topW_EFT_mix': '#0F7173',
    'TTZ': '#FFCA3A',
    'TTW': '#8AC926',
    'TTH': '#34623F',
    'diboson': '#525B76',
    'ttbar': '#1982C4',
    'DY': '#6A4C93',}

In [10]:
makePlot(output, 'lead_lep', 'pt',
         data=[],
         bins=pt_bins_coarse, log=True, normalize=False, axis_label=r'$p_{T}$ (lead lep) (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         order=['diboson', 'TTW', 'TTH', 'TTZ', 'ttbar'],
         signals=['topW_v3', 'topW_EFT_cp8', 'topW_EFT_mix'],
        )

TypeError: makePlot() got an unexpected keyword argument 'data'

In [None]:
makePlot(output, 'lead_lep', 'pt',
         data=[],
         bins=pt_bins_coarse, log=True, normalize=False, axis_label=r'$p_{T}$ (lead lep) (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         order=['topW_v3', 'diboson', 'TTW', 'TTH', 'TTZ', 'ttbar'],
         signals=[],
         omit=['topW_EFT_cp8', 'topW_EFT_mix'],
        )

In [None]:
makePlot(output, 'lead_lep', 'pt',
         data=[],
         shape=True,
         bins=pt_bins_coarse, log=True, normalize=False, axis_label=r'$p_{T}$ (lead lep) (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         omit=['topW_EFT_cp8', 'topW_EFT_mix'],
        )

In [None]:
makePlot(output, 'lead_lep', 'pt',
         data=[],
         shape=True,
         bins=pt_bins_coarse, log=True, normalize=False, axis_label=r'$p_{T}$ (lead lep) (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         omit=['DY', 'ttbar', 'TTZ', 'TTH', 'diboson'],
        )

In [None]:
makePlot(output, 'dilep_mass', 'mass',
         bins=mass_bins, log=False, normalize=False, axis_label=r'$M_{\ell\ell}$ (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         order=['topW_v3', 'diboson',  'TTW', 'TTH', 'TTZ', 'ttbar', 'DY'],
         omit=['topW_EFT_cp8', 'topW_EFT_mix'],
        )
makePlot(output, 'dilep_mass', 'mass',
         shape=True,
         bins=mass_bins, log=False, normalize=False, axis_label=r'$M_{\ell\ell}$ (GeV)',
         new_colors=my_colors, new_labels=my_labels,
         ymax=0.7,
         omit=['topW_EFT_cp8', 'topW_EFT_mix'],
        )