In [1]:
from __future__ import print_function, division
from collections import defaultdict, OrderedDict
import gzip
import pickle
import json
import os
import uproot
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from coffea import hist, processor 
from coffea.hist import plot

In [2]:
# Merge every pickled histogram file found under `dirname` into `hists`
# (histogram name -> accumulated histogram) and collect the distinct
# primary-dataset prefixes in `pd`.
# Note: `pd` is a plain list of dataset names here, not the pandas module.
hists={}
pd = []
year = '2018'
dirname = '../grinder/pods/' + year
# Files whose name contains any of these tags are left out of the merge.
skipped_tags = ('MET', 'SingleElectron', 'SinglePhoton', 'EGamma')
# os.listdir() returns entries in arbitrary order; iterate in sorted order so
# the content/order of `pd` and the order in which histograms are summed are
# reproducible from one machine (or run) to the next.
for filename in sorted(os.listdir(dirname)):
    if any(tag in filename for tag in skipped_tags): continue
    # Require the suffix rather than a substring match, so stray files such as
    # 'x.pkl.gz.tmp' or 'x.pkl.gz.bak' are not handed to gzip/pickle.
    if not filename.endswith('.pkl.gz'): continue
    # The part of the file name before the first '____' is used as the
    # primary-dataset name.
    dataset = filename.split("____")[0]
    if dataset not in pd: pd.append(dataset)
    # CAUTION: pickle.load can execute arbitrary code; only point `dirname`
    # at files produced by a trusted source.
    with gzip.open(os.path.join(dirname, filename), 'rb') as fin:
        hin = pickle.load(fin)
    for k in hin.keys():
        if k in hists: hists[k]+=hin[k]
        else: hists[k]=hin[k]

In [3]:
# Regroup every histogram from its "dataset" axis onto a new "pdataset"
# category axis: one category per name collected in `pd`, fed by all
# dataset identifiers matching the pattern "<name>*".
pdataset = hist.Cat("pdataset", "pdataset", sorting='placement')
pdataset_cats = ("dataset",)
pdataset_map = OrderedDict(
    (name, (name + "*",)) for name in pd
)

for key in hists:
    ungrouped = hists[key]
    hists[key] = ungrouped.group(pdataset, pdataset_cats, pdataset_map)

In [4]:
# Per-pdataset normalisation: read one number per primary dataset out of the
# 'sumw' histogram (the entry at index 1 of its values, overflow bins
# included) and divide every other histogram by it.
scale = {}
sumw_hist = hists['sumw']
for dataset_id in sumw_hist.identifiers('pdataset'):
    sumw_values = sumw_hist.project('pdataset', dataset_id).values(overflow='all')[()]
    scale[dataset_id] = sumw_values[1]
    print(dataset_id, scale[dataset_id])

# NOTE: the scaling below is applied to the histograms stored in `hists`
# themselves, so executing this cell a second time without reloading
# `hists` divides by the normalisation again.
for name, histogram in hists.items():
    if name != 'sumw':
        for dataset_id in histogram.identifiers('pdataset'):
            histogram.scale({dataset_id: 1/scale[dataset_id]}, axis='pdataset')

MonoHs_Mzprime_500_Mhs_50_Mchi_150 291434.97132873535
MonoW_Mzprime_500_Mchi_150 288972.98516845703
MonoHs_Mzprime_500_Mhs_70_Mchi_150 296860.1000366211
MonoZ_Mzprime_500_Mchi_150 292182.6672363281
MonoHs_Mzprime_500_Mhs_90_Mchi_150 297390.70458984375
MonoJet_Mzprime_500_Mchi_150 308220.1226196289


In [5]:
# Category axis onto which the primary datasets are merged.
process = hist.Cat("process", "Process", sorting='placement')

# A background grouping (e.g. process_map["TT"] = ("TT*",)) applied to `hists`
# itself is currently disabled; only the signal grouping below is performed.

# Signal samples: process name -> glob pattern(s) matched against the
# "pdataset" identifiers. Insertion order is kept by the OrderedDict.
signal_cats = ("pdataset",)
signal_map = OrderedDict([
    ("Mhs_50", ("*Mhs_50*",)),
    ("Mhs_70", ("*Mhs_70*",)),
    ("Mhs_90", ("*Mhs_90*",)),
    ("MonoJet", ("MonoJet*",)),
    ("MonoW", ("MonoW*",)),
    ("MonoZ", ("MonoZ*",)),
])

# Build the per-process signal histograms, echoing each histogram name as it
# is handled. `hists` is left untouched.
signal_hists = {}
for key, histogram in hists.items():
    signal_hists[key] = histogram.group(process, signal_cats, signal_map)
    print(key)

sumw
CaloMinusPfOverRecoil
recoil
mindphi
j1pt
j1eta
j1phi
fj1pt
fj1eta
fj1phi
njets
ndcsvL
ndflvL
ndcsvM
ndflvM
ndcsvT
ndflvT
nfjtot
nfjgood
nfjclean
fjmass
e1pt
e1eta
e1phi
mu1pt
mu1eta
mu1phi
TopTagger
DarkHiggsTagger
VvsQCDTagger
probTbcq
probTbqq
probTbc
probTbq
probWcq
probWqq
probZbb
probZcc
probZqq
probHbb
probHcc
probHqqqq
probQCDbb
probQCDcc
probQCDb
probQCDc
probQCDothers


In [6]:
# Human-readable labels for the axes of selected histograms. Each histogram
# listed here has an axis carrying the same name as the histogram itself.
axis_labels = (
    ('recoil', 'Hadronic Recoil (GeV)'),
    ('fj1pt', 'AK15 Leading Jet Pt (GeV)'),
    ('j1pt', 'AK4 Leading Jet Pt (GeV)'),
    ('fjmass', 'AK15 Leading Jet Mass (GeV)'),
)
for hist_name, axis_label in axis_labels:
    hists[hist_name].axis(hist_name).label = axis_label

In [7]:
# Yield table: sum of the 'recoil' signal histogram (overflow bins included)
# per process, for the region / jet selection chosen below.
region = 'iszeroL'
jet_selection = 'baggy'
divider = '------------------'

print(divider)
print(divider)
print('Category:',region)
print(divider)

# Only the signal histograms are tabulated; the analogous loop over
# hists['recoil'] is currently disabled.
recoil_hist = signal_hists['recoil']
for proc in recoil_hist.identifiers('process'):
    selected = recoil_hist.project('jet_selection', jet_selection)
    selected = selected.project('region', region)
    counts = selected.project('process', proc).values(overflow='all')[()]
    yld = np.sum(counts)
    print(proc, '%.1f' % yld)

print(divider)
print(divider)
print()

------------------
------------------
Category: iszeroL
------------------
Mhs_50 14315.5
Mhs_70 14829.6
Mhs_90 15693.2
MonoJet 21191.6
MonoW 19380.3
MonoZ 19854.3
------------------
------------------



In [8]:
# Yield table: sum of the 'recoil' signal histogram (overflow bins included)
# per process, for the region / jet selection chosen below.
region = 'iszeroL'
jet_selection = 'topveto'
divider = '------------------'

print(divider)
print(divider)
print('Category:',region)
print(divider)

# Only the signal histograms are tabulated; the analogous loop over
# hists['recoil'] is currently disabled.
recoil_hist = signal_hists['recoil']
for proc in recoil_hist.identifiers('process'):
    selected = recoil_hist.project('jet_selection', jet_selection)
    selected = selected.project('region', region)
    counts = selected.project('process', proc).values(overflow='all')[()]
    yld = np.sum(counts)
    print(proc, '%.1f' % yld)

print(divider)
print(divider)
print()

------------------
------------------
Category: iszeroL
------------------
Mhs_50 11681.4
Mhs_70 12179.9
Mhs_90 12735.8
MonoJet 18916.9
MonoW 17511.0
MonoZ 18004.3
------------------
------------------



In [9]:
# Yield table: sum of the 'recoil' signal histogram (overflow bins included)
# per process, for the region / jet selection chosen below.
region = 'iszeroL'
jet_selection = 'ismonohs'
divider = '------------------'

print(divider)
print(divider)
print('Category:',region)
print(divider)

# Only the signal histograms are tabulated; the analogous loop over
# hists['recoil'] is currently disabled.
recoil_hist = signal_hists['recoil']
for proc in recoil_hist.identifiers('process'):
    selected = recoil_hist.project('jet_selection', jet_selection)
    selected = selected.project('region', region)
    counts = selected.project('process', proc).values(overflow='all')[()]
    yld = np.sum(counts)
    print(proc, '%.1f' % yld)

print(divider)
print(divider)
print()

------------------
------------------
Category: iszeroL
------------------
Mhs_50 10205.3
Mhs_70 10827.6
Mhs_90 11424.1
MonoJet 1178.7
MonoW 1106.9
MonoZ 1302.4
------------------
------------------



In [10]:
# Yield table: sum of the 'recoil' signal histogram (overflow bins included)
# per process, for the region / jet selection chosen below.
region = 'iszeroL'
jet_selection = 'ismonoV'
divider = '------------------'

print(divider)
print(divider)
print('Category:',region)
print(divider)

# Only the signal histograms are tabulated; the analogous loop over
# hists['recoil'] is currently disabled.
recoil_hist = signal_hists['recoil']
for proc in recoil_hist.identifiers('process'):
    selected = recoil_hist.project('jet_selection', jet_selection)
    selected = selected.project('region', region)
    counts = selected.project('process', proc).values(overflow='all')[()]
    yld = np.sum(counts)
    print(proc, '%.1f' % yld)

print(divider)
print(divider)
print()

------------------
------------------
Category: iszeroL
------------------
Mhs_50 752.9
Mhs_70 759.0
Mhs_90 735.8
MonoJet 6005.2
MonoW 11334.6
MonoZ 11338.1
------------------
------------------



In [11]:
# Yield table: sum of the 'recoil' signal histogram (overflow bins included)
# per process, for the region / jet selection chosen below.
region = 'iszeroL'
jet_selection = 'ismonojet'
divider = '------------------'

print(divider)
print(divider)
print('Category:',region)
print(divider)

# Only the signal histograms are tabulated; the analogous loop over
# hists['recoil'] is currently disabled.
recoil_hist = signal_hists['recoil']
for proc in recoil_hist.identifiers('process'):
    selected = recoil_hist.project('jet_selection', jet_selection)
    selected = selected.project('region', region)
    counts = selected.project('process', proc).values(overflow='all')[()]
    yld = np.sum(counts)
    print(proc, '%.1f' % yld)

print(divider)
print(divider)
print()

------------------
------------------
Category: iszeroL
------------------
Mhs_50 723.2
Mhs_70 593.2
Mhs_90 575.8
MonoJet 11733.0
MonoW 5069.6
MonoZ 5363.7
------------------
------------------

