In [12]:
import os, sys, glob, re
import coffea.util
from plot_utils import *

In [13]:
dataDir = '/data/submit/cms/store/user/lavezzo/SUEP/cutflows5/'
subDirs = os.listdir(dataDir)

In [14]:
counts = {}
for subDir in subDirs:
    files = os.listdir(dataDir + subDir)
    
    for i, f in enumerate(files):
        if i == 0: 
            cum = coffea.util.load(os.path.join(dataDir, subDir, f))
        else:
            temp = coffea.util.load(os.path.join(dataDir, subDir, f))
            for key in temp:
                cum[key] += temp[key]
                
    counts[subDir.split('+')[0]] = cum

In [15]:
counts.keys()

dict_keys(['QCD_HT500to700_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m1000-generic', 'SUEP-m400-generic', 'SUEP-m125-generic-htcut', 'QCD_HT100to200_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m750-generic', 'QCD_HT50to100_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT1500to2000_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m125-generic', 'QCD_HT300to500_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT1000to1500_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT200to300_TuneCP5_PSWeights_13TeV-madgraph-pythia8'])

In [16]:
counts_generic = {}
for k, i in counts.items(): 
    if 'generic' not in k: continue
    #if 'htcut' in k: continue
    
    counts_generic[k] = i
    
counts_qcd = None
for k, i in counts.items(): 
    if 'QCD_HT' not in k: continue
    
    xsection = getXSection(k+'+RunIISummer20UL18MiniAODv2-106X_upgrade2018_realistic_v16_L1v1-v1+MINIAODSIM', '2018')

    if counts_qcd is None: 
        counts_qcd = {}
        for key in counts[k].keys():
            val = counts[k][key][list(counts[k][key].keys())[0]]
            sumw = counts[k]['sumw'][list(counts[k][key].keys())[0]]
            scale = lumis['2018'] * xsection / sumw
            counts_qcd[key] = val*scale

    else:
        for key in counts[k].keys():
            if len(list(counts[k][key])) > 0:
                val = counts[k][key][list(counts[k][key].keys())[0]]
                sumw = counts[k]['sumw'][list(counts[k][key].keys())[0]]
                scale = lumis['2018'] * xsection / sumw
                counts_qcd[key] += val * scale

In [17]:
counts_generic.keys()

dict_keys(['SUEP-m1000-generic', 'SUEP-m400-generic', 'SUEP-m125-generic-htcut', 'SUEP-m750-generic', 'SUEP-m125-generic'])

In [18]:
columnLabels = ['Selection', 'QCD', r'$M_{S}$ = 125 GeV', r'$M_{S}$ = 400 GeV', 
           r'$M_{S}$ = 750 GeV', r'$M_{S}$ = 1000 GeV']
selectionLabels = ['Total', 'Trigger (PFHT1050)', 
              r'$H_T$ > 1200 GeV', 'Two ak15 Jets',
             '# Constituents > 80', r'$Sph_1$ > 0.5']

selectionKeys = list(counts_generic[list(counts_generic.keys())[0]].keys())[1:]
columnKeys = ['QCD', 'SUEP-m125-generic-htcut', 'SUEP-m400-generic', 
              'SUEP-m750-generic', 'SUEP-m1000-generic'] 

In [8]:
import hist.intervals as intervals
import numpy as np

In [19]:
table = []
table.append(columnLabels)

for label, selection in zip(selectionLabels, selectionKeys):
    columns = []
    columns.append(label)
    for k in columnKeys: 
        if k == 'QCD':
            
            val = 0
            differences = [[],[]]
            for sample, i in counts.items(): 
                if 'QCD_HT' not in sample: continue
                if len(list(counts[sample][selection])) == 0: continue
                    
                xsection = getXSection(sample+'+RunIISummer20UL18MiniAODv2-106X_upgrade2018_realistic_v16_L1v1-v1+MINIAODSIM', '2018')
                
                bin_val = counts[sample][selection][list(counts[sample][selection].keys())[0]]
                
                sumw = counts[sample]['sumw'][list(counts[sample][selection].keys())[0]]
                scale = lumis['2018'] * xsection / sumw

                bin_val *= scale

                val += bin_val
                
            interval = [x[0] for x in intervals.poisson_interval(np.array([val]), np.array([val]))]
            differences = [round(x - val) for x in interval]
            val = round(val)
            val = round(val)
                
        else:
            
            mass = k.split("SUEP-m")[1]
            mass = mass.split('-')[0]
            mass = str(mass)
            xsec = getXSection(mass, 2018, SUEP=True)
            
            if 'htcut' in k: xsec *= 0.003
            
            counts_sample = counts_generic[k][selection]
            sumw_sample = counts_generic[k]['sumw']
            val = round(counts_sample[list(counts_sample.keys())[0]])
            sumw = round(sumw_sample[list(sumw_sample.keys())[0]])
            
            scale = (lumis['2018'] * xsec / sumw)
            
            interval = [x[0]*scale for x in intervals.poisson_interval(np.array([val]))]
            val *= scale
                        
            differences = [round(x - val) for x in interval]
            val = round(val)
                        
        columns.append('$'+str(val)+'^{+'+str(differences[1])+'}_{'+str(differences[0])+'}$')
    table.append(columns)

In [20]:
table

[['Selection',
  'QCD',
  '$M_{S}$ = 125 GeV',
  '$M_{S}$ = 400 GeV',
  '$M_{S}$ = 750 GeV',
  '$M_{S}$ = 1000 GeV'],
 ['Total',
  '$12980865195580^{+3602898}_{-3602897}$',
  '$6368^{+53}_{-53}$',
  '$359936^{+3654}_{-3617}$',
  '$30506^{+308}_{-305}$',
  '$10364^{+105}_{-104}$'],
 ['Trigger (PFHT1050)',
  '$47165295^{+6869}_{-6868}$',
  '$4723^{+46}_{-46}$',
  '$4036^{+421}_{-382}$',
  '$769^{+52}_{-48}$',
  '$367^{+21}_{-19}$'],
 ['$H_T$ > 1200 GeV',
  '$26703851^{+5169}_{-5168}$',
  '$3130^{+38}_{-37}$',
  '$3272^{+383}_{-344}$',
  '$628^{+47}_{-44}$',
  '$312^{+19}_{-18}$'],
 ['Two ak15 Jets',
  '$24238972^{+4924}_{-4923}$',
  '$2986^{+37}_{-36}$',
  '$3199^{+379}_{-340}$',
  '$607^{+46}_{-43}$',
  '$286^{+18}_{-17}$'],
 ['# Constituents > 80',
  '$99914^{+317}_{-316}$',
  '$179^{+9}_{-9}$',
  '$3090^{+373}_{-335}$',
  '$604^{+46}_{-43}$',
  '$285^{+18}_{-17}$'],
 ['$Sph_1$ > 0.5',
  '$48689^{+222}_{-221}$',
  '$93^{+7}_{-6}$',
  '$2872^{+361}_{-322}$',
  '$577^{+45}_{-42}$',
  '$2

In [21]:
import tabulate
t = tabulate.tabulate(table,tablefmt ='latex')

def convert2LatexString(s):
    r = r'(\^\{\})'; s = re.sub(r, "^", s)
    s = re.sub(r'\\([\$\_\{\}\^])', r'\1', s)
    s = re.sub(r'(\\textbackslash{})', r'\\', s)
    return s

t = convert2LatexString(t)
with open('table.txt', 'w') as f:
    f.write(t)
t

'\\begin{tabular}{llllll}\n\\hline\n Selection           & QCD                                    & $M_{S}$ = 125 GeV  & $M_{S}$ = 400 GeV        & $M_{S}$ = 750 GeV     & $M_{S}$ = 1000 GeV    \\\\\n Total               & $12980865195580^{+3602898}_{-3602897}$ & $6368^{+53}_{-53}$ & $359936^{+3654}_{-3617}$ & $30506^{+308}_{-305}$ & $10364^{+105}_{-104}$ \\\\\n Trigger (PFHT1050)  & $47165295^{+6869}_{-6868}$             & $4723^{+46}_{-46}$ & $4036^{+421}_{-382}$     & $769^{+52}_{-48}$     & $367^{+21}_{-19}$     \\\\\n $H_T$ \\ensuremath{>} 1200 GeV    & $26703851^{+5169}_{-5168}$             & $3130^{+38}_{-37}$ & $3272^{+383}_{-344}$     & $628^{+47}_{-44}$     & $312^{+19}_{-18}$     \\\\\n Two ak15 Jets       & $24238972^{+4924}_{-4923}$             & $2986^{+37}_{-36}$ & $3199^{+379}_{-340}$     & $607^{+46}_{-43}$     & $286^{+18}_{-17}$     \\\\\n \\# Constituents \\ensuremath{>} 80 & $99914^{+317}_{-316}$                  & $179^{+9}_{-9}$    & $3090^{+373}_{-335}$     & $6