In [1]:
import os, sys, glob, re
import coffea.util
from plot_utils import *

In [2]:
dataDir = '/data/submit/lavezzo/cutflows2/'
subDirs = os.listdir(dataDir)

In [3]:
counts = {}
for subDir in subDirs:
    files = os.listdir(dataDir + subDir)
    
    for i, f in enumerate(files):
        if i == 0: 
            cum = coffea.util.load(os.path.join(dataDir, subDir, f))
        else:
            temp = coffea.util.load(os.path.join(dataDir, subDir, f))
            for key in temp:
                cum[key] += temp[key]
                
    counts[subDir.split('+')[0]] = cum

In [4]:
counts.keys()

dict_keys(['SUEP-m125-generic', 'SUEP-m400-darkPhoHad', 'SUEP-m750-darkPho', 'SUEP-m750-darkPhoHad', 'QCD_HT300to500_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT1000to1500_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'QCD_HT200to300_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m1000-darkPhoHad', 'QCD_HT2000toInf_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m125-darkPho', 'QCD_HT500to700_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m1000-generic', 'SUEP-m400-darkPho', 'SUEP-m400-generic', 'QCD_HT1500to2000_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m125-generic-htcut', 'QCD_HT50to100_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m1000-darkPho', 'QCD_HT100to200_TuneCP5_PSWeights_13TeV-madgraph-pythia8', 'SUEP-m750-generic', 'SUEP-m125-darkPhoHad'])

In [5]:
counts_generic = {}
for k, i in counts.items(): 
    if 'generic' not in k: continue
    if 'htcut' in k: continue
    
    counts_generic[k] = i
    
counts_qcd = None
for k, i in counts.items(): 
    if 'QCD_HT' not in k: continue
    # if 'htcut' in k: continue
    if counts_qcd is None: 
        counts_qcd = {}
        for key in counts[k].keys():
            counts_qcd[key] = counts[k][key][list(counts[k][key].keys())[0]]
    else:
        xsection = getXSection(k+'+RunIISummer20UL18MiniAODv2-106X_upgrade2018_realistic_v16_L1v1-v1+MINIAODSIM', '2018')
        for key in counts[k].keys():
            if len(list(counts[k][key])) > 0:
                val = counts[k][key][list(counts[k][key].keys())[0]]
                sumw = counts[k]['sumw'][list(counts[k][key].keys())[0]]
                counts_qcd[key] += val * xsection / sumw

In [6]:
counts_qcd.keys()

dict_keys(['sumw', 'total', 'cut1', 'cut2', 'cut3', 'cut4', 'cut5'])

In [7]:
columnLabels = ['Selection', r'QCD MC ', r'$M_{\phi}$ = 125 GeV', r'$M_{\phi}$ = 400 GeV', 
           r'$M_{\phi}$ = 750 GeV', r'$M_{\phi}$ = 1000 GeV']
selectionLabels = ['Total', r'$H_T$ > 1200 GeV', 
             'Trigger (PFHT1050)', 'Two ak15 Jets',
             '# Constituents > 80', r'$Sph_1$ > 0.5']

selectionKeys = list(counts_generic[list(counts_generic.keys())[0]].keys())[1:]
columnKeys = ['QCD', 'SUEP-m125-generic', 'SUEP-m400-generic', 
              'SUEP-m750-generic', 'SUEP-m1000-generic'] 

In [12]:
table = []
table.append(columnLabels)

for label, selection in zip(selectionLabels, selectionKeys):
    columns = []
    columns.append(label)
    for k in columnKeys: 
        if k == 'QCD':
            val = counts_qcd[selection]
        else:
            
            # FIXME: standardize things more
            mass = k.split("SUEP-m")[1]
            mass = mass.split('-')[0]
            mass = str(mass)
            xsec = getXSection(mass, 2018, SUEP=True)
            
            counts_sample = counts_generic[k][selection]
            sumw_sample = counts_generic[k]['sumw']
            val = round(counts_sample[list(counts_sample.keys())[0]]) * xsec
            sumw = round(sumw_sample[list(sumw_sample.keys())[0]])
            val /= sumw
        val *= lumis['2018']
        columns.append(round(val))
    table.append(columns)

In [13]:
table

[['Selection',
  'QCD MC ',
  '$M_{\\phi}$ = 125 GeV',
  '$M_{\\phi}$ = 400 GeV',
  '$M_{\\phi}$ = 750 GeV',
  '$M_{\\phi}$ = 1000 GeV'],
 ['Total', 16683981276580, 2123014, 359936, 30506, 10364],
 ['$H_T$ > 1200 GeV', 31068938, 1501, 3272, 641, 319],
 ['Trigger (PFHT1050)', 27309295, 1287, 3272, 628, 312],
 ['Two ak15 Jets', 24722683, 1287, 3199, 607, 286],
 ['# Constituents > 80', 99889, 214, 3090, 604, 285],
 ['$Sph_1$ > 0.5', 48667, 0, 2872, 577, 274]]

In [10]:
import tabulate
t = tabulate.tabulate(table,tablefmt ='latex')

def convert2LatexString(s):
    r = r'(\^\{\})'; s = re.sub(r, "^", s)
    s = re.sub(r'\\([\$\_\{\}\^])', r'\1', s)
    s = re.sub(r'(\\textbackslash{})', r'\\', s)
    return s

t = convert2LatexString(t)
with open('table.txt', 'w') as f:
    f.write(t)
t

'\\begin{tabular}{llllll}\n\\hline\n Selection           & QCD MC         & $M_{\\phi}$ = 125 GeV & $M_{\\phi}$ = 400 GeV & $M_{\\phi}$ = 750 GeV & $M_{\\phi}$ = 1000 GeV \\\\\n Total               & 16683981276580 & 2123014              & 359936               & 30506                & 10364                 \\\\\n $H_T$ \\ensuremath{>} 1200 GeV    & 31068938       & 1501                 & 3272                 & 641                  & 319                   \\\\\n Trigger (PFHT1050)  & 27309295       & 1287                 & 3272                 & 628                  & 312                   \\\\\n Two ak15 Jets       & 24722683       & 1287                 & 3199                 & 607                  & 286                   \\\\\n \\# Constituents \\ensuremath{>} 80 & 99889          & 214                  & 3090                 & 604                  & 285                   \\\\\n $Sph_1$ \\ensuremath{>} 0.5       & 48667          & 0                    & 2872                 & 577     