### Read the SLHA files and the SModelS output and store the data in a pandas DataFrame

In [1]:
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import numpy as np
import pandas as pd
import glob,imp,os,sys
import importlib.util
from pandas import json_normalize
import pyslha
from smodels.share.models.mssm import BSMList
from smodels.share.models.SMparticles import SMList
from smodels.base.model import Model
from smodels.particlesLoader import getParticlesFromSLHA

# Setting the option to None turns off pandas' chained-assignment (SettingWithCopy) warnings for the whole session.
pd.options.mode.chained_assignment = None #Disable copy warnings

  import glob,imp,os,sys


In [2]:
# Read the BSM particle list from the example SLHA file and combine it with the
# SM particles into the model used later to translate PDG codes into labels.
BSMlist = getParticlesFromSLHA('../2mdm_example.slha')
model = Model(SMparticles=SMList, BSMparticles=BSMlist)
print(BSMlist)

[gha, gha~, ghz, ghz~, ghwp, ghwp~, ghwm, ghwm~, ghg, ghg~, zp, ghzp, ghzp~, sd, chi]


In [3]:
# Input locations used by the cells below.
# Folder holding the SLHA input files:
slhaFolder = '../data/slha_files/slha_files_cms_couplings'
# Folder holding the SModelS python output (one .py file per SLHA file).
# glob.glob does not expand '~', so the home directory is resolved explicitly here.
smodelsFolder = os.path.expanduser('~/smodels-develop/smodels_results_monojet_atlas')
# SModelS parameters file (not used by the cells shown in this notebook):
parametersSmodels = '../../parameters_2mdm.ini'

In [5]:
#Convert Experimental Results list to a dictionary
# For every SModelS python output file in smodelsFolder:
#   1. write a sanitized copy (<name>_fix.py) where 'inf' and 'None' are replaced by -1,
#   2. load the `smodelsOutput` dictionary defined in that copy,
#   3. drop the entries listed in removeFromDict,
#   4. turn the 'ExptRes' list into a dict {'result0': ..., 'result1': ...} sorted by decreasing r,
#   5. append the result (plus the SLHA file name) to `data`.
data = []
# Top-level keys of the SModelS output which are discarded before flattening:
removeFromDict = ['topologies outside the grid',"missing topologies",
                  "missing topologies with displaced decays", 'missing topologies with prompt decays',
                 "Asymmetric Branches","Outside Grid","Missed Topologies","Long Cascades"]
fixSuffix = '_fix.py'
for f in glob.glob(smodelsFolder+'/*.py'):
    # The sanitized copies written below also match '*.py'. Skip them, otherwise
    # re-running this cell would load every point twice and create *_fix_fix.py files.
    if f.endswith(fixSuffix):
        continue
    with open(f,'r') as ff:
        dataF = ff.read()
    # Plain text substitution: every 'inf' and 'None' substring becomes -1, so that
    # (among others) the 'r' values are always numbers and can be sorted below.
    dataF = dataF.replace('inf','-1')
    dataF = dataF.replace('None', '-1')
    # Only swap the trailing extension (str.replace would also touch any other '.py' in the path)
    fixedFile = f[:-len('.py')] + fixSuffix
    with open(fixedFile,'w') as ff:
        ff.write(dataF)
    # Load the sanitized file as a module (importlib replaces the deprecated imp.load_source)
    spec = importlib.util.spec_from_file_location('smodelsOutputModule', fixedFile)
    outputModule = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(outputModule)
    smodelsDict = outputModule.smodelsOutput
    for rmKey in removeFromDict:
        smodelsDict.pop(rmKey, None)
    if 'ExptRes' in smodelsDict:
        for res in smodelsDict['ExptRes']:
            res.pop('TxNames weights (fb)', None)
        # Largest r first, so 'result0' always holds the result with the highest r-value
        expList = sorted(smodelsDict['ExptRes'], key=lambda pt: pt['r'],reverse=True)
        smodelsDict['ExptRes'] = {'result%i'%i : val for i,val in enumerate(expList)}
    # Stripping '_fix.py' from the file name leaves the name stored in the 'filename' column
    slhaFile = os.path.basename(fixedFile)[:-len(fixSuffix)]
    dataDict = {'filename' : slhaFile}
    dataDict.update(smodelsDict)
    data.append(dataDict)

In [6]:
# Number of SModelS output files loaded into `data`
print(len(data))

3000


In [7]:
# Flatten the nested dictionaries in `data` into a DataFrame
# (nested keys become dot-separated column names, one row per entry of `data`).
smodelsDF = pd.json_normalize(data)

In [8]:
#Get SLHA data:
def _maxXsecs(slha, sqrts):
    """
    Collect the cross-sections stored in a pyslha document for a given energy.

    :param slha: object returned by pyslha.readSLHAFile
    :param sqrts: value passed to get_xsecs(sqrts=...) (8000 and 13000 are used below)

    :return: dictionary {final state pids joined by '_' : largest cross-section value times 1000}.
             Processes without any cross-section at the requested sqrts are left out.
    """
    xsecs = {}
    for proc in slha.xsections.values():
        values = [x.value for x in proc.get_xsecs(sqrts=sqrts)]
        if not values:
            continue
        label = str(proc.pidsfinal).replace('[','').replace(']','').replace(',','_').replace(' ','')
        # Factor 1000: presumably pb -> fb (the values are stored under '...(fb)' keys) -- TODO confirm
        xsecs[label] = max(values)*1000
    return xsecs

# For each point in smodelsDF read the corresponding SLHA file and store
# masses, widths, input parameters, cross-sections and branching ratios.
slhaData = []
for f in smodelsDF['filename']:
    slhaFile = os.path.join(slhaFolder,f)
    slha = pyslha.readSLHAFile(slhaFile)
    # Absolute masses and total widths, restricted to PDG codes >= 52
    massDict = {str(key) : abs(val) for key,val in slha.blocks['MASS'].items() if key >= 52}
    widthDict = {str(key) : val.totalwidth for key,val in slha.decays.items() if key >= 52}
    # Input parameters: ZPRIME entries keep their keys, BLINPUTS keys are
    # shifted by the number of ZPRIME entries before being added
    extparDict = {str(key) : val for key,val in slha.blocks['ZPRIME'].items()}
    offset = len(extparDict)
    extparDict.update({str(key+offset) : val for key,val in slha.blocks['BLINPUTS'].items()})

    # Branching ratios: {mother label : {comma-separated daughter labels : BR}}
    BRsDict = {}
    for pdg,val in slha.decays.items():
        initialState = model.getParticlesWith(pdg=pdg)[0].label
        BRsDict[initialState] = {}
        for dec in val.decays:
            if dec.br < 0.01: continue  # ignore decays with BR below 1%
            finalState = ','.join([model.getParticlesWith(pdg=pid)[0].label for pid in sorted(dec.ids)])
            BRsDict[initialState][finalState] = dec.br
    xsec8TeV = _maxXsecs(slha, sqrts=8000)
    xsec13TeV = _maxXsecs(slha, sqrts=13000)
    slhaDict = {'filename' : f, 'mass' : massDict, 'width' : widthDict, 'extpar' : extparDict,
                'xsec8TeV(fb)' : xsec8TeV, 'xsec13TeV(fb)' : xsec13TeV, 'BRs' : BRsDict}
    slhaData.append(slhaDict)

In [9]:
# Number of SLHA files read (one entry per row of smodelsDF)
len(slhaData)

3000

In [10]:
# Flatten the SLHA dictionaries into a DataFrame
slhaDF = pd.json_normalize(slhaData)
# Columns holding the individual cross-sections at each energy
columnNames = slhaDF.columns.tolist()
xsecs13 = [name for name in columnNames if 'xsec13TeV' in name]
xsecs8 = [name for name in columnNames if 'xsec8TeV' in name]
# Total cross-section = row-wise sum over the individual cross-section columns
total13 = slhaDF[xsecs13].sum(axis=1)
total8 = slhaDF[xsecs8].sum(axis=1)
slhaDF['totalxsec13TeV(fb)'] = total13
slhaDF['totalxsec8TeV(fb)'] = total8

In [11]:
# Combine the SLHA and the SModelS information: inner join on the columns
# shared by both DataFrames, so only points present in both are kept.
dataDF = pd.merge(slhaDF, smodelsDF, how='inner')

In [12]:
# Report the number of rows left after the merge
nPoints = len(dataDF)
print('Final number of data points:',nPoints)
#print(dataDF.columns.values.tolist()) #Print all columns names

Final number of data points: 3000


In [13]:
# Display the merged DataFrame (the notebook shows a truncated view)
dataDF

Unnamed: 0,filename,mass.9000006,mass.9900032,mass.9000002,mass.9000003,mass.9000004,mass.9000005,mass.9900026,width.9000002,width.9000003,...,ExptRes.result1.DataSetID,ExptRes.result1.AnalysisSqrts (TeV),ExptRes.result1.lumi (fb-1),ExptRes.result1.dataType,ExptRes.result1.r,ExptRes.result1.r_expected,ExptRes.result1.Width (GeV),ExptRes.result1.nll,ExptRes.result1.nll_min,ExptRes.result1.nll_SM
0,scan__f8y3i2s.slha,914.8673,2784.059,91.1876,79.82436,79.82436,1500.0,1392.0290,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.066171,0.064852,"[(zp, 114.6981), (chi, stable)]",9.226668,9.200812,9.200812
1,scan_4ypoffuz.slha,376.0053,1650.381,91.1876,79.82436,79.82436,1500.0,825.1907,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.855924,0.838864,"[(zp, 80.12207), (chi, stable)]",10.663010,9.200812,9.200812
2,scan_6e4fbdz2.slha,528.3607,1105.230,91.1876,79.82436,79.82436,1500.0,552.6151,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.152012,0.148983,"[(zp, 33.69929), (chi, stable)]",9.282072,9.200812,9.200812
3,scan_ben6551g.slha,344.9137,1478.293,91.1876,79.82436,79.82436,1500.0,739.1467,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,1.156362,1.133313,"[(zp, 71.23618), (chi, stable)]",11.752070,9.200812,9.200812
4,scan_91h9ls2y.slha,185.8823,1588.865,91.1876,79.82436,79.82436,1500.0,794.4326,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,1.078187,1.056696,"[(zp, 86.14029), (chi, stable)]",11.440080,9.200812,9.200812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2995,scan_7gxn69ok.slha,938.8013,2957.996,91.1876,79.82436,79.82436,1500.0,1478.9980,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.037718,0.036966,"[(zp, 124.4713), (chi, stable)]",9.213751,9.200812,9.200812
2996,scan_ei8486su.slha,390.7652,2343.078,91.1876,79.82436,79.82436,1500.0,1171.5390,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.236524,0.231810,"[(zp, 121.9931), (chi, stable)]",9.360703,9.200812,9.200812
2997,scan_ry02bqa7.slha,1042.5760,2285.083,91.1876,79.82436,79.82436,1500.0,1142.5420,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.038768,0.037996,"[(zp, 72.3372), (chi, stable)]",9.214180,9.200812,9.200812
2998,scan_f7mw_9sh.slha,929.7981,2308.938,91.1876,79.82436,79.82436,1500.0,1154.4690,2.4952,2.085,...,SR2j_2200,13.0,139.0,efficiencyMap,0.096758,0.094830,"[(zp, 81.65528), (chi, stable)]",9.243580,9.200812,9.200812


In [14]:
# Store the merged DataFrame as a pickle file
pickleFile = '../data/pcl_files/smodels_results_monojet_atlas_multibin.pcl'
dataDF.to_pickle(pickleFile)