### Read the SLHA files and the SModelS output and store the data in a pandas DataFrame

In [1]:
import warnings
#Ignore warnings whose message matches "numpy.dtype size changed";
#the filter is installed before the imports below are executed
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import pandas as pd
import glob,os
import numpy as np
from pandas import json_normalize
from importlib import util
import pyslha
import sys
#Also search ~/smodels when importing the smodels package below
sys.path.append(os.path.expanduser('~/smodels'))
from smodels.share.models.mssm import BSMList
from smodels.share.models.SMparticles import SMList
from smodels.base.model import Model
from smodels.particlesLoader import getParticlesFromSLHA

pd.options.mode.chained_assignment = None #Disable copy warnings

In [2]:
#Read the BSM particle content from the example SLHA file and build the model
#used further below to translate PDG codes into particle labels
exampleSLHA = './2mdm_example.slha'
BSMlist = getParticlesFromSLHA(exampleSLHA)
model = Model(SMparticles=SMList, BSMparticles=BSMlist)
print(BSMlist)

[gha, gha~, ghz, ghz~, ghwp, ghwp~, ghwm, ghwm~, ghg, ghg~, zp, ghzp, ghzp~, sd, chi]


In [3]:
#Folder holding the SLHA input files (joined with each file name when reading the SLHA data)
slhaFolder = '../data/slha_files'
#Folder holding the SModelS python output (*.py), one file per SLHA file
smodelsFolder = '../data/smodels_results'
#SModelS parameters file (not used by the cells shown in this notebook)
parametersSmodels = './smodels_parameters_2mdm.ini'

In [4]:
#Load every SModelS python output file and build one record (nested dict) per file.
#The list of experimental results is converted to a dictionary (result0, result1,...)
#so that it can later be flattened into DataFrame columns.
data = []
#Summary entries of the SModelS output which are dropped from the records
removeFromDict = ['topologies outside the grid',"missing topologies",
                  "missing topologies with displaced decays", 'missing topologies with prompt decays',
                 "Asymmetric Branches","Outside Grid","Missed Topologies","Long Cascades"]
for f in glob.glob(smodelsFolder+'/*.py'):

    #NOTE: the output file is imported (i.e. executed) as python code,
    #so only files from a trusted SModelS run should be placed in smodelsFolder
    spec = util.spec_from_file_location("smodelsOutput", f)
    smodelsOutput = util.module_from_spec(spec)
    spec.loader.exec_module(smodelsOutput)
    smodelsDict = smodelsOutput.smodelsOutput
    for rmKey in removeFromDict:
        smodelsDict.pop(rmKey, None)
    if 'ExptRes' in smodelsDict:
        #Sort by decreasing r-value; results with r = None are placed last
        expList = sorted(smodelsDict['ExptRes'],
                            key=lambda pt: (pt['r'] is not None,pt['r']),
                            reverse=True)
        expDict = {'result%i'%i : val for i,val in enumerate(expList)}
        smodelsDict['ExptRes'] = expDict
    #Strip only the trailing extension (str.replace would also remove
    #any other '.py' occurrence inside the file name)
    slhaFile = os.path.splitext(f)[0]
    dataDict = {'filename' : os.path.basename(slhaFile)}
    dataDict.update(smodelsDict)
    data.append(dataDict)

In [5]:
#Stop here with a clear message if no SModelS output was found; otherwise the
#failure only shows up later as a missing 'filename' column
assert data, 'No SModelS output (*.py) found in %s' % smodelsFolder
print(len(data))

21000


In [6]:
#Flatten the nested records into a DataFrame (one row per record,
#nested keys become dot-separated column names)
smodelsDF = pd.json_normalize(data)

In [7]:
#Get SLHA data:
minBSMpdg = 52 #Only PDG codes >= 52 are stored in the mass and width dictionaries
minBR = 1e-7 #Decay channels with a smaller branching ratio are skipped
labelCache = {} #PDG code -> particle label, filled on first use

def _particleLabel(pdg):
    """Return the label of the first particle in `model` with the given PDG code.

    The result is cached, since the same few PDG codes are looked up for every file.
    An IndexError is raised if the model holds no particle with this PDG code.
    """
    if pdg not in labelCache:
        labelCache[pdg] = model.getParticlesWith(pdg=pdg)[0].label
    return labelCache[pdg]

def _xsecsInFb(slha, sqrts):
    """Return a dictionary with the largest cross-section of each process at the given sqrts.

    :param slha: object returned by pyslha.readSLHAFile
    :param sqrts: center-of-mass energy passed on to get_xsecs (8000 or 13000 below)

    :returns: dictionary {final state PDG codes joined by '_' : cross-section}.
              The values are multiplied by 1000 (presumably pb -> fb, since they
              are stored under the '(fb)' keys). Processes without any
              cross-section at this sqrts are left out.
    """
    xsecDict = {}
    for proc in slha.xsections.values():
        xsecs = proc.get_xsecs(sqrts=sqrts)
        if not xsecs:
            continue
        label = str(proc.pidsfinal).replace('[','').replace(']','').replace(',','_').replace(' ','')
        xsecDict[label] = max([x.value for x in xsecs])*1000
    return xsecDict

slhaData = []
for f in smodelsDF['filename']:
    slhaFile = os.path.join(slhaFolder,f)
    slha = pyslha.readSLHAFile(slhaFile)
    #Masses are stored as absolute values
    massDict = {str(key) : abs(val) for key,val in slha.blocks['MASS'].items() if key >= minBSMpdg}
    extparDict = {str(key) : val for key,val in slha.blocks['NPINPUTS'].items()}

    widthDict = {str(key) : val.totalwidth for key,val in slha.decays.items() if key >= minBSMpdg}
    #Branching ratios as {mother label : {comma separated daughter labels : BR}}
    BRsDict = {}
    for pdg,val in slha.decays.items():
        initialState = _particleLabel(pdg)
        BRsDict[initialState] = {}
        for dec in val.decays:
            if dec.br < minBR: continue
            #Daughters are sorted by PDG code, so the label does not depend on their order in the file
            finalState = ','.join([_particleLabel(pid) for pid in sorted(dec.ids)])
            BRsDict[initialState][finalState] = dec.br
    xsec8TeV = _xsecsInFb(slha, sqrts=8000)
    xsec13TeV = _xsecsInFb(slha, sqrts=13000)
    slhaDict = {'filename' : f, 'mass' : massDict, 'width' : widthDict, 'extpar' : extparDict,
                'xsec8TeV(fb)' : xsec8TeV, 'xsec13TeV(fb)' : xsec13TeV, 'BRs' : BRsDict}
    slhaData.append(slhaDict)

In [8]:
#Number of SLHA records read (one is appended per entry of smodelsDF['filename'])
len(slhaData)

21000

In [9]:
#Flatten the SLHA records into a DataFrame
slhaDF = json_normalize(slhaData)
#Collect the cross-section columns of each energy before any total is added
xsecs13, xsecs8 = [], []
for column in slhaDF.columns:
    if 'xsec13TeV' in column:
        xsecs13.append(column)
    if 'xsec8TeV' in column:
        xsecs8.append(column)
#Total cross-sections: row-wise sum over the columns collected above
slhaDF['totalxsec13TeV(fb)'] = slhaDF.loc[:, xsecs13].sum(axis=1)
slhaDF['totalxsec8TeV(fb)'] = slhaDF.loc[:, xsecs8].sum(axis=1)

In [10]:
#Merge with SModelS DataFrame
#The join key is given explicitly (instead of every column name the two frames
#happen to share) and the merge fails if a file name appears more than once on either side
dataDF = slhaDF.merge(smodelsDF,how='inner',on='filename',validate='one_to_one')

In [11]:
#Number of rows in the merged DataFrame
nPoints = len(dataDF)
print('Final number of data points:',nPoints)
#print(dataDF.columns.values.tolist()) #Print all columns names

Final number of data points: 21000


In [12]:
#Display the merged DataFrame
dataDF

Unnamed: 0,filename,mass.9000006,mass.9900026,mass.9900032,mass.9000002,mass.9000003,mass.9000004,mass.9000005,width.9000002,width.9000003,...,ExptRes.result4.TxNames weights (fb).TRS1,ExptRes.result6.TxNames weights (fb).TRV1qq,ExptRes.result4.TxNames weights (fb).TRV1bb,ExptRes.result2.TxNames weights (fb).TRV1bb,ExptRes.result3.TxNames weights (fb).TRV1jj,ExptRes.result1.TxNames weights (fb).TRV1jj,ExptRes.result2.TxNames weights (fb).TRV1tt,ExptRes.result3.TxNames weights (fb).TRV1qq,ExptRes.result7.TxNames weights (fb).TRV1qq,ExptRes.result8.TxNames weights (fb).TRV1qq
0,scan_3_g7btudrp.slha,65.0000,1238.2430,2476.4850,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
1,scan_1_nw8r40ie.slha,293.7867,800.1534,1600.3070,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
2,scan_3_zhbq06_k.slha,65.0000,878.2694,1756.5390,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
3,scan_3_ej7m2n85.slha,65.0000,1395.8450,2791.6910,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
4,scan_3_r53swaf4.slha,65.0000,354.2981,708.5962,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20995,scan_1_msjl0dte.slha,352.3017,1087.9490,2175.8990,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
20996,scan_1_bfpr9gp8.slha,893.0319,927.1490,1854.2980,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,,,,,,,,,
20997,scan_3_ctrf9hep.slha,65.0000,939.9400,1879.8800,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,4.510547e-11,,,,,,,,,
20998,scan_1_qzp8ef1_.slha,259.6814,1266.5660,2533.1310,91.1876,79.82436,79.82436,1500.0,2.4952,2.085,...,,0.873524,,,,,,,,


In [13]:
#Store the merged DataFrame as a pickle file
pickleFile = '../data/smodels_results.pcl'
dataDF.to_pickle(pickleFile)