### Read SLHA and SModelS output and store the data in a pandas DataFrame

In [1]:
import warnings
#Silence the "numpy.dtype size changed" warning before numpy/pandas are imported
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import numpy as np
import pandas as pd
#NOTE(review): `imp` is deprecated and no longer available from Python 3.12 on;
#the truncated warning in this cell's recorded output points at this import line.
import glob,imp,os,sys
from pandas import json_normalize
import pyslha
#NOTE(review): hard-coded, user-specific location of the SModelS package; it must be
#adjusted on any other machine.
sys.path.append('/home/camila/smodels/')
#NOTE(review): BSMList is not referenced by any of the visible cells (the BSM particles
#are read from an SLHA file instead) -- TODO confirm whether this import is still needed.
from smodels.share.models.mssm import BSMList
from smodels.share.models.SMparticles import SMList
from smodels.base.model import Model
from smodels.particlesLoader import getParticlesFromSLHA

#This switches the pandas chained-assignment warning off for the whole session
pd.options.mode.chained_assignment = None #Disable copy warnings

  import glob,imp,os,sys


In [2]:
#Read the BSM particles from the example SLHA file and combine them with the SM
#particles into the model object used later to translate PDG codes into labels.
BSMlist = getParticlesFromSLHA('../../2mdm_example.slha')
model = Model(SMparticles=SMList,
              BSMparticles=BSMlist)
#Show which BSM particles were found
print(BSMlist)

[gha, gha~, ghz, ghz~, ghwp, ghwp~, ghwm, ghwm~, ghg, ghg~, zp, ghzp, ghzp~, sd, chi]


In [3]:
#Locations used by the cells below (relative to the notebook folder):
#folder with the SLHA input files
slhaFolder = "../../scan/slha_files/"
#folder where the SModelS output is written to and read from
smodelsFolder = "../../scan/smodels_results/"
#SModelS parameters file
parametersSmodels = "../../parameters_2mdm.ini"

In [None]:
#Run SModelS through the shell: -p receives the parameters file, -f the SLHA folder and
#-o the output folder defined in the configuration cell.
#NOTE(review): the script path is absolute and user specific, and it points to
#smodels-develop while the import cell adds /home/camila/smodels/ to sys.path -- TODO confirm
#both refer to the intended SModelS version.
#NOTE(review): the recorded log reports 160000 input files handled by a single process;
#expect a long run time when re-executing this cell.
!/home/camila/smodels-develop/runSModelS.py -p {parametersSmodels} -f {slhaFolder} -o {smodelsFolder}

INFO in databaseObj.loadBinaryFile() in 549: loading binary db file /home/camila/smodels-database/db3.pcl format version 214
INFO in databaseObj.loadBinaryFile() in 556: Loaded database from /home/camila/smodels-database/db3.pcl in 1.9 secs.
INFO in modelTester.loadDatabaseResults() in 497: Including non-validated results
INFO in modelTester.testPoints() in 357: Running SModelS for 160000 files with a single process. Messages will be redirected to smodels.log


In [5]:
#Convert Experimental Results list to a dictionary
def _replaceInvalid(obj, default=-1):
    """
    Recursively replace None and infinite floats by a default value.

    Only actual values are replaced; strings (e.g. file names containing
    the letters "inf" or "None") are left untouched.

    :param obj: nested structure made of dicts, lists, tuples and scalars
    :param default: value used in place of None and +-infinity
    :return: a copy of obj with the replacements applied
    """
    if isinstance(obj,dict):
        return {key : _replaceInvalid(val,default) for key,val in obj.items()}
    if isinstance(obj,(list,tuple)):
        return type(obj)(_replaceInvalid(val,default) for val in obj)
    if obj is None:
        return default
    if isinstance(obj,float) and obj in (float('inf'),float('-inf')):
        return default
    return obj

data = []
removeFromDict = ['topologies outside the grid',"missing topologies",
                  "missing topologies with displaced decays", 'missing topologies with prompt decays',
                 "Asymmetric Branches","Outside Grid","Missed Topologies","Long Cascades"]
for f in glob.glob(smodelsFolder+'/*.py'):
    #Files ending in _fix.py are sanitized copies written by earlier versions of this cell.
    #Reading them again would add every point twice.
    if f.endswith('_fix.py'):
        continue
    with open(f,'r') as ff:
        dataF = ff.read()
    #Presumably the output can contain the bare token inf (the previous text substitution
    #targeted it), which is not a valid Python name on its own, so it is provided through
    #the execution namespace instead of editing the file text.
    #NOTE: this executes the content of the file (as the previous imp.load_source call did);
    #only use it on trusted SModelS output.
    namespace = {'inf' : float('inf')}
    exec(compile(dataF,f,'exec'),namespace)
    #None and infinite values become -1, as before, but string values are preserved
    smodelsDict = _replaceInvalid(namespace['smodelsOutput'])
    for rmKey in removeFromDict:
        smodelsDict.pop(rmKey,None)
    if 'ExptRes' in smodelsDict:
        for res in smodelsDict['ExptRes']:
            res.pop('TxNames weights (fb)',None)
        #Order the results by decreasing r-value, so result0 holds the largest r
        expList = sorted(smodelsDict['ExptRes'], key=lambda pt: pt['r'],reverse=True)
        smodelsDict['ExptRes'] = {'result%i'%i : val for i,val in enumerate(expList)}
    slhaFile = smodelsDict['OutputStatus']['input file']
    dataDict = {'filename' : os.path.basename(slhaFile)}
    dataDict.update(smodelsDict)
    data.append(dataDict)

In [6]:
#Number of SModelS output files that were loaded
print(len(data))

500


In [7]:
#Flatten the nested dictionaries into a single-level DataFrame
#(nested keys are joined into the column names):
smodelsDF = pd.json_normalize(data)

In [8]:
#Number of rows (one per loaded output file)
smodelsDF['filename'].shape[0]

500

In [9]:
#Get SLHA data:
def _xsecDict(slha, sqrts):
    """
    Collect, for each process stored in the SLHA file, the largest
    cross-section value available at the given energy.

    :param slha: object returned by pyslha.readSLHAFile
    :param sqrts: value passed to get_xsecs(sqrts=...) (8000 and 13000 are used below)
    :return: dictionary with the final state PDG codes joined by '_' as keys and
             the largest value multiplied by 1000 as values
             (presumably a pb -> fb conversion, given the (fb) column names -- TODO confirm)
    """
    xsecs = {}
    for proc in slha.xsections.values():
        xsecList = proc.get_xsecs(sqrts=sqrts)
        if not xsecList:
            continue
        label = str(proc.pidsfinal).replace('[','').replace(']','').replace(',','_').replace(' ','')
        xsecs[label] = max([x.value for x in xsecList])*1000
    return xsecs

slhaData = []
for f in smodelsDF['filename']:
    slhaFile = os.path.join(slhaFolder,f)
    #Skip points without a matching SLHA file instead of hard-coding individual file names
    if not os.path.isfile(slhaFile):
        print('Skipping %s: SLHA file not found' %slhaFile)
        continue
    slha = pyslha.readSLHAFile(slhaFile)
    #Only PDG codes >= 52 are stored (presumably to select the BSM states -- TODO confirm)
    massDict = {str(key) : abs(val) for key,val in slha.blocks['MASS'].items() if key >= 52}
    extparDict = {str(key) : val for key,val in slha.blocks['ZPRIME'].items()}
    #Shift the BLINPUTS keys by the number of ZPRIME entries before adding them
    offset = len(extparDict)
    extparDict.update({str(key+offset) : val for key,val in slha.blocks['BLINPUTS'].items()})

    widthDict = {str(key) : val.totalwidth for key,val in slha.decays.items() if key >= 52}
    BRsDict = {}
    for pdg,val in slha.decays.items():
        initialState = model.getParticlesWith(pdg=pdg)[0].label
        BRsDict[initialState] = {}
        for dec in val.decays:
            #Ignore branching ratios below 1%
            if dec.br < 0.01: continue
            finalState = ','.join([model.getParticlesWith(pdg=pid)[0].label for pid in sorted(dec.ids)])
            BRsDict[initialState][finalState] = dec.br
    xsec8TeV = _xsecDict(slha,8000)
    xsec13TeV = _xsecDict(slha,13000)
    slhaDict = {'filename' : f, 'mass' : massDict, 'width' : widthDict, 'extpar' : extparDict,
                'xsec8TeV(fb)' : xsec8TeV, 'xsec13TeV(fb)' : xsec13TeV, 'BRs' : BRsDict}
    slhaData.append(slhaDict)

In [10]:
#Flatten the SLHA dictionaries into a DataFrame
slhaDF = pd.json_normalize(slhaData)
#Columns holding cross-sections at each energy:
xsecs13 = [col for col in slhaDF.columns if 'xsec13TeV' in col]
xsecs8 = [col for col in slhaDF.columns if 'xsec8TeV' in col]
#Total cross-section per point = row-wise sum over the selected columns
slhaDF['totalxsec13TeV(fb)'] = slhaDF.loc[:,xsecs13].sum(axis=1)
slhaDF['totalxsec8TeV(fb)'] = slhaDF.loc[:,xsecs8].sum(axis=1)

In [11]:
#Merge with SModelS DataFrame
#The join key is stated explicitly, so the points are always matched by file name only,
#even if both DataFrames happen to share another column name.
dataDF = slhaDF.merge(smodelsDF,how='inner',on='filename')

In [12]:

#Report how many points are left after the merge
print('Final number of data points:',len(dataDF.index))
#print(dataDF.columns.values.tolist()) #Print all columns names

Final number of data points: 500


In [13]:
#Display the merged DataFrame (the recorded output shows a truncated view)
dataDF

Unnamed: 0,filename,mass.9000006,mass.9900032,mass.9000002,mass.9000003,mass.9000004,mass.9000005,mass.9900026,width.9000002,width.9000003,...,ExptRes.result5.DataSetID,ExptRes.result5.AnalysisSqrts (TeV),ExptRes.result5.lumi (fb-1),ExptRes.result5.dataType,ExptRes.result5.r,ExptRes.result5.r_expected,ExptRes.result5.Width (GeV),ExptRes.result5.likelihood,ExptRes.result5.l_max,ExptRes.result5.l_SM
0,krwapnqu.slha,995.2105,3325.3750,91.1876,79.82436,79.82436,1500.0,1662.6880,2.4952,2.085,...,,,,,,,,,,
1,orfkwbzj.slha,868.6225,2717.1510,91.1876,79.82436,79.82436,1500.0,1358.5750,2.4952,2.085,...,,,,,,,,,,
2,ghdjxkov.slha,1443.4310,3735.0750,91.1876,79.82436,79.82436,1500.0,1867.5380,2.4952,2.085,...,,,,,,,,,,
3,tymwhlcd.slha,208.7580,431.9450,91.1876,79.82436,79.82436,1500.0,215.9725,2.4952,2.085,...,,,,,,,,,,
4,kfpcvmed.slha,199.3033,538.0356,91.1876,79.82436,79.82436,1500.0,269.0178,2.4952,2.085,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,qmblpedf.slha,263.0186,1093.2460,91.1876,79.82436,79.82436,1500.0,546.6228,2.4952,2.085,...,,,,,,,,,,
496,zxwraqey.slha,611.1910,3126.5020,91.1876,79.82436,79.82436,1500.0,1563.2510,2.4952,2.085,...,,,,,,,,,,
497,abznlekc.slha,509.4655,3644.7430,91.1876,79.82436,79.82436,1500.0,1822.3720,2.4952,2.085,...,,,,,,,,,,
498,bvckmtzq.slha,275.8141,2774.1380,91.1876,79.82436,79.82436,1500.0,1387.0690,2.4952,2.085,...,,,,,,,,,,


In [14]:
#Save DataFrame to pickle file:
#The file is placed in smodelsFolder, so the output follows the configuration cell
#(with the current configuration this is ../../scan/smodels_results/scan_results.pcl).
dataDF.to_pickle(os.path.join(smodelsFolder,'scan_results.pcl'))

In [15]:
#Number of rows in the saved DataFrame
print(dataDF.shape[0])

250


In [15]:
#NOTE(review): stand-alone arithmetic whose result is not used by any other visible cell;
#the meaning of the two factors is not stated -- TODO confirm whether this scratch cell can be removed.
6.4700e-01*5.324669e-01

0.3445060843