### Read the SLHA files and the SModelS output and store the data in a pandas DataFrame

In [1]:
import warnings
# Install the filter first so that it is already active when the packages below are imported.
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import numpy as np
import pandas as pd
# NOTE(review): `imp` is deprecated (and removed in Python 3.12); it is used in this
# notebook only through imp.load_source when reading the SModelS python output.
import glob,imp,os,sys
from pandas import json_normalize
import pyslha
# Make the local SModelS checkout importable. This is a machine-specific absolute path
# and it has to be appended before the smodels imports that follow.
sys.path.append('/home/camila/smodels/')
# NOTE(review): BSMList does not appear to be used in the cells shown here
# (the BSM particles are read from an SLHA file instead) -- confirm before removing.
from smodels.share.models.mssm import BSMList
from smodels.share.models.SMparticles import SMList
from smodels.base.model import Model
from smodels.particlesLoader import getParticlesFromSLHA

pd.options.mode.chained_assignment = None #Disable pandas chained-assignment (copy) warnings

  import glob,imp,os,sys


In [2]:
# Build the model used later to translate PDG codes into particle labels:
# the BSM particles are read from one reference SLHA file, the SM ones come from SMList.
referenceSlha = '../../slha_files_extra/all/run_01_zp_3612_dm_1412_gqv_06.slha'
BSMlist = getParticlesFromSLHA(referenceSlha)
model = Model(SMparticles=SMList, BSMparticles=BSMlist)
print(BSMlist)

[gha, gha~, ghz, ghz~, ghwp, ghwp~, ghwm, ghwm~, ghg, ghg~, zp, ghzp, ghzp~, sd, chi]


In [3]:
# Folder with the input SLHA files (passed to runSModelS.py with -f and re-read with pyslha)
slhaFolder = '../../slha_files_rescaled/zp_only/'
# Folder receiving the SModelS output (passed with -o); its *.py files are loaded afterwards
smodelsFolder = '../../data/2mdm_results_rescaled/zp_only/'
# SModelS parameters file (passed with -p); machine-specific absolute path
parametersSmodels = '/home/camila/smodels/parameters_2mdm.ini'

In [4]:
# Run SModelS (shell call) on the files in slhaFolder, writing the results to smodelsFolder.
!/home/camila/smodels/runSModelS.py -p {parametersSmodels} -f {slhaFolder} -o {smodelsFolder}

INFO in databaseObj.loadBinaryFile() in 526: loading binary db file /home/camila/smodels-database/db3.pcl format version 214
INFO in databaseObj.loadBinaryFile() in 533: Loaded database from /home/camila/smodels-database/db3.pcl in 1.9 secs.
INFO in modelTester.loadDatabaseResults() in 498: Including non-validated results
INFO in modelTester.testPoints() in 358: Running SModelS for 422 files with a single process. Messages will be redirected to smodels.log


In [5]:
#Load every SModelS python output file and convert it to one dictionary per point.
#The list of experimental results is converted to a dictionary (result0, result1, ...)
#sorted by decreasing r-value, so that it can be flattened into DataFrame columns.
data = []
#Entries of the SModelS output which are not stored
removeFromDict = ['topologies outside the grid',"missing topologies",
                  "missing topologies with displaced decays", 'missing topologies with prompt decays',
                 "Asymmetric Branches","Outside Grid","Missed Topologies","Long Cascades"]
for f in glob.glob(smodelsFolder+'/*.py'):
    #Skip the sanitized copies written by a previous execution of this cell.
    #They also match '*.py', so without this check re-running the cell would load
    #every point again (duplicated rows) and create *_fix_fix.py files.
    if f.endswith('_fix.py'):
        continue
    with open(f,'r') as ff:
        dataF = ff.read()
    #Replace 'inf' and 'None' by -1 so the file can be loaded with plain numeric values
    #(the results are sorted by 'r' below).
    #NOTE: this is a plain substring replacement, so it also changes any string
    #in the file (e.g. a path) which happens to contain 'inf' or 'None'.
    dataF = dataF.replace('inf','-1')
    dataF = dataF.replace('None', '-1')
    #Only swap the trailing extension (str.replace would change every '.py' in the path)
    fixedFile = f[:-len('.py')] + '_fix.py'
    with open(fixedFile,'w') as ff:
        ff.write(dataF)
    f = fixedFile
    smodelsDict = imp.load_source(f[:-len('.py')],f).smodelsOutput
    for rmKey in removeFromDict:
        smodelsDict.pop(rmKey,None)
    if 'ExptRes' in smodelsDict:
        for res in smodelsDict['ExptRes']:
            res.pop('TxNames weights (fb)',None)
        #Most constraining result (largest r) first
        expList = sorted(smodelsDict['ExptRes'], key=lambda pt: pt['r'],reverse=True)
        expDict = {'result%i'%i : val for i,val in enumerate(expList)}
        smodelsDict['ExptRes'] = expDict
    #The SLHA file name is the key used later to match the SLHA data
    slhaFile = smodelsDict['OutputStatus']['input file']
    dataDict = {'filename' : os.path.basename(slhaFile)}
    dataDict.update(smodelsDict)
    data.append(dataDict)

In [6]:
# Number of SModelS output dictionaries loaded (one entry is appended per file read)
print(len(data))

422


In [7]:
#Convert data to flat DataFrame: one row per point, nested dictionaries become
#dotted column names (e.g. ExptRes.result0.likelihood)
smodelsDF = json_normalize(data)

In [8]:
#Get SLHA data:
def _maxXsecsFb(xsections, sqrts):
    """
    Collect the largest cross-section of each process at the given sqrts.

    :param xsections: dictionary of processes (as in slha.xsections)
    :param sqrts: center-of-mass energy passed to proc.get_xsecs
    :return: dictionary {final state PDGs joined by '_' : largest value times 1000}.
             Processes without any value at this sqrts are left out.
             (The factor 1000 presumably converts pb to fb -- confirm.)
    """
    xsecDict = {}
    for proc in xsections.values():
        xsecList = proc.get_xsecs(sqrts=sqrts)
        if not xsecList:
            continue
        label = str(proc.pidsfinal).replace('[','').replace(']','').replace(',','_').replace(' ','')
        xsecDict[label] = max([x.value for x in xsecList])*1000
    return xsecDict

#Only masses and widths of particles with PDG codes starting at this value are stored
minPDG = 9000006
slhaData = []
for f in smodelsDF['filename']:
    slhaFile = os.path.join(slhaFolder,f)
    slha = pyslha.readSLHAFile(slhaFile)
    massDict = {str(pdg) : abs(mass) for pdg,mass in slha.blocks['MASS'].items() if pdg >= minPDG}
    #Model parameters: the BLINPUTS entries are appended after the ZPRIME ones,
    #with their keys shifted by the number of ZPRIME entries
    extparDict = {str(key) : val for key,val in slha.blocks['ZPRIME'].items()}
    shift = len(extparDict)
    extparDict.update({str(key+shift) : val for key,val in slha.blocks['BLINPUTS'].items()})

    widthDict = {str(pdg) : decay.totalwidth for pdg,decay in slha.decays.items() if pdg >= minPDG}
    #Branching ratios, labelled by the particle names defined in the model.
    #Decay channels with BR below 1% are not stored.
    BRsDict = {}
    for pdg,val in slha.decays.items():
        initialState = model.getParticlesWith(pdg=pdg)[0].label
        BRsDict[initialState] = {
            ','.join(model.getParticlesWith(pdg=pid)[0].label for pid in sorted(dec.ids)) : dec.br
            for dec in val.decays if not dec.br < 0.01}
    xsec8TeV = _maxXsecsFb(slha.xsections, 8000)
    xsec13TeV = _maxXsecsFb(slha.xsections, 13000)
    slhaDict = {'filename' : f, 'mass' : massDict, 'width' : widthDict, 'extpar' : extparDict,
                'xsec8TeV(fb)' : xsec8TeV, 'xsec13TeV(fb)' : xsec13TeV, 'BRs' : BRsDict}
    slhaData.append(slhaDict)

In [9]:
#Flatten the list of SLHA dictionaries into a DataFrame (one row per file)
slhaDF = json_normalize(slhaData)
#Add total cross-sections: sum (per row) of all the columns for a given energy
xsecs13 = [col for col in slhaDF.columns if 'xsec13TeV' in col]
xsecs8 = [col for col in slhaDF.columns if 'xsec8TeV' in col]
slhaDF = slhaDF.assign(**{'totalxsec13TeV(fb)' : slhaDF[xsecs13].sum(axis=1),
                          'totalxsec8TeV(fb)' : slhaDF[xsecs8].sum(axis=1)})

In [10]:
#Combine the SLHA and SModelS DataFrames. No key is given, so the rows are matched
#on the columns common to both frames and only the matching rows are kept (inner join).
dataDF = pd.merge(slhaDF, smodelsDF, how='inner')

In [11]:

print('Final number of data points:',dataDF.shape[0])
#print(dataDF.columns.values.tolist()) #Print all column names

Final number of data points: 422


In [12]:
# Display the merged DataFrame (SLHA data + SModelS results)
dataDF

Unnamed: 0,filename,mass.9000006,mass.9900032,mass.9900026,width.9000006,width.9900026,width.9900032,extpar.1,extpar.2,extpar.3,...,ExptRes.result5.l_SM,ExptRes.result1.likelihood,ExptRes.result1.l_max,ExptRes.result1.l_SM,ExptRes.result0.likelihood,ExptRes.result0.l_max,ExptRes.result0.l_SM,ExptRes.result2.likelihood,ExptRes.result2.l_max,ExptRes.result2.l_SM
0,run_179_zp_2300_dm_300.slha,300.0,2300.0,1150.0,0.0,92.03608,130.38330,1.5,0.25,0.0,...,1.793298e-276,,,,,,,,,
1,run_114_zp_1500_dm_200.slha,200.0,1500.0,750.0,0.0,35.96803,84.82805,1.5,0.25,0.0,...,,1.702891e-286,1.002372e-275,1.793298e-276,,,,,,
2,run_287_zp_3200_dm_600.slha,600.0,3200.0,1600.0,0.0,188.59920,171.56780,1.5,0.25,0.0,...,,,,,,,,,,
3,run_77_zp_3750_dm_1900.slha,1900.0,3750.0,1875.0,0.0,273.04930,111.90530,1.5,0.25,0.0,...,,,,,,,,,,
4,run_354_zp_3600_dm_1600.slha,1600.0,3600.0,1800.0,0.0,242.13570,117.75830,1.5,0.25,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,run_257_zp_2900_dm_1200.slha,1200.0,2900.0,1450.0,0.0,128.43380,101.84660,1.5,0.25,0.0,...,1.793298e-276,,,,,,,,,
418,run_170_zp_2200_dm_500.slha,500.0,2200.0,1100.0,0.0,60.46382,112.04410,1.5,0.25,0.0,...,1.793298e-276,,,,,,,,,
419,run_384_zp_3800_dm_1000.slha,1000.0,3800.0,1900.0,0.0,283.91240,183.10900,1.5,0.25,0.0,...,,,,,,,,,,
420,run_235_zp_2800_dm_400.slha,400.0,2800.0,1400.0,0.0,144.78390,157.09180,1.5,0.25,0.0,...,1.793298e-276,,,,,,,,,


In [13]:
#Save DataFrame to pickle file. The file is written to the SModelS output folder;
#the path is built from smodelsFolder so that it follows any change of that folder
#(for the current value it is the same path as before).
dataDF.to_pickle(os.path.join(smodelsFolder,'results.pcl'))

In [14]:
# Number of rows in the saved DataFrame.
# NOTE(review): the stored output of this cell (143) differs from the 422 points reported
# after the merge -- it looks like a leftover from an earlier run; re-run to confirm.
print(len(dataDF))

143
