### Read SLHA, SModelS and micrOmegas output and store the data in a pandas DataFrame

In [1]:
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
import numpy as np
import pandas as pd
import glob,imp,os,sys
from pandas import json_normalize
import pyslha
sys.path.append('../smodels')
from smodels.particlesLoader import BSMList
from smodels.share.models.SMparticles import SMList
from smodels.theory.model import Model
pd.options.mode.chained_assignment = None #Disable copy warnings

In [43]:
def _microValue(token):
    """Convert a text token into a Python literal, or return it unchanged.

    Only literals (numbers, booleans, ...) are converted. Unlike eval(), a
    token such as 'max' or 'id' is kept as a string instead of being resolved
    to a builtin, and no code read from the file is ever executed.
    """
    import ast  # local import: the notebook's import cell does not provide ast

    try:
        return ast.literal_eval(token)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return token


def _microPairs(line):
    """Return the (label, value) pairs of a 'label=value label=value ...' line."""
    pairs = []
    while '=' in line:
        label, _, rest = line.partition('=')
        tokens = rest.split(None, 1)
        if not tokens:
            break  # '=' with nothing after it: no value to store
        pairs.append((label.strip(), _microValue(tokens[0])))
        # Keep parsing whatever follows the value just consumed
        line = tokens[1] if len(tokens) > 1 else ''
    return pairs


def _microSection(output, header, mfile):
    """Return the text following `header` in `output`, or None if it is absent."""
    parts = output.split(header)
    if len(parts) < 2:
        print('Section "%s" not found in %s' % (header, mfile))
        return None
    return parts[1]


def microParser(mfile):
    """Parse a micrOmegas output file into a flat dictionary of observables.

    Three sections of the file are read:
      * '==== Physical Constraints: =====': every 'label=value' pair, plus a
        boolean 'MassLimits' entry (True when the word after 'MassLimits' is
        'OK'). Parsing of this section stops at the 'MassLimits' line.
      * '==== Calculation of relic density': the 'label=value' pairs found on
        the line right after the section header.
      * '==== ~o1-nucleon cross sections[pb] ====': for each 'proton'/'neutron'
        line, the alternating name/value tokens, stored as '<nucleon>_<name>'.

    Values are converted to Python literals when possible and kept as strings
    otherwise.

    :param mfile: path to the micrOmegas output file
    :return: dictionary with the parsed observables. It is empty if the file
             does not exist, and a section missing from the file is reported
             and skipped instead of raising.
    """
    outputDict = {}
    if not os.path.isfile(mfile):
        print('File %s not found' %mfile)
        return outputDict

    with open(mfile,'r') as f:
        output = f.read()

    obs_dict = {}

    #Get block for physical constraints
    physConst = _microSection(output, '==== Physical Constraints: =====', mfile)
    if physConst is not None:
        # The block ends where the next '===' header starts
        for l in physConst.split('===')[0].split('\n'):
            if not l.strip():
                continue
            if '=' in l:
                obs_dict.update(_microPairs(l))
            elif 'MassLimits' in l:
                tokens = l.split()
                obs_dict['MassLimits'] = len(tokens) > 1 and tokens[1] == 'OK'
                break

    #Get block for relic density
    relic = _microSection(output, '==== Calculation of relic density', mfile)
    if relic is not None:
        # Index 0 is the remainder of the header line; the values are on the next line
        relicLines = relic.split('\n')[1:2]
        if relicLines:
            obs_dict.update(_microPairs(relicLines[0]))

    #Get block for CDM nucleon cross-section
    CDMxsec = _microSection(output, '==== ~o1-nucleon cross sections[pb] ====', mfile)
    if CDMxsec is not None:
        for l in CDMxsec.split('===')[0].split('\n'):
            l = l.strip()
            if not l:
                continue
            for nucleon in ('proton','neutron'):
                if nucleon not in l:
                    continue
                # What is left after removing the nucleon name alternates name / value
                tokens = l.replace(nucleon,'').split()
                for name, tok in zip(tokens[::2], tokens[1::2]):
                    obs_dict['%s_%s'%(nucleon,name)] = _microValue(tok)
                break
    return obs_dict

In [42]:
#Quick check: run the parser on a single micrOmegas output file and print the resulting dictionary
print(microParser('../EWino/data/micromegas_scanRandom/ew_zbwbb1b_.micro'))

{'deltartho': 4.6e-05, 'bsgnlo': 0.000328, 'bsmumu': 3.11e-09, 'btaunu': 0.998, 'dtaunu': 0.0517, 'dmunu': 0.00533, 'Rl23': 1.0, 'MassLimits': True, 'Xf': 20.2, 'Omega': 20.2, 'proton_SI': 1.024e-10, 'proton_SD': 3.629e-07, 'neutron_SI': 1.046e-10, 'neutron_SD': 2.798e-07}


In [3]:
#Build the SModelS model from the BSM and SM particle lists
#(used when reading the SLHA files to translate PDG codes into particle labels)
model = Model(BSMparticles=BSMList, SMparticles=SMList)

In [4]:
#Input folders for the scan: SLHA files, SModelS output (.py) and micrOmegas output (.micro)
slhaFolder = '../EWino/data/slha_scanRandom'
smodelsFolder = '../EWino/data/smodels_scanRandom'
microFolder = '../EWino/data/micromegas_scanRandom'

In [5]:
#Load every SModelS output file and turn its list of experimental results into a dictionary
#NOTE(review): the imp module is deprecated and no longer available in Python >= 3.12
removeFromDict = ['topologies outside the grid',"missing topologies",
                  "missing topologies with displaced decays", 'missing topologies with prompt decays',
                  "Asymmetric Branches","Outside Grid","Missed Topologies","Long Cascades"]
data = []
for pyPath in glob.glob('%s/*.py' % smodelsFolder):
    moduleName = pyPath.replace('.py','')
    smodelsDict = imp.load_source(moduleName, pyPath).smodelsOutput
    #Drop the entries that are not stored in the DataFrame
    for rmKey in removeFromDict:
        smodelsDict.pop(rmKey, None)
    if 'ExptRes' in smodelsDict:
        for res in smodelsDict['ExptRes']:
            res.pop('TxNames weights (fb)', None)
        #Label the results result0, result1, ... by decreasing r-value
        ranked = sorted(smodelsDict['ExptRes'], key=lambda pt: pt['r'], reverse=True)
        smodelsDict['ExptRes'] = {'result%i'%i: entry for i, entry in enumerate(ranked)}
    #The SLHA file name is the key later used to merge the DataFrames
    slhaFile = smodelsDict['OutputStatus']['input file']
    dataDict = {'filename' : os.path.basename(slhaFile)}
    dataDict.update(smodelsDict)
    data.append(dataDict)

In [6]:
#Number of SModelS output files loaded
print(len(data))

96669


In [7]:
#Convert data to flat DataFrame:
smodelsDF = json_normalize(data)

In [8]:
#Read masses, widths, EXTPAR values, BRs and cross-sections from each SLHA file
def _maxXsecsFb(xsections, sqrts):
    #Largest cross-section value (times 1000) of each process at the given sqrts,
    #keyed by the final state PDG codes joined with '_'
    xsecDict = {}
    for proc in xsections.values():
        if not proc.get_xsecs(sqrts=sqrts):
            continue
        pidLabel = str(proc.pidsfinal).replace('[','').replace(']','').replace(',','_').replace(' ','')
        xsecDict[pidLabel] = max([x.value for x in proc.get_xsecs(sqrts=sqrts)])*1000
    return xsecDict

slhaData = []
for f in smodelsDF['filename']:
    slha = pyslha.readSLHAFile(os.path.join(slhaFolder,f))
    #Only PDG codes above 25 are kept for masses and widths
    massDict = {str(pdg): abs(mass) for pdg,mass in slha.blocks['MASS'].items() if pdg > 25}
    extparDict = {str(key): val for key,val in slha.blocks['EXTPAR'].items()}
    widthDict = {str(pdg): particle.totalwidth for pdg,particle in slha.decays.items() if pdg > 25}
    #Branching ratios (>= 1%) of the particles with |PDG| = 1000024 or 1000023
    BRsDict = {}
    for pdg,particle in slha.decays.items():
        if abs(pdg) not in (1000024,1000023):
            continue
        initialState = model.getParticlesWith(pdg=pdg)[0].label
        channels = BRsDict[initialState] = {}
        for dec in particle.decays:
            if dec.br < 0.01:
                continue
            daughters = [model.getParticlesWith(pdg=pid)[0].label for pid in sorted(dec.ids)]
            channels[','.join(daughters)] = dec.br
    slhaData.append({'filename' : f, 'mass' : massDict, 'width' : widthDict, 'extpar' : extparDict,
                     'xsec8TeV(fb)' : _maxXsecsFb(slha.xsections, 8000),
                     'xsec13TeV(fb)' : _maxXsecsFb(slha.xsections, 13000),
                     'BRs' : BRsDict})

In [9]:
#Flatten the SLHA dictionaries into a DataFrame (one row per file)
slhaDF = json_normalize(slhaData)
#Collect the per-process cross-section columns of each energy
columnNames = slhaDF.columns.tolist()
xsecs13 = [col for col in columnNames if 'xsec13TeV' in col]
xsecs8 = [col for col in columnNames if 'xsec8TeV' in col]
#Total cross-section = row-wise sum over the per-process columns
slhaDF['totalxsec13TeV(fb)'] = slhaDF[xsecs13].sum(axis=1)
slhaDF['totalxsec8TeV(fb)'] = slhaDF[xsecs8].sum(axis=1)

In [10]:
#Merge with SModelS DataFrame
#No 'on' argument is given, so pandas joins on the columns the two frames share
#(presumably only 'filename' -- verify); the inner join keeps only the files present in both
dataDF = slhaDF.merge(smodelsDF,how='inner')
# print('Final number of data points:',dataDF.shape[0])
#print(dataDF.columns.values.tolist()) #Print all columns names

In [46]:
#Parse the micrOmegas output that belongs to each SLHA file
microData = []
for slhaName in smodelsDF['filename']:
    #The micrOmegas file has the same name as the SLHA file, with extension .micro
    microName = slhaName.replace('.slha','.micro')
    observables = microParser(os.path.join(microFolder,microName))
    #The file name is the key used to merge with the other DataFrames
    observables['filename'] = slhaName
    microData.append(observables)

In [47]:
#Flatten the micrOmegas dictionaries into a DataFrame (one row per file)
microDF = json_normalize(microData)
#Inner join with the SLHA+SModelS DataFrame on the columns both frames share
dataDF2 = pd.merge(microDF, dataDF, how='inner')
nPoints = len(dataDF2)
print('Final number of data points:',nPoints)
#print(dataDF2.columns.values.tolist()) #Print all columns names

Final number of data points: 96669


In [49]:
#Save the merged DataFrame (micrOmegas + SLHA + SModelS) to a pickle file:
# dataDF.to_pickle('WinoResultsv2.pcl')

dataDF2.to_pickle('EWino_scanRandom.pcl')