In [1]:
from glob import glob
from copy import deepcopy
from io import StringIO
from itertools import chain

import pandas as pd
import os,sys
import subprocess

# BuildHPExperiments

This notebook parses experiment parameter files to generate ready-to-run STOMP-flu experiments:

* Experiment scenario parameters are loaded from the passed **ExperimentSetup*.csv** file
* Flags in each scenario link to intervention code lines within **EventLines.txt**
* These flags and their selected code lines are then inserted into the template configuration file and experiment execution script from **templates/**
* The generated run scripts and configuration files are copied to their respective directories in **experiments/{experiment}/{scenario}/**
* Metadata of experiment parameters and file locations is written to **experiments/{experiment}/MetaData.csv**
* A monolithic bash script to run all of the experiments as well as 4 partial bash scripts for divided workflows is written to **experiments/{experiment}/RunAll*.sh**

### Scenario Script Prep

In [2]:
def prepScenarioEvents(source='ExperimentSetup.csv'):
    """Load the experiment setup table and add an ``Events`` list column.

    Parameters
    ----------
    source : str or file-like
        CSV path (or buffer) passed straight to ``pandas.read_csv``. The table
        must provide ``Epi`` and ``Interventions`` columns.

    Returns
    -------
    pandas.DataFrame
        The setup table with missing values replaced by ``''``, every column
        cast to ``str``, and an extra ``Events`` column holding the list of
        event flags from ``Epi`` followed by those from ``Interventions``.
        A row with no flags at all gets ``['']``.
    """

    script = pd.read_csv(source)
    script = script.fillna('').astype('str')

    combined = (script.Epi + ' ' + script.Interventions).str.strip()
    # Split on runs of whitespace rather than on a single ' ': repeated spaces
    # or tabs between flags would otherwise yield empty-string event names.
    events = combined.str.split()
    # Whitespace splitting turns an empty cell into []; keep the [''] that
    # splitting on ' ' produced so every row still has a first event.
    script.loc[:,'Events'] = events.apply(lambda flags: flags if flags else [''])

    return script


def prepEventLines(source='EventLines.txt'):
    """Parse an EventLines.txt file into a dict of script lines keyed by event name.

    A line starting with '*' opens a new event; its name is the line with all
    '*' characters removed and surrounding whitespace stripped. Every following
    line up to the next '*' line belongs to that event and is stored with a
    trailing newline. Lines consisting only of a newline are dropped, and lines
    before the first '*' header are ignored.
    """

    with open(source) as handle:
        rawLines = handle.readlines()

    eventLines = {}
    currentEvent = None
    for raw in rawLines:
        if raw == '\n':
            continue
        text = raw.replace('\n','')
        if text.startswith('*'):
            # A repeated header name restarts that event's line list
            currentEvent = text.replace('*','').strip()
            eventLines[currentEvent] = []
        elif currentEvent is not None:
            eventLines[currentEvent].append(text+'\n')

    return eventLines


def getScenarioLines(scenarioSource='ExperimentSetup.csv',
                    eventSource='EventLines.txt'):
    """Build the per-scenario script lines and event lists.

    Returns a tuple ``(scenarioLines, scenarioEvents)``: both are dicts keyed
    by the ``Scenario`` column of the setup table. ``scenarioEvents`` maps to
    the scenario's list of event flags, ``scenarioLines`` to the concatenated
    script lines of those events, in flag order. An event flag missing from
    the event lines file raises ``KeyError``.
    """

    setupTable = prepScenarioEvents(scenarioSource)
    linesByEvent = prepEventLines(eventSource)

    scenarioEvents = dict(zip(setupTable.Scenario, setupTable.Events))

    scenarioLines = {}
    for scenario, events in scenarioEvents.items():
        collected = []
        for event in events:
            collected.extend(linesByEvent[event])
        scenarioLines[scenario] = collected

    return scenarioLines, scenarioEvents
    

### Experiment Generator Methods

In [3]:
def alignPopNets(populations,networks):
    """Pair population files with network files and key the pairs by experiment name.

    The first population and first network are paired under the key 'USA'.
    Each remaining population is keyed by its file name up to the first '.',
    and paired with the first remaining network whose file name starts with
    that key. Names with no matching network are printed and left out.
    """

    def fileName(path):
        return path.split('/')[-1]

    matched = {'USA':(populations[0],networks[0])}
    candidates = networks[1:]
    for population in populations[1:]:
        name = fileName(population).split('.')[0]
        partner = next((net for net in candidates if fileName(net).startswith(name)), None)
        if partner is None:
            print(name)
        else:
            matched[name] = (population,partner)
    return matched


def prepExperiment(experimentName,
                  population,
                  network,
                  templateLines,
                  stubFile,
                  prefixLines=[]):
    """Write one experiment directory containing a patchsim config and run script.

    Parameters
    ----------
    experimentName : str
        Path of the experiment below ``experiments/``; the directory is
        created if it does not exist yet.
    population, network
        Accepted for call compatibility; not used when writing the files.
    templateLines : list of str
        Lines concatenated and written to ``config.patchsim``.
    stubFile : str
        Path of the run-script template whose lines form the body of
        ``RunPatchsim.py``.
    prefixLines : sequence of str
        Lines written to ``RunPatchsim.py`` ahead of the stub lines. The
        default list is never mutated.
    """

    dirOut = 'experiments/%s' % experimentName
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(dirOut, exist_ok=True)

    print("Prepping experiment in directory %s" % experimentName)

    with open(os.path.join(dirOut, 'config.patchsim'),'w') as fileOut:
        fileOut.write(''.join(templateLines))

    with open(stubFile) as stubFileIn:
        patchSimStub = list(prefixLines) + stubFileIn.readlines()

    with open(os.path.join(dirOut, 'RunPatchsim.py'),'w') as fileOut:
        fileOut.write(''.join(patchSimStub))

### Experiment generator main function

In [4]:
def prepExperiments(toRun=['USA'],
                    prefix='',
                    template='templates/WorkingCFGTemplate.txt',
                    popDir = 'PatchFlow_data/day2/',
                    populationPattern = '*population.patchsim',
                    networkPattern = '/*aggregate.patchsim',
                    explicitDirectories = True,
                    stubFile='templates/RunSimTemplate1Rep.py',
                    setupFile='ExperimentSetupHP.csv'):
    """Generate every scenario directory and a MetaData.csv for each selected experiment.

    Parameters
    ----------
    toRun : list of str
        Experiment names (keys returned by ``alignPopNets``) to generate.
    prefix : str
        Optional label; when non-empty it is prepended to the experiment
        directory name followed by '_'.
    template : str
        Path of the config template whose lines start every ``config.patchsim``.
    popDir, populationPattern, networkPattern : str
        Directory and glob patterns used to discover population and network files.
    explicitDirectories : bool
        When true, file locations recorded in the metadata are prefixed with
        the current working directory; otherwise they are left relative.
    stubFile : str
        Path of the run-script template passed on to ``prepExperiment``.
    setupFile : str
        Scenario setup CSV passed to ``getScenarioLines``.

    For each selected experiment, one directory per scenario is written via
    ``prepExperiment`` and ``experiments/{prefix}{experiment}/MetaData.csv``
    records the scenario name, epidemic, interventions and file locations.
    """

    USANetwork = popDir + '/USA_config_min_5_max_100_alpha_400_day2_with_IATA_and_commuters.patchsim'
    populations = sorted(glob(popDir+populationPattern))
    networks = [USANetwork]+sorted(glob(popDir+networkPattern))

    matched = alignPopNets(populations,networks)
    scenarioLinesDict,scenarioEventsDict = getScenarioLines(setupFile)

    with open(template) as templateIn:
        templateLines = templateIn.readlines()

    cwd = os.getcwd()+'/' if explicitDirectories else ''
    if prefix != '':
        prefix = prefix+'_'

    # Section markers written into every generated config file
    header = ['### TEMPLATE LINES\n\n']
    separator = ['\n\n\n### EXPERIMENT PREP LINES\n\n']

    for experimentName, (population,network) in matched.items():
        if experimentName not in toRun:
            continue

        experimentMetaData = []
        for scenario, scenarioLines in scenarioLinesDict.items():
            experimentNameOut = '%s%s/%s' % (prefix,experimentName,scenario)
            linesOut = header+templateLines+separator+scenarioLines

            scenarioName = experimentNameOut.split('/')[-1]
            scenarioEvents = scenarioEventsDict[scenarioName]
            # The first event is the epidemic; any further events are interventions
            epidemic = scenarioEvents[0]
            interventions = scenarioEvents[1:]

            # NOTE: the flags are written as the repr of a set, so the order of
            # flags in the generated script can differ between interpreter runs.
            prefixLines = ['### EXPERIMENT FLAGS ITERATION\n\nflags = %s\n\n### EXPERIMENT EXECUTION STUB\n\n' % str(set(scenarioEvents))]

            prepExperiment(experimentNameOut,
                           population,
                           network,
                           linesOut,
                           stubFile,
                           prefixLines)

            experimentMetaData.append({
                'Name':experimentNameOut.split('/')[-2],
                'Scenario':scenarioName,
                'Epidemic':epidemic,
                'Interventions':' '.join(interventions),
                'ConfigFile':'%sexperiments/%s/config.patchsim' % (cwd,experimentNameOut),
                'RunScript':'%sexperiments/%s/RunPatchsim.py' % (cwd,experimentNameOut),
                'PopulationFile':population,
                'Network':network,
                'MergedOutput':'%sexperiments/%s/MergedSamples.csv' % (cwd,experimentNameOut)})

        metaDataDf = pd.DataFrame(experimentMetaData)
        metaDataFileOut = 'experiments/%s%s/MetaData.csv' % (prefix,experimentName)
        print(metaDataFileOut)
        metaDataDf.to_csv(metaDataFileOut,index=False)
        # Reported per experiment: placed after the loop this line raised
        # NameError whenever no matched experiment was listed in toRun.
        print("Writing experiment metadata to", metaDataFileOut)

    
def getRunScript(metaDataRef,
                   multiThread=False,
                   asScript=True,
                   skip=set(),
                   nScripts=4):
    """Write bash run scripts for, or directly execute, every experiment in a metadata file.

    Parameters
    ----------
    metaDataRef : str
        Path of a metadata CSV with a ``RunScript`` column of run-script paths.
        The name of the directory holding each run script identifies the cell.
    multiThread : bool
        Only used when ``asScript`` is false: start each run with
        ``subprocess.Popen`` without waiting, instead of running one at a time.
    asScript : bool
        When true nothing is executed; ``RunAll.sh`` is written next to the
        metadata file, with a ``cd``, ``echo`` and ``python`` line per cell.
    skip : set of str
        Cell names to leave out. The default set is never mutated.
    nScripts : int
        When not 1, ``RunAll0.sh`` .. ``RunAll{nScripts-1}.sh`` are written as
        well, with the cells dealt out round-robin so every cell lands in
        exactly one partial script.
    """
    meta = pd.read_csv(metaDataRef)

    cellBlocks = []  # one [cd, echo, python] command triple per cell
    for runScript in meta.RunScript:
        pathParts = runScript.split('/')
        cell = pathParts[-2]
        runDir = '/'.join(pathParts[:-1])
        scriptCmd = 'python %s' % pathParts[-1]

        # Honour the skip set; the previous membership test looked at the
        # generated script lines, so no cell was ever skipped.
        if cell in skip:
            print("Skipping",cell)
            continue

        if asScript:
            cellBlocks.append(['cd %s' % runDir,
                               "echo 'Running cell %s'" % cell,
                               scriptCmd])
        else:
            print("Running scenario %s" % cell)
            # Run in the scenario directory via cwd= rather than os.chdir, so
            # the notebook's working directory is never changed, even on error.
            if multiThread:
                subprocess.Popen(scriptCmd.split(), cwd=runDir)
            else:
                subprocess.call(scriptCmd, shell=True, cwd=runDir)

    if cellBlocks:
        # Derive the target from the directory so a metadata file with another
        # name is not overwritten by the script.
        scriptRef = os.path.join(os.path.dirname(metaDataRef), 'RunAll.sh')
        print("Writing script to", scriptRef)
        with open(scriptRef,'w') as scriptOut:
            scriptOut.write('\n'.join(line for block in cellBlocks for line in block))

        if nScripts != 1:
            scriptRoot, scriptExt = os.path.splitext(scriptRef)
            for i in range(nScripts):
                # Stride slicing covers every cell exactly once, including
                # when the cell count is not a multiple of nScripts.
                subScriptTxt = '\n'.join('\n'.join(block) for block in cellBlocks[i::nScripts])
                with open(f'{scriptRoot}{i}{scriptExt}','w') as scriptOut:
                    scriptOut.write(subScriptTxt)


### Experiment generation and execution

#### RL Scenario

In [5]:
# Generate the scenario directories and metadata from the 2009 setup table
prepExperiments(
    prefix='WorkingTemplate2009v15',
    template='templates/WorkingCFGTemplate.txt',
    stubFile='templates/RunSimTemplate2009v15.py',
    setupFile='ExperimentSetup2009.csv',
)

# Write RunAll.sh plus four partial run scripts from the metadata just written
getRunScript(
    metaDataRef='experiments/WorkingTemplate2009v15_USA/MetaData.csv',
    nScripts=4,
)

Prepping experiment in directory WorkingTemplate2009v15_USA/RL_01
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_02
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_03
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_04
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_05
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_06
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_07
Prepping experiment in directory WorkingTemplate2009v15_USA/RL_08
experiments/WorkingTemplate2009v15_USA/MetaData.csv
Writing experiment metadata to experiments/WorkingTemplate2009v15_USA/MetaData.csv
Writing script to experiments/WorkingTemplate2009v15_USA/RunAll.sh


#### HP Scenario

In [6]:
# Generate the scenario directories and metadata using the default setup file
prepExperiments(
    prefix='WorkingTemplateP9',
    template='templates/WorkingCFGTemplate.txt',
    stubFile='templates/HP02RandomPopCalib.py',
)

# Write the run scripts from the metadata just written
getRunScript(
    metaDataRef='experiments/WorkingTemplateP9_USA/MetaData.csv',
    multiThread=False,
)

Prepping experiment in directory WorkingTemplateP9_USA/HP_01
Prepping experiment in directory WorkingTemplateP9_USA/HP_02
Prepping experiment in directory WorkingTemplateP9_USA/HP_03
Prepping experiment in directory WorkingTemplateP9_USA/HP_04
Prepping experiment in directory WorkingTemplateP9_USA/HP_05
Prepping experiment in directory WorkingTemplateP9_USA/HP_06
Prepping experiment in directory WorkingTemplateP9_USA/HP_07
Prepping experiment in directory WorkingTemplateP9_USA/HP_08
Prepping experiment in directory WorkingTemplateP9_USA/HP_09
Prepping experiment in directory WorkingTemplateP9_USA/HP_10
Prepping experiment in directory WorkingTemplateP9_USA/HP_11
Prepping experiment in directory WorkingTemplateP9_USA/HP_12
Prepping experiment in directory WorkingTemplateP9_USA/HP_13
Prepping experiment in directory WorkingTemplateP9_USA/HP_14
Prepping experiment in directory WorkingTemplateP9_USA/HP_15
Prepping experiment in directory WorkingTemplateP9_USA/HP_16
Prepping experiment in d