In [2]:
import glob
import os
import shutil
import subprocess
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# set locations for working files
DEFAULT_AUTOMATION_DIR = "/mnt/analysis/e17023/Adam/GADGET2/"
DEFAULT_NUM_SIMS = 4
# highest tested value so far, diminishing returns?
# probably should talk to IT before increasing this
MAX_SIMS = 10


def parse_cli_args(argv):
    """Resolve the automation directory and the simulator count from ``argv``.

    Parameters
    ----------
    argv : list of str
        Argument vector in ``sys.argv`` layout:
        ``[program, <automation_dir>, <simulators>]``.

    Returns
    -------
    tuple of (str, int)
        ``(automation_dir, num_sims)``. When ``argv`` does not hold exactly
        three entries the usage text is printed and the testing defaults are
        returned. A user-supplied directory always comes back with a trailing
        path separator because the rest of the file builds paths by plain
        string concatenation (``automation_dir + "simInput/..."``).

    Raises
    ------
    SystemExit
        With exit code 1 when ``<simulators>`` is not an integer, is smaller
        than 1, or is larger than ``MAX_SIMS``.
    """
    # NOTE(review): inside a live Jupyter kernel sys.argv holds the kernel's own
    # launch arguments, so this parsing presumably targets the converted
    # multi-sim.py script -- confirm.
    if len(argv) != 3:
        print("Usage: python3 multi-sim.py <automation_dir> <simulators>")
        print('Assuming testing directories')
        return DEFAULT_AUTOMATION_DIR, DEFAULT_NUM_SIMS

    # Joining with "" appends the separator only when it is missing.
    automation_dir = os.path.join(argv[1], "")

    try:
        num_sims = int(argv[2])
    except ValueError:
        print("Error: <simulators> must be an integer")
        sys.exit(1)
    if num_sims < 1:
        print("Error: <simulators> must be positive")
        sys.exit(1)
    if num_sims > MAX_SIMS:
        # The original printed this error but carried on; enforce the cap.
        print(f"Error: <simulators> must be at most {MAX_SIMS}")
        sys.exit(1)
    return automation_dir, num_sims


automation_dir, num_sims = parse_cli_args(sys.argv)

In [4]:
def _remove_matches(pattern):
    """Delete every file or directory tree matching the glob ``pattern`` (best effort)."""
    for path in glob.glob(pattern):
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, ignore_errors=True)
        else:
            try:
                os.remove(path)
            except OSError:
                pass  # best effort, mirrors `rm -f`


def _copy_if_present(src, dest_dir):
    """Copy the file ``src`` into ``dest_dir``; print a warning when ``src`` is missing."""
    if os.path.isfile(src):
        shutil.copy(src, dest_dir)
    else:
        print("Warning: missing file " + src)


def _convert_notebook(main_dir, stem, sim_dir):
    """Run nb2py.py on simInput/multi/<stem>.ipynb and move <stem>.py into ``sim_dir``."""
    multi_dir = os.path.join(main_dir, "simInput", "multi")
    notebook = os.path.join(multi_dir, stem + ".ipynb")
    script = os.path.join(multi_dir, stem + ".py")

    if os.path.isfile(notebook):
        converter = os.path.join(main_dir, "simInput", "nb2py.py")
        # Argument list instead of a shell string: paths with spaces stay intact.
        subprocess.run([sys.executable or "python3", converter, notebook])
    else:
        print("Warning: missing notebook " + notebook)

    if os.path.isfile(script):
        # A full destination path lets an existing script be overwritten.
        shutil.move(script, os.path.join(sim_dir, stem + ".py"))
    else:
        print("Warning: missing script " + script)


def start_sim(sim_dir, main_dir, reset = False):
    """Prepare one simulator directory and launch its ``sim.sh`` in the background.

    Parameters
    ----------
    sim_dir : str
        Working directory of the simulator. It is created (including missing
        parents) when absent.
    main_dir : str
        Automation directory holding ``simInput/templates``, ``simInput/multi``
        and ``simInput/nb2py.py``. A trailing separator is optional.
    reset : bool, optional
        When True an existing ``sim_dir`` is deleted and recreated. When False
        only the contents of ``sim_dir/out`` and ``*.tmp`` entries directly in
        ``sim_dir`` are removed.

    Returns
    -------
    None

    Notes
    -----
    Setup is best effort: missing inputs are reported with a printed warning
    rather than an exception. The caller's working directory is left unchanged;
    the script is started with ``sim_dir`` as the child's working directory.
    """
    # test if sim_dir exists
    if not os.path.isdir(sim_dir):
        os.makedirs(sim_dir, exist_ok=True)
        print("Created directory " + sim_dir)
    elif reset:  # delete everything
        print("Deleting directory " + sim_dir)
        shutil.rmtree(sim_dir, ignore_errors=True)
        os.makedirs(sim_dir, exist_ok=True)
    else:
        # delete old output files and temporary status files
        _remove_matches(os.path.join(sim_dir, "out", "*"))
        _remove_matches(os.path.join(sim_dir, "*.tmp"))

    # copy template files (regular files only, as plain `cp` skips directories)
    for template in glob.glob(os.path.join(main_dir, "simInput", "templates", "*")):
        if os.path.isfile(template):
            shutil.copy(template, sim_dir)

    # convert queue and process notebooks to scripts
    _convert_notebook(main_dir, "queue", sim_dir)
    _convert_notebook(main_dir, "process", sim_dir)

    multi_dir = os.path.join(main_dir, "simInput", "multi")
    _copy_if_present(os.path.join(multi_dir, "sim.sh"), sim_dir)
    _copy_if_present(os.path.join(multi_dir, "param.csv"), sim_dir)

    # start simulation in background and return
    launcher = os.path.join(sim_dir, "sim.sh")
    if not os.path.isfile(launcher):
        print("Error: " + launcher + " not found, simulator not started")
        return None
    os.chmod(launcher, os.stat(launcher).st_mode | 0o111)  # chmod +x
    # "./sim.sh" is required: a bare "sim.sh" is searched on PATH, not in cwd.
    # A new session keeps terminal signals (e.g. Ctrl-C) away from the child.
    subprocess.Popen(
        ["nohup", "./sim.sh"],
        cwd=sim_dir,
        stdin=subprocess.DEVNULL,
        start_new_session=True,
    )
    return None

In [3]:
# Launch every simulator in turn; each one keeps running in the background,
# and launches are spaced one second apart.
for sim_index in range(num_sims):
    sim_path = f'{automation_dir}Sims/{sim_index}/'
    start_sim(sim_path, automation_dir, reset = False)
    print(f'Started simulator {sim_index}')
    time.sleep(1)

In [None]:
def _latest_status(sim_dir):
    """Return the status-file name of a simulator, or 'STOP.tmp' when it has none.

    The last ``*.tmp`` entry in ``os.listdir`` order is used.
    NOTE(review): that order is arbitrary, so this presumably relies on the
    simulator keeping at most one status file at a time -- confirm.
    """
    tmp_files = [name for name in os.listdir(sim_dir) if name.endswith(".tmp")]
    return tmp_files[-1] if tmp_files else 'STOP.tmp'


def _move_outputs(sources, dest_dir, label):
    """Move each path in ``sources`` into ``dest_dir``, replacing same-named files.

    Failures are reported with a printed warning so one missing output cannot
    stop the management loop.
    """
    if not sources:
        print(f"Warning: no {label} output found")
    for src in sources:
        try:
            shutil.move(src, os.path.join(dest_dir, os.path.basename(src)))
        except OSError as err:
            print(f"Warning: could not move {src}: {err}")


parameters_csv = automation_dir + "simInput/parameters.csv"

while True: # simulation management loop
    statuses = [] # list of statuses for each simulation
    for simi in range(num_sims):
        param_df = pd.read_csv(parameters_csv) # read any updates to parameters.csv
        params_changed = False # only write parameters.csv back when it was modified

        sim_dir = f'{automation_dir}Sims/{simi}/' # directory for current simulation
        sparam = pd.read_csv(sim_dir + "param.csv") # read individual parameter file for current simulation

        # check each simulation for status files
        statuses.append(_latest_status(sim_dir))

        # if simulation is waiting, process output files and queue next run
        if statuses[-1] == 'WAIT.tmp':
            completed_sims = sparam[sparam['Status'] >= 2] # rows this simulator reports with Status >= 2
            for i in range(len(completed_sims)): # process each completed simulation
                sim_name = completed_sims.iloc[i]['Sim']
                # move sim's output files to main output directory
                h5_file = f"{sim_dir}out/hdf5/{sim_name}.h5"
                gif_file = f"{sim_dir}out/gifs/{sim_name}.gif"
                image_files = glob.glob(glob.escape(f"{sim_dir}out/images/{sim_name}_") + "*")
                _move_outputs([h5_file] if os.path.exists(h5_file) else [],
                              f"{automation_dir}simOutput/hdf5/", "hdf5")
                _move_outputs(image_files, f"{automation_dir}simOutput/images/", "image")
                _move_outputs([gif_file] if os.path.exists(gif_file) else [],
                              f"{automation_dir}simOutput/gifs/", "gif")
                print(f"{sim_name} completed on simulator {simi} at {time.strftime('%H:%M:%S')}")
                # copy the simulator's reported status into parameters.csv
                param_df.loc[param_df['Sim'] == sim_name, 'Status'] = sparam.loc[sparam['Sim'] == sim_name, 'Status'].values[0]
                params_changed = True

            # queue next run if there are more sims to run
            pending = param_df[param_df['Status'] == 0]
            if len(pending) > 0:
                next_params = pending.head(1) # get parameters for next sim
                next_params.to_csv(sim_dir + "param.csv", index = False) # write parameters to sim's param.csv
                next_name = next_params['Sim'].iloc[0]
                param_df.loc[param_df['Sim'] == next_name, 'Status'] = 1 # mark as queued in parameters.csv
                params_changed = True
                print(f"{next_name} queued on simulator {simi} at {time.strftime('%H:%M:%S')}") # print queued sim number
            else: # no more sims to queue, stop simulation iteration
                # NOTE(review): WAIT.tmp is not removed here; presumably the
                # simulator side clears it once it sees STOP.tmp -- confirm.
                with open(sim_dir + "STOP.tmp", "w") as f:
                    f.write("STOP")
                print(f"Simulator {simi} stopped at {time.strftime('%H:%M:%S')}")

        # Writing only after a modification avoids rewriting the file on every
        # pass and narrows the window in which external edits could be lost.
        if params_changed:
            param_df.to_csv(parameters_csv, index = False)

    status_df = pd.DataFrame({'Sim' : list(range(num_sims)), 'Status' : statuses})
    status_df.to_csv(automation_dir + "simInput/status.csv", index = False)

    if all(status == 'STOP.tmp' for status in statuses):
        print("All simulations complete")
        break
    time.sleep(1) # wait 1 second before checking sims again