Asynchronous parallel processing of FVS keyfiles

In [1]:
import os
import glob
import subprocess
import ipyparallel as ipp
import shutil

Gather the list of keyfiles to run

In [40]:
# Folder holding the keyfiles for this batch. Built with os.path.join so the
# path separator is not hard-coded to the Windows backslash.
run_dir = os.path.abspath(os.path.join('keyfiles_to_run', 'PN'))
# glob returns matches in arbitrary order; sort so jobs are submitted in the
# same order on every run.
to_run = sorted(glob.glob(os.path.join(run_dir, '*.key')))

A function to execute FVS that will be mapped to all keyfiles.

In [47]:
def run_fvs(keyfile):
    """Run FVS on one keyfile, then tidy up the files it leaves behind.

    The executable is picked from the first five characters of the keyfile's
    file name (e.g. a name starting with ``FVSpn`` runs ``FVSpn.exe`` from
    ``C:\\FVSbin``). After the run, the keyfile is moved to
    ``<keyfile dir>/completed/keyfiles``, the matching ``.out`` file is moved
    to ``<keyfile dir>/completed/outfiles`` and the matching ``.trl`` file is
    deleted.

    Parameters
    ----------
    keyfile : str
        Path to the FVS keyfile to run.

    Returns
    -------
    str
        The ``keyfile`` path that was passed in, so results can be matched
        back to their inputs.

    Raises
    ------
    OSError
        If the executable cannot be launched, or the keyfile / ``.out`` file
        cannot be moved (a missing ``.out`` file surfaces here as
        ``FileNotFoundError``).
    shutil.Error
        If a file of the same name already exists in the destination folder.
    """
    base_dir, key_name = os.path.split(keyfile)
    fvs_exe = 'C:\\FVSbin\\' + key_name[:5] + '.exe'
    # The exit status is not inspected; a failed run shows up below as a
    # missing .out file.
    subprocess.call([fvs_exe, '--keywordfile=' + keyfile])  # run fvs

    # Everything before the first '.' in the file name.
    # NOTE(review): assumes keyfile names contain a single '.' - TODO confirm.
    base_name = key_name.split('.')[0]

    keyfile_dir = os.path.join(base_dir, 'completed', 'keyfiles')
    outfile_dir = os.path.join(base_dir, 'completed', 'outfiles')
    # exist_ok=True: several workers run this at once, so a separate
    # "exists?" check followed by makedirs can race and raise FileExistsError.
    for folder in (keyfile_dir, outfile_dir):
        os.makedirs(folder, exist_ok=True)

    # clean-up the outputs: move the .key and .out files
    shutil.move(keyfile, keyfile_dir)
    shutil.move(os.path.join(base_dir, base_name + '.out'), outfile_dir)

    # delete the other files; a missing .trl file is not an error
    try:
        os.remove(os.path.join(base_dir, base_name + '.trl'))
    except FileNotFoundError:
        pass
    return keyfile

Run the following commands in a command prompt (terminal) to start up a cluster of workers:

`>> activate Py3.5 # or other environment name`

`>> ipcluster start -n 4 # or other number of cores`

In [7]:
# Connect to the running ipyparallel cluster (started with `ipcluster start`).
c = ipp.Client()
# Ids of the engines available through this client.
c.ids

[0, 1, 2, 3]

In [None]:
# Manual check: run FVS directly on the first keyfile, without the cluster.
# subprocess.call(['C:\\FVSbin\\FVSpn.exe', '--keywordfile='+to_run[0]])

Create a direct view of the workers and a load-balanced view for submitting jobs

In [8]:
# Direct view over every engine; used below to run the imports on all workers.
dv = c[:]
# Load-balanced view; used below to submit the FVS jobs.
v = c.load_balanced_view()

Import the modules used by `run_fvs` (subprocess, shutil, os) on all workers

In [9]:
# Imports made inside this block are also performed on the engines in dv,
# so run_fvs finds subprocess, shutil and os when it executes on a worker.
with dv.sync_imports():
    import subprocess
    import shutil
    import os

importing subprocess on engine(s)
importing shutil on engine(s)
importing os on engine(s)


Execute an asynchronous batch of FVS runs for all the keyfiles

In [45]:
# Submit one run_fvs task per keyfile through the load-balanced view.
# The call returns an AsyncResult right away instead of waiting for the runs.
res = v.map_async(run_fvs, to_run)

Check on the progress

In [46]:
# Wait for the batch while printing a running count of finished tasks.
res.wait_interactive()

  54/54 tasks finished after  182 s
done


In [None]:
# Returns True if the full set of jobs has completed, otherwise False
# res.ready()

# Cancels the batch
# res.abort()

In [43]:
# Timing summary for the finished batch, read from the AsyncResult.
wall_seconds = res.wall_time      # elapsed time for the whole batch
serial_seconds = res.serial_time  # summed time over all tasks
runs_done = res.progress          # number of finished tasks

print('Human time spent:', wall_seconds)
print('Computer time spent:', serial_seconds)
print('Async speedup:', serial_seconds / wall_seconds)
print('Human time per FVS run:', wall_seconds / runs_done)
print('Computer time per FVS run:', serial_seconds / runs_done)

Human time spent: 148.855184
Computer time spent: 578.3411209999998
Async speedup: 3.885260193558323
Human time per FVS run: 2.756577481481482
Computer time per FVS run: 10.710020759259256


In [None]:
# Stop the engines and, because hub=True, the controller as well.
c.shutdown(hub=True)