## Parsl Test/Demo with the @App decorators.

This notebook outlines a simple set of workflow patterns and shows how each can be implemented with the @App decorator.
The workflow patterns are:

1. Single apps.
2. N apps launched in parallel.
3. N apps launched in sequence, such that the i-th app depends solely on the (i-1)-th.
4. N sequences launched in parallel.


In [1]:
import time
import random
from parsl import *
import parsl
from functools import partial
parsl.set_stream_logger()

# Here we specify the executor that runs the apps: a pool of 4 threads.
# Swap in the commented-out line to use a pool of 4 processes instead.
workers = ThreadPoolExecutor(max_workers=4)
#workers = ProcessPoolExecutor(max_workers=4)
# The DataFlowKernel is built on the chosen executor; it is the object passed
# to every @App decorator below.
dfk = DataFlowKernel(workers)

''' Here we define some basic bash apps.
'''
# Bash app: echoes the first two entries of `inputs`.
# The body only assigns the `cmd_line` template; the {...} placeholders are
# presumably filled in from the call's arguments by the @App decorator.
# The template indexes inputs[0] and inputs[1], so callers need to supply at
# least two inputs.
# NOTE(review): `inputs=[]` is a mutable default; it is left untouched because
# the decorator may read the signature defaults -- confirm before changing.
@App('bash', dfk)
def echo(inputs=[], stderr='std.err', stdout='std.out'):        
    cmd_line = 'echo {inputs[0]} {inputs[1]}'

# Bash app: echoes `inputs` and redirects the output into the first entry of
# `outputs`.
# NOTE: `{inputs}` interpolates the whole list rather than its items; the
# debug log of a recorded run shows the command rendered as
# `echo ['0'] > map.0.txt`.
@App('bash', dfk)
def echo_to_file(inputs=[], outputs=[], stderr='std.err', stdout='std.out'):        
    cmd_line = 'echo {inputs} > {outputs[0]}'
    
# Bash app: runs the shell `sleep` command with `t` as its argument.
@App('bash', dfk)
def sleep_n(t):
    cmd_line = 'sleep {t}'    

# Bash app: sleeps for a random number of seconds in [0, x), then copies the
# first input file to the first output file with `cat`.
# `$RANDOM % {x}` is evaluated by the shell, so x must be non-zero.
@App('bash', dfk)
def cats_n_sleep (x, inputs, outputs):
    cmd_line = 'sleep $(($RANDOM % {x})); cat {inputs[0]} > {outputs[0]}'
    
# Bash app: reads the value stored in the first input file, adds 1 using shell
# arithmetic, and writes the result to the first output file.
@App('bash', dfk)
def incr (inputs, outputs):
    cmd_line = 'y=$(cat {inputs[0]}); echo $(($y+1)) > {outputs[0]}'
    

2017-02-17 18:28:25,155 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 18:28:25,222 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 18:28:25,223 parsl.app.app [DEBUG] Decorator Exec : <function echo at 0x7fe1422e2378>
2017-02-17 18:28:25,224 parsl.app.app [DEBUG] __init__ 
2017-02-17 18:28:25,224 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 18:28:25,225 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 18:28:25,226 parsl.app.app [DEBUG] Decorator Exec : <function echo_to_file at 0x7fe1422e2620>
2017-02-17 18:28:25,227 parsl.app.app [DEBUG] __init__ 
2017-02-17 18:28:25,227 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 18:28:25,228 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 18:28:25,229 parsl.app.app [DEBUG] Decorator Exec : <function sleep_n at 0x7fe1422e28c8>
2017-02-17 18:28:25,229 parsl.app.app [DEBUG] __init__ 
2017-02-17 18:28:25,230 parsl.app.app 

In [None]:
# Launch the echo app. The call returns a pair, unpacked here as the app's
# future (fu) and its outputs (outs).
fu, outs = echo(inputs=["Hello", "World!"], stdout='std.out', stderr='std.err')

In [None]:
#fu, outs = echo(inputs=["Hello", "World"])
# Show the future and outputs returned by the echo call in the previous cell.
print(fu, outs)

In [None]:
# Create the small input file that the cats_n_sleep apps read.
with open('test.txt', 'w') as tmp:
    tmp.write("E=mc^2")
    
# Launch one cats_n_sleep app: it sleeps for a random 0-4 seconds, then copies
# test.txt to out.txt. f is the app's future, outs its list of output futures.
f, outs = cats_n_sleep (x=5,inputs=['test.txt'],outputs=['out.txt'])

In [None]:
# Inspect the futures returned by the cats_n_sleep call in the previous cell.
print(f,outs)
# NOTE(review): the app future's result is labelled "Duration" here -- confirm
# what the returned value actually measures.
print("Duration      : ", f.result())
# result() and done() are queried on the first output (data) future.
# Presumably result() waits for the output to be produced -- TODO confirm.
print("Output        : ", outs[0].result())
print("Output done ? : ", outs[0].done())

In [None]:
%%time
import os
import shutil
# Start every run from an empty ./outputs directory. On a first run the
# directory does not exist yet and a bare rmtree would raise
# FileNotFoundError, so only that specific case is tolerated.
try:
    shutil.rmtree('./outputs')
except FileNotFoundError:
    pass
os.mkdir('./outputs')

''' This tests the first pattern, a parallel for loop.

Pool          |     Width      |     Duration
Process       |     10K        |       2.96s
Process       |      1K        |       311ms
Process       |    100K        |       29.6s
'''

def launch_n(n):
    ''' Launch n independent cats_n_sleep apps (a parallel for loop).

    App i copies 'test.txt' to 'outputs/out.<i>.txt' after a random sleep.
    Returns a dict mapping each index i to whatever cats_n_sleep returned
    for that launch.
    '''
    return {
        idx: cats_n_sleep(x=4, inputs=['test.txt'], outputs=['outputs/out.{0}.txt'.format(idx)])
        for idx in range(n)
    }

# Launch 4 apps in parallel and keep their return values, keyed by index, in x.
x = launch_n(4)
# List the output directory right after launching.
os.listdir('outputs/')

In [None]:
# Each x[i] is the pair returned by cats_n_sleep; element [1][0] is its first
# output future, whose file path is printed here.
print([x[i][1][0].filepath for i in x])
# List what is present in the output directory at this point.
os.listdir('outputs/')

In [None]:
%%time
''' Testing Pipeline, a sequence of dependencies.

A -> B ... -> N

'''
# Seed the pipeline with a counter value of 0. The context manager closes (and
# therefore flushes) the file before any app reads it.
with open('start.txt', 'w') as seed:
    seed.write('0')

def pipeline_n(n):
    ''' Chain n incr apps so that each stage consumes the previous stage's output.

    The first stage reads 'start.txt'; stage i writes 'incr.<i>.txt', and the
    outputs returned by stage i are passed as the inputs of stage i+1.

    Returns a dict mapping each stage index to
    {'App_fu': <app future>, 'Data_fu': <outputs returned by incr>}.
    '''
    stages = {}
    upstream = ['start.txt']
    for step in range(n):
        print(step, upstream)
        # The outputs of this stage become the dependencies of the next one.
        app_future, upstream = incr(inputs=upstream, outputs=['incr.{0}.txt'.format(step)])
        stages[step] = {'App_fu': app_future, 'Data_fu': upstream}
    return stages


# Build a 5-stage pipeline and show the futures recorded for each stage.
foo = pipeline_n (5)
print(foo)

In [None]:
# For each pipeline stage, in order, print its index, the path of its first
# output file, and that file's contents. The file is opened in a context
# manager so each handle is closed as soon as it has been read.
for key in sorted(foo):
    stage_path = foo[key]['Data_fu'][0].filepath
    with open(stage_path, 'r') as stage_file:
        print(key, stage_path, stage_file.read())

In [2]:
'''  MapReduce
        foo   foo ... foo
          \    |      /
           \   |     /
             merge
''' 


def map_n_reduce(n, dfk):
    ''' Fan out n echo_to_file apps, then feed all their outputs to one final echo_to_file.

    n   : number of map-stage apps; app i receives [str(i)] as inputs and
          writes 'map.<i>.txt'.
    dfk : accepted but not used in the body; echo_to_file was already bound to
          a DataFlowKernel when it was decorated.

    Returns (map outputs, reduce result): the flat list of outputs collected
    from the map stage, and whatever the final echo_to_file call (writing
    'reduced.txt') returned.
    '''
    collected = []
    for idx in range(n):
        _app_fu, data_fus = echo_to_file(inputs=[str(idx)], outputs=['map.{0}.txt'.format(idx)])
        collected += data_fus

    # The reduce stage depends on every map-stage output.
    merged = echo_to_file(inputs=collected, outputs=['reduced.txt'])
    return collected, merged

# Run a 3-way map followed by a single reduce: m holds the map-stage outputs,
# r the value returned by the reduce-stage app call.
m,r = map_n_reduce(3, dfk)

2017-02-17 18:28:30,763 parsl.app.app [DEBUG] In __Call__
2017-02-17 18:28:30,770 parsl.app.app [DEBUG] Received : ['0'] 
2017-02-17 18:28:30,823 parsl.app.app [DEBUG] Submitting via : <parsl.dataflow.dflow.DataFlowKernel object at 0x7fe1422d8fd0>
2017-02-17 18:28:30,823 parsl.app.app [DEBUG] cmd    : echo {inputs} > {outputs[0]}
2017-02-17 18:28:30,824 parsl.app.app [DEBUG] Exec   : echo ['0'] > map.0.txt
2017-02-17 18:28:30,825 parsl.dataflow.dflow [DEBUG] Task:7fc331a4-5e16-41dc-a465-139865728b89   dep_cnt:0  deps:[]
2017-02-17 18:28:30,825 parsl.dataflow.dflow [DEBUG] Submitting to executor : 7fc331a4-5e16-41dc-a465-139865728b89
2017-02-17 18:28:30,827 parsl.app.app [DEBUG] Running app : bash
2017-02-17 18:28:30,827 parsl.dataflow.dflow [DEBUG] Launched : 7fc331a4-5e16-41dc-a465-139865728b89 with <AppFuture at 0x7fe1422d8978 state=running>
2017-02-17 18:28:30,828 parsl.app.app [DEBUG] Launching app : echo ['0'] > map.0.txt
2017-02-17 18:28:30,829 parsl.app.futures [DEBUG] Filepath 

In [3]:
# Show the map-stage outputs and the pair returned by the reduce-stage call.
print(m, r)

# Query the result of the reduce stage's first output future; in the recorded
# run below this evaluates to the path of reduced.txt.
r[1][0].result()

[<DataFuture at 0x7fe1422d8ac8 state=pending>, <DataFuture at 0x7fe1422e0a58 state=pending>, <DataFuture at 0x7fe1422e0a90 state=pending>] (<AppFuture at 0x7fe1422e0ac8 state=finished returned float>, [<DataFuture at 0x7fe1422e0cc0 state=pending>])


'/home/yadu/swython/reduced.txt'

In [None]:
# Print the contents of the reduce stage's output file; the context manager
# closes the file handle once it has been read.
with open('reduced.txt', 'r') as reduced:
    print(reduced.read())