## Parsl Test/Demo with the @App decorators.

We outline a simple set of workflow patterns and show how they can be implemented with the @App decorators.
Here are the workflows:

1. Single apps.
2. N apps launched in parallel.
3. N apps launched in sequence such that the i-th task depends solely on the (i-1)-th.
4. N sequences launched in parallel.


In [1]:
import time
import random
from parsl import *
import parsl
from functools import partial
parsl.set_stream_logger()

# Here we specify the executor handed to the DataFlowKernel:
# a thread pool with 4 workers.
workers = ThreadPoolExecutor(max_workers=4)
# Alternative kept for reference: a process pool of the same size.
#workers = ProcessPoolExecutor(max_workers=4)
# `dfk` is the object passed to every @App(...) decorator in this notebook.
dfk = DataFlowKernel(workers)

''' Here we define some basic bash apps.
'''
@App('bash', dfk)
def echo(inputs=[], stderr='std.err', stdout='std.out'):        
    # The body only binds a shell command template to the local name
    # `cmd_line`; the function itself returns nothing.
    # NOTE(review): presumably the @App('bash', ...) decorator picks up
    # `cmd_line` and fills the {placeholders} from the call's arguments --
    # confirm against the parsl version in use.
    # Template: echo the first two entries of `inputs`.
    cmd_line = 'echo {inputs[0]} {inputs[1]}'

@App('bash', dfk)
def echo_to_file(inputs=[], outputs=[], stderr='std.err', stdout='std.out'):        
    # The body only binds a shell command template to `cmd_line`.
    # Template: echo `inputs` and redirect the result into the first entry
    # of `outputs`.
    # NOTE(review): `{inputs}` is not indexed; if the template is filled with
    # str.format the whole list's repr would be echoed -- confirm this is
    # the intended behaviour.
    cmd_line = 'echo {inputs} > {outputs[0]}'
    
@App('bash', dfk)
def sleep_n(t):
    # The body only binds a shell command template to `cmd_line`.
    # Template: run `sleep` with the value of `t` as its argument.
    cmd_line = 'sleep {t}'    

@App('bash', dfk)
def cats_n_sleep (x, inputs, outputs):
    # The body only binds a shell command template to `cmd_line`.
    # Template: sleep for `$RANDOM % x` seconds, then copy the first input
    # file to the first output file with `cat`.
    # NOTE(review): x == 0 would make the shell arithmetic divide by zero,
    # and $RANDOM assumes a bash-like shell -- confirm both with callers.
    cmd_line = 'sleep $(($RANDOM % {x})); cat {inputs[0]} > {outputs[0]}'
    
@App('bash', dfk)
def incr (inputs, outputs):
    # The body only binds a shell command template to `cmd_line`.
    # Template: read the contents of the first input file into the shell
    # variable `y`, then write y+1 to the first output file.
    cmd_line = 'y=$(cat {inputs[0]}); echo $(($y+1)) > {outputs[0]}'
    

2017-02-17 19:16:56,140 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 19:16:56,141 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 19:16:56,141 parsl.app.app [DEBUG] Decorator Exec : <function echo at 0x7f5f64689400>
2017-02-17 19:16:56,142 parsl.app.app [DEBUG] __init__ 
2017-02-17 19:16:56,143 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 19:16:56,144 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 19:16:56,144 parsl.app.app [DEBUG] Decorator Exec : <function echo_to_file at 0x7f5f64689598>
2017-02-17 19:16:56,145 parsl.app.app [DEBUG] __init__ 
2017-02-17 19:16:56,146 parsl.app.app [DEBUG] Apptype : bash
2017-02-17 19:16:56,146 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 19:16:56,147 parsl.app.app [DEBUG] Decorator Exec : <function sleep_n at 0x7f5f646898c8>
2017-02-17 19:16:56,148 parsl.app.app [DEBUG] __init__ 
2017-02-17 19:16:56,148 parsl.app.app 

In [None]:
# Launch the echo app with two words; the call's return value is unpacked
# into `fu` and `outs`.
fu, outs = echo(inputs=["Hello", "World!"], stdout='std.out', stderr='std.err')

In [None]:
#fu, outs = echo(inputs=["Hello", "World"])
# Show whatever is currently bound to `fu` and `outs`; both names must
# already exist when this cell runs.
print(fu, outs)

In [None]:
# Create the input file that the app below reads.
with open('test.txt', 'w') as tmp:
    tmp.write("E=mc^2")
    
# Launch cats_n_sleep with x=5, test.txt as its only input and out.txt as
# its only output; the return value is unpacked into `f` and `outs`.
f, outs = cats_n_sleep (x=5,inputs=['test.txt'],outputs=['out.txt'])

In [None]:
# Inspect the objects returned by the app launch.
print(f,outs)
# NOTE(review): the label says "Duration", but what f.result() actually
# returns is not visible in this notebook -- confirm.
print("Duration      : ", f.result())
# .result() and .done() are called on the first element of `outs`.
print("Output        : ", outs[0].result())
print("Output done ? : ", outs[0].done())

In [None]:
%%time
import os
import shutil

# Start from an empty ./outputs directory. The tree is removed only when it
# already exists, so the very first run (no directory yet) does not fail
# with FileNotFoundError.
if os.path.isdir('./outputs'):
    shutil.rmtree('./outputs')
os.mkdir('./outputs')

''' This tests the first pattern, a parallel for loop.

Pool          |     Width      |     Duration
Process       |     10K        |       2.96s
Process       |      1K        |       311ms
Process       |    100K        |       29.6s
'''

def launch_n (n):
    """Launch ``n`` cats_n_sleep apps and collect what each launch returns.

    Every launch uses x=4, reads 'test.txt' and writes to its own file
    'outputs/out.<i>.txt'.

    Args:
        n: number of apps to launch; indices run from 0 to n-1.

    Returns:
        dict mapping each index to the value returned by cats_n_sleep.
    """
    return {
        idx: cats_n_sleep(x=4, inputs=['test.txt'],
                          outputs=['outputs/out.{0}.txt'.format(idx)])
        for idx in range(n)
    }

# Launch four apps; `x` maps each index to what the launch returned.
x = launch_n(4)
# List ./outputs right after launching.
# NOTE(review): the launched apps may not have finished yet at this point,
# so the listing can be incomplete -- confirm.
os.listdir('outputs/')

In [None]:
# For every launch recorded in `x`, take the second element of the returned
# value, then its first entry, and print that entry's `.filepath`.
print([x[i][1][0].filepath for i in x])
# List ./outputs again.
os.listdir('outputs/')

In [None]:
%%time
''' Testing Pipeline, a sequence of dependencies.

A -> B ... -> N

'''
# Seed the pipeline: start.txt holds the initial counter value '0'.
# The context manager guarantees the file is flushed and closed before any
# app tries to read it.
with open('start.txt', 'w') as start_file:
    start_file.write('0')

def pipeline_n (n):
    """Chain ``n`` incr apps so that each one consumes the previous outputs.

    The first step reads 'start.txt'; step i writes 'incr.<i>.txt', and the
    outputs returned by one launch are passed as the inputs of the next.
    The step index and its inputs are printed before each launch.

    Args:
        n: number of chained steps.

    Returns:
        dict mapping each step index to
        {'App_fu': <first value returned by incr>,
         'Data_fu': <second value returned by incr>}.
    """
    stages = {}
    upstream = ['start.txt']
    for step in range(n):
        print(step, upstream)
        target = 'incr.{0}.txt'.format(step)
        # The outputs of this launch become the inputs of the next one.
        app_future, upstream = incr(inputs=upstream, outputs=[target])
        stages[step] = {'App_fu' : app_future, 'Data_fu' : upstream}
    return stages


# Build a 5-step pipeline and print the per-step dict it returns.
foo = pipeline_n (5)
print(foo)

In [None]:
# Walk the pipeline steps in ascending order and print, for each one, its
# index, the path of its first data output, and that file's contents.
for key in sorted(foo):
    data_path = foo[key]['Data_fu'][0].filepath
    # Read through a context manager so each file is closed right away
    # instead of leaking one open handle per step.
    with open(data_path, 'r') as stage_file:
        print (key, data_path, stage_file.read())

In [None]:
'''  MapReduce
        foo   foo ... foo
          \    |      /
           \   |     /
             merge
''' 


def map_n_reduce(n, dfk):
    """Launch ``n`` echo_to_file "map" apps followed by one "reduce" app.

    Map app i is given inputs [str(i)] and writes 'map.<i>.txt'. The outputs
    of every map launch are gathered into one list, which is then passed as
    the inputs of a final echo_to_file launch writing 'reduced.txt'.

    Args:
        n: number of map apps to launch.
        dfk: accepted for the caller's convenience; not used in the body.

    Returns:
        (map_stage, red): the gathered map outputs and the value returned
        by the reduce launch.
    """
    map_stage = []
    for idx in range(n):
        # Only the outputs are needed downstream; the first value returned
        # by the launch is dropped.
        _, stage_outs = echo_to_file(inputs=[str(idx)],
                                     outputs=['map.{0}.txt'.format(idx)])
        map_stage += stage_outs

    red = echo_to_file(inputs=map_stage, outputs=['reduced.txt'])
    return map_stage, red

# Run the map/reduce pattern with three map apps; `m` receives the gathered
# map outputs and `r` the value returned by the reduce launch.
m,r = map_n_reduce(3, dfk)

In [None]:
# Show the map outputs and the reduce launch's return value.
print(m, r)

# `r` is what echo_to_file returned for the reduce step; take its second
# element, then that element's first entry, and call .result() on it.
r[1][0].result()

In [None]:
# Print the contents of the reduce step's output file; the context manager
# closes the file handle instead of leaving it open.
with open('reduced.txt', 'r') as reduced_file:
    print(reduced_file.read())

In [2]:
@App('python', dfk)
def foo(x):
    """Return ``x`` multiplied by 3."""
    tripled = x * 3
    return tripled

2017-02-17 19:16:59,595 parsl.app.app [DEBUG] Apptype : python
2017-02-17 19:16:59,596 parsl.app.app [DEBUG] Executor : <class 'parsl.dataflow.dflow.DataFlowKernel'>
2017-02-17 19:16:59,597 parsl.app.app [DEBUG] Decorator Exec : <function foo at 0x7f5f646899d8>
2017-02-17 19:16:59,597 parsl.app.app [DEBUG] __init__ 


In [4]:
# Call the python app with 5; element 0 of the returned value exposes
# .result(), which is printed here (15 in the recorded output).
x = foo(5)
print(x[0].result())

2017-02-17 19:17:17,649 parsl.app.app [DEBUG] In __Call__
2017-02-17 19:17:17,650 parsl.app.app [DEBUG] Submitting via : <parsl.dataflow.dflow.DataFlowKernel object at 0x7f5f64ecef98>
2017-02-17 19:17:17,723 parsl.app.app [DEBUG] Exec   : functools.partial(<function foo at 0x7f5f646899d8>, 5)
2017-02-17 19:17:17,723 parsl.dataflow.dflow [DEBUG] Task:8669c4b2-98bf-4db2-9ca8-dd0ca2bbfb35   dep_cnt:0  deps:[]
2017-02-17 19:17:17,724 parsl.dataflow.dflow [DEBUG] Submitting to executor : 8669c4b2-98bf-4db2-9ca8-dd0ca2bbfb35
2017-02-17 19:17:17,725 parsl.dataflow.dflow [DEBUG] Completed : 8669c4b2-98bf-4db2-9ca8-dd0ca2bbfb35 with <Future at 0x7f5f64ef71d0 state=finished returned int>
2017-02-17 19:17:17,726 parsl.dataflow.dflow [DEBUG] Pending:0   Runnable:1   Done:2
2017-02-17 19:17:17,726 parsl.dataflow.dflow [DEBUG] Launched : 8669c4b2-98bf-4db2-9ca8-dd0ca2bbfb35 with <AppFuture at 0x7f5f6467e320 state=finished returned int>
2017-02-17 19:17:17,727 parsl.dataflow.futures [DEBUG] Waiting o

15
