## Parsl Test/Demo

We outline a simple set of workflow patterns and show how they can be implemented with the raw DataFlowKernel.
The workflow patterns are:

1. A single app.
2. N apps launched in parallel.
3. N apps launched in sequence, such that the i-th app depends solely on the (i-1)-th.
4. N sequences launched in parallel.


In [1]:
import time
import random
from parsl import *
import parsl
from functools import partial
#parsl.set_stream_logger()

# Executor that backs the DataFlowKernel. The thread-based alternative is
# kept commented out so the two pool types can be swapped.
#workers = ThreadPoolExecutor(max_workers=4)
workers = ProcessPoolExecutor(max_workers=4)
# Kernel through which the cells below submit their tasks.
dfk = DataFlowKernel(workers)

def cback(*args):
    ''' Return the first positional argument multiplied by 5.

    Any additional positional arguments are accepted and ignored.
    '''
    first = args[0]
    return first * 5

def sleep_n(x):
    ''' Block for x seconds, then hand x back unchanged.
    '''
    duration = x
    time.sleep(duration)
    return duration

def sleep_n_rand(x,y):
    ''' Sleep x +/- random(y) seconds and return the time slept.

    The drawn duration is clamped at zero: when the jitter exceeds the base
    (y > x) the sum can go negative, and time.sleep raises ValueError on a
    negative argument.

    Args:
        x: Base sleep duration in seconds.
        y: Maximum jitter in seconds. It is passed to random.randint as
           both bounds (-y, y), so it must be a non-negative integer.

    Returns:
        The number of seconds actually slept (never negative).
    '''
    # Imported locally, as in the original, so the function carries its own
    # dependencies -- presumably for execution inside a worker process.
    import time
    import random
    dur = max(0, x + random.randint(-y, y))
    time.sleep(dur)
    return dur

In [2]:
# Submit a single task: cback bound to 1, with an empty dependency list and
# None as the third argument.
x = dfk.submit(partial(cback,1), [], None)

In [3]:
# Fetch the submitted task's result (cback(1) -> 5) and print it.
print(x.result())

5


In [9]:
%%time

''' This tests the first pattern, a parallel for loop.

Pool          |     Width      |     Duration
Process       |     10K        |       2.96s
Process       |      1K        |       311ms
Process       |    100K        |       29.6s
'''

def launch_n(n):
    ''' Submit n independent cback tasks through the DataFlowKernel.

    Task idx is cback bound to idx, submitted with no dependencies.
    Returns a dict mapping each idx in range(n) to whatever dfk.submit
    returned for that task.
    '''
    return {
        idx: dfk.submit(partial(cback, idx), [], None)
        for idx in range(n)
    }

# Launch 100K independent tasks (the 100K row of the timing table above).
x = launch_n(100000)
#time.sleep(10)
# Collect every task's result and print the total; cback(i) returns i*5.
print(sum([x[i].result() for i in x]))

24999750000
CPU times: user 31.7 s, sys: 8.04 s, total: 39.8 s
Wall time: 29.6 s


In [None]:
%%time
''' Testing Pipeline, a sequence of dependencies.

A -> B ... -> N

'''

def pipeline_n(n):
    ''' Submit a chain of n sleep_n_rand(5, 5) tasks.

    Every task except the first lists the previous task's submission as its
    only dependency. The index and dependency list are printed before each
    submit. Returns a dict mapping index -> object returned by dfk.submit.
    '''
    chain = {}
    for step in range(n):
        prev = step - 1
        # The first step has no predecessor in the dict, so it runs freely.
        deps = [chain[prev]] if prev in chain else []
        print(step, deps)
        chain[step] = dfk.submit(partial(sleep_n_rand, 5, 5), deps, None)
    return chain


# Build a 4-stage chain and print the dict returned by pipeline_n; no
# results are requested here.
foo = pipeline_n (4)
print(foo)

In [None]:
'''  Pipeline
     A -> B ... -> N
'''
import time

def sleep_n(x):
    ''' Pause for x seconds and return x.
    '''
    seconds = x
    time.sleep(seconds)
    return seconds

def pipeline_n(n):
    ''' Submit a chain of n sleep_n(10) tasks.

    Each task after the first depends solely on the task submitted just
    before it. The index and dependency list are printed before each
    submit. Returns a dict mapping index -> object returned by dfk.submit.
    '''
    stages = {}
    for idx in range(n):
        before = idx - 1
        # Only the very first stage lacks a predecessor entry.
        deps = [stages[before]] if before in stages else []
        print(idx, deps)
        stages[idx] = dfk.submit(partial(sleep_n, 10), deps, None)
    return stages


# Build a 4-stage chain, then gather each stage's result in dict iteration
# order and print the list.
foo = pipeline_n (4)
#dfk.current_state()
print ([foo[i].result() for i in foo])

In [None]:
'''  MapReduce
        foo   foo ... foo
          \    |      /
           \   |     /
             merge
''' 


def map_n_reduce(n, dfk):
    ''' Submit n map tasks followed by one reduce task that depends on them.

    Each map task is sleep_n(2) with no dependencies. The reduce task is
    sleep_n(1) and lists every map-stage submission as a dependency.
    Returns (list of map-stage submissions, reduce submission).
    '''
    mappers = [dfk.submit(partial(sleep_n, 2), [], None) for _ in range(n)]
    reducer = dfk.submit(partial(sleep_n, 1), mappers, None)
    return mappers, reducer

# One map task feeding the reduce task: m is the list of map-stage
# submissions, r is the reduce submission.
m,r = map_n_reduce(1, dfk)

In [None]:
# Show the map-stage list and the reduce-stage object from the previous cell.
print(m, r)

# Retrieve the reduce task's result.
r.result()