In [1]:
import sys
import time
import threading
import multiprocessing

sys.path.append("../")
from IoTPy.core.stream import Stream, StreamArray, run
from IoTPy.agent_types.op import map_element, map_list, map_window
from IoTPy.helper_functions.recent_values import recent_values
from IoTPy.helper_functions.print_stream import print_stream

from IoTPy.concurrency.multicore import get_processes, get_processes_and_procs
from IoTPy.concurrency.multicore import terminate_stream
from IoTPy.concurrency.multicore import get_proc_that_inputs_source
from IoTPy.concurrency.multicore import extend_stream

## A Process Executes an Agent
A process in a multicore application executes an agent with the following signature:
<br>
<br>
<b>f(in_streams, out_streams)</b>
<br>
<br>
where:
<ol>
    <li> <i>f</i> is a function. </li>
    <li> <i>in_streams</i> is a list of input streams. </li>
    <li> <i>out_streams</i> is a list of output streams. </li>
</ol>
Next we show a collection of agents, <i>f</i>, <i>g</i>, <i>h</i>, and <i>r</i>, with this signature. We will use these agents in the examples of multicore programs.

In [2]:
def f(in_streams, out_streams):
    """Agent: add 100 to every element of in_streams[0] and emit it on out_streams[0]."""
    def add_100(value):
        return value + 100

    map_element(add_100, in_streams[0], out_streams[0])

def g(in_streams, out_streams):
    """Agent: double every element of in_streams[0] and print the results.

    The doubled values are written to a local stream named 's', which is
    then handed to print_stream with the label 's'. out_streams is not used.
    """
    def double(value):
        return value * 2

    doubled = Stream('s')
    map_element(double, in_streams[0], doubled)
    print_stream(doubled, 's')

def h(in_streams, out_streams):
    """Agent: multiply every element of in_streams[0] by 2 and emit it on out_streams[0]."""
    def double(value):
        return value * 2

    map_element(double, in_streams[0], out_streams[0])

def r(in_streams, out_streams):
    """Agent: triple every element of in_streams[0] and print the results.

    The tripled values are written to a local stream named 't', which is
    then handed to print_stream with the label 't'. out_streams is not used.
    """
    def triple(value):
        return value * 3

    tripled = Stream('t')
    map_element(triple, in_streams[0], tripled)
    print_stream(tripled, 't')

## Threads
A process may execute an arbitrary number of threads. You can use any thread target. A thread that gets data from an external source, such as a sensor or a Twitter stream, and then puts that data into a stream calls the following function:
<br>
<br>
<b>extend_stream(procs, data, stream_name)</b>
<br>
<br>
where
<ol>
    <li> <i>procs</i> is a list of process metadata created from the specification of a multicore program. <i>procs</i> is passed as a parameter to the thread target. We will discuss <i>procs</i> later. </li>
    <li> <i>data</i> is a list or an array. </li>
    <li> <i>stream_name</i> is a string which is the name of a stream.</li>
</ol>
In the example, <i>source_thread_target</i>, the function has a single argument <i>procs</i>. All thread targets that extend streams must have <i>procs</i> as one of their arguments.
<br>
<br>
This function executes a loop in which it puts the specified data into a stream called <i>x</i> and then sleeps, thus yielding the thread.
<br>
<br>
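After the loop, the function calls <b>terminate_stream(procs, stream_name)</b> to signal that no more data will be put into stream <i>x</i>.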
<br>
<br>


In [3]:
def source_thread_target(procs):
    """Thread target that feeds stream 'x' in three batches and then terminates it.

    Puts [0, 1], [2, 3] and [4, 5] into stream 'x' with extend_stream,
    sleeping for 0.001 seconds after each batch, and finally calls
    terminate_stream on 'x'.

    procs: process metadata, passed through unchanged to extend_stream and
        terminate_stream.
    """
    batch_size = 2
    for start in range(0, 3 * batch_size, batch_size):
        batch = list(range(start, start + batch_size))
        extend_stream(procs, data=batch, stream_name='x')
        time.sleep(0.001)  # yield the thread between batches
    terminate_stream(procs, stream_name='x')

## Simple example of a multicore program
### Processes and their connecting streams
Look at <b>multicore_specification</b>. The specification states that the program has two processes called p0 and p1. Process p0 has a single input stream <i>x</i> and a single output stream <i>y</i>. Process p1 has a single input stream <i>y</i> and no output streams. Thus, the output <i>y</i> of process p0 is the input of process p1.
<br>
### Streams
Streams are specified by a list of pairs where each pair is a stream name and a stream type. The stream type 'i' identifies integers, 'f' floats and 'd' double. We use stream types to allow processes to share memory in Python 2.7+. In this example, the pair ('x', 'i') says that the program has a stream <i>x</i> of type int.
<br>
### Sources
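Stream <i>x</i> is a source: it is listed under <b>'sources'</b> in the specification of process p0. Its data is produced by the thread target <i>source_thread_target</i> defined above, which executes in its own thread; in the code below this thread is attached to a process through <i>procs['p1'].threads</i>, and it is started when the process is started. Function <i>source_thread_target</i> has a single argument called <i>procs</i>, the process metadata returned by <i>get_processes_and_procs</i>.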
<br>
<br>
Function <i>source_thread_target</i> puts data into stream <i>x</i> when it executes <b>extend_stream()</b>. The thread executing the function then sleeps for 0.001 seconds before appending more data to stream <i>x</i>. Finally, the thread signals that the source has finished appending data to stream <i>x</i> by calling <b>terminate_stream(procs, stream_name='x')</b>.
### Process Structure
The source thread outputs a stream <i>x</i> which is an input of process p0. The output <i>y</i> of process p0 is an input to process p1.
### Process Computations
The computation of a process is specified by a function with two arguments <i>in_streams</i> and <i>out_streams</i>. The computation carried out by p0 is specified by function <i>f</i> which reads a single input stream, <i>in_streams[0]</i>, and writes a single output stream, <i>out_streams[0]</i>. This agent makes:
<br>
<br>
<b> y[n] = x[n] + 100 </b>
<br>
<br>
The computation carried out by process p1 is specified by function <i>g</i> which prints <b>2 * y[n]</b> for all n.
<br>
<br>
The source thread sets x[n] to n, and so this multicore program prints:
<br>
<br>
<b> 2 * (n + 100) </b>

In [4]:
def f(in_streams, out_streams):
    """Agent for p0: y[n] = x[n] + 100.

    Same behavior as the agent f defined in the earlier cell; it is
    re-defined here alongside the specification that uses it.
    """
    x = in_streams[0]
    y = out_streams[0]
    map_element(lambda value: value + 100, x, y)

def g(in_streams, out_streams):
    """Agent for p1: print 2 * y[n] for every element of its input stream.

    Same behavior as the agent g defined in the earlier cell; it is
    re-defined here alongside the specification that uses it.
    out_streams is not used.
    """
    y = in_streams[0]
    doubled = Stream('s')
    map_element(lambda value: value * 2, y, doubled)
    print_stream(doubled, 's')

# The specification is a two-item list: [streams, processes].
multicore_specification = [
    # Streams: (stream name, stream type) pairs.
    [('x', 'i'), ('y', 'i')],
    # Processes: p0 runs agent f (x -> y); p1 runs agent g on y.
    [
        dict(name='p0', agent=f, inputs=['x'], outputs=['y'], sources=['x']),
        dict(name='p1', agent=g, inputs=['y']),
    ],
]

processes, procs = get_processes_and_procs(multicore_specification)

# The thread that feeds stream 'x' is attached to p1's thread list.
# NOTE(review): 'x' is declared under 'sources' of p0 while the thread is
# attached to p1 -- confirm this placement is intended.
thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
procs['p1'].threads = [thread_0]

# Start every process, wait for all of them to finish, then terminate them.
for process in processes:
    process.start()
for process in processes:
    process.join()
for process in processes:
    process.terminate()

s[0] = 200
s[1] = 202
s[2] = 204
s[3] = 206
s[4] = 208
s[5] = 210


In [5]:
# Same pipeline as the previous cell, but here stream 'x' is declared as a
# source of p1, the process whose thread list receives the source thread.
multicore_specification = [
    # Streams: (stream name, stream type) pairs.
    [('x', 'i'), ('y', 'i')],
    # Processes: p0 runs agent f (x -> y); p1 runs agent g on y.
    [
        dict(name='p0', agent=f, inputs=['x'], outputs=['y']),
        dict(name='p1', agent=g, inputs=['y'], sources=['x']),
    ],
]

processes, procs = get_processes_and_procs(multicore_specification)

# Attach the thread that feeds stream 'x' to p1.
thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
procs['p1'].threads = [thread_0]

# Start every process, wait for all of them to finish, then terminate them.
for process in processes:
    process.start()
for process in processes:
    process.join()
for process in processes:
    process.terminate()

s[0] = 200
s[1] = 202
s[2] = 204
s[3] = 206
s[4] = 208
s[5] = 210


In [6]:
# Same pipeline as the previous cell, but both streams use stream type 'x'.
# NOTE(review): the text above only describes stream types 'i', 'f' and 'd';
# confirm the meaning of type 'x' against the IoTPy multicore module.
multicore_specification = [
    # Streams: (stream name, stream type) pairs.
    [('x', 'x'), ('y', 'x')],
    # Processes: p0 runs agent f (x -> y); p1 runs agent g on y.
    [
        dict(name='p0', agent=f, inputs=['x'], outputs=['y']),
        dict(name='p1', agent=g, inputs=['y'], sources=['x']),
    ],
]

processes, procs = get_processes_and_procs(multicore_specification)

# Attach the thread that feeds stream 'x' to p1.
thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
procs['p1'].threads = [thread_0]

# Start every process, wait for all of them to finish, then terminate them.
for process in processes:
    process.start()
for process in processes:
    process.join()
for process in processes:
    process.terminate()

s[0] = 200
s[1] = 202
s[2] = 204
s[3] = 206
s[4] = 208
s[5] = 210


## Example of Three Processes in a Row
This example is the same as the previous one except that it has a third process, p2, attached to process p1. The source thread feeds stream <i>x</i> which is the input to process p0. The output of p0 is stream <i>y</i> which is the input to process p1. The output of p1 is stream <i>z</i> which is the input to process p2.
<br>
### Streams
[('x', 'i'), ('y', 'i'), ('z', 'i')]
This specifies that this system has three streams called 'x', 'y' and 'z' which contain ints.
### Sources
<b>Source Thread Target</b> <i>source_thread_target</i>
<br>
This function runs in its own thread. The function puts [0, 1] into the stream called <i>x</i>, then sleeps, then puts [2, 3] into <i>x</i>, sleeps again, and then puts [4, 5] into <i>x</i>. The function then calls <i>terminate_stream</i> to indicate that it has finished executing and so no further values will be appended to <i>x</i>.
<br>
<br>
This function executes in a thread that is attached to process <i>p1</i> (see <i>procs['p1'].threads = [thread_0]</i> in the code below). Stream <i>x</i> is listed as a source in the specification for <i>p1</i>:
<br>
{'name': 'p1', 'agent': h, 'inputs': ['y'], 'outputs': ['z'], <b>'sources': ['x']</b>}
<br>
<b>Stream Sources</b> Stream <i>x</i> is a source in process <i>p1</i> because it appears under 'sources' in the specification of process <i>p1</i>.
### Process Structure
<ol>
    <li>The source thread feeds stream <i>x</i> which is an input of process <i>p0</i>. </li>
    <li> Output stream <i>y</i> of process <i>p0</i> is an input stream of process <i>p1</i>.</li>
    <li> Output stream <i>z</i> of process <i>p1</i> is an input stream of process <i>p2</i>.</li>
    <li> Process <i>p2</i> has no output streams. </li>
</ol>

### Process Functions
Each process function has parameters <i>in_streams</i>, <i>out_streams</i> and possibly additional keyword or positional arguments. The process functions associated with processes <i>p0</i>, <i>p1</i>, and <i>p2</i> are <i>f</i>, <i>h</i> and <i>r</i>, respectively. The process function for a process is in the processes part of <i>multicore_specification</i>.
<br>
<ol>
    <li> The source extends stream <i>x</i> with [0, 1, 2, 3, 4, 5] and then calls <i>terminate_stream</i>. Thus <b>x[n] = n </b> for n less than 6. </li>
    <li> Process function <i>f</i> of <i>p0</i> adds 100 to its <i>in_streams[0]</i> which is stream <i>x</i> and puts the result in its <i>out_streams[0]</i> which is stream <i>y</i>. Thus <b>y[n] = x[n]+100 = n + 100</b> for n less than 6. </li>
    <li> Process function <i>h</i> of <i>p1</i> multiplies its <i>in_streams[0]</i>, which is stream <i>y</i>, by 2 and puts the result in its <i>out_streams[0]</i> which is stream <i>z</i>. Thus <b>z[n] = 2*y[n] = 2n + 200</b> for n less than 6. </li>
    <li> Process function <i>r</i> of <i>p2</i> creates a stream <i>t</i>, multiplies its <i>in_streams[0]</i>, which is stream <i>z</i>, by 3 and puts the result in stream <i>t</i>. This function also prints stream <i>t</i>. Thus it prints <b>3*z[n] = 6n + 600</b> for n less than 6. </li>
</ol>

In [7]:
# Three processes in a row: p0 (agent f) -> p1 (agent h) -> p2 (agent r).
multicore_specification = [
    # Streams: (stream name, stream type) pairs.
    [('x', 'i'), ('y', 'i'), ('z', 'i')],
    # Processes: x -> f -> y -> h -> z -> r (r prints its result).
    [
        dict(name='p0', agent=f, inputs=['x'], outputs=['y']),
        dict(name='p1', agent=h, inputs=['y'], outputs=['z'], sources=['x']),
        dict(name='p2', agent=r, inputs=['z']),
    ],
]

processes, procs = get_processes_and_procs(multicore_specification)

# Attach the thread that feeds stream 'x' to p1, which declares 'x' as a source.
thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
procs['p1'].threads = [thread_0]

# Start every process, wait for all of them to finish, then terminate them.
for process in processes:
    process.start()
for process in processes:
    process.join()
for process in processes:
    process.terminate()

t[0] = 600
t[1] = 606
t[2] = 612
t[3] = 618
t[4] = 624
t[5] = 630


In [8]:
# Same three-process pipeline as the previous cell, with stream type 'x'.
# NOTE(review): the text above only describes stream types 'i', 'f' and 'd';
# confirm the meaning of type 'x' against the IoTPy multicore module.
multicore_specification = [
    # Streams: (stream name, stream type) pairs.
    [('x', 'x'), ('y', 'x'), ('z', 'x')],
    # Processes: x -> f -> y -> h -> z -> r (r prints its result).
    [
        dict(name='p0', agent=f, inputs=['x'], outputs=['y']),
        dict(name='p1', agent=h, inputs=['y'], outputs=['z'], sources=['x']),
        dict(name='p2', agent=r, inputs=['z']),
    ],
]

processes, procs = get_processes_and_procs(multicore_specification)

# Attach the thread that feeds stream 'x' to p1, which declares 'x' as a source.
thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
procs['p1'].threads = [thread_0]

# Start every process, wait for all of them to finish, then terminate them.
for process in processes:
    process.start()
for process in processes:
    process.join()
for process in processes:
    process.terminate()

t[0] = 600
t[1] = 606
t[2] = 612
t[3] = 618
t[4] = 624
t[5] = 630


## Example of Multicore with NumPy Arrays
This example illustrates the use of <b>StreamArray</b> which is a stream treated as a NumPy array with an arbitrarily large number of rows. Using <i>StreamArray</i> can be more efficient than using <i>Stream</i> for large computations. 
<br>
<br>
These examples are simple and small; however, in most applications each process function would convert an input stream to a <i>StreamArray</i> and carry out a lot of computation on arrays before sending the results as output streams.
<br>
<br>
The streams, sources, and process structure are similar to the previous two examples. The process functions differ in that the functions in this example use <i>StreamArray</i> whereas the earlier examples used <i>Stream</i>.
<br>
<br>
You convert a Stream of numbers to a StreamArray of ints, floats, or doubles by calling the functions <b>dtype_int</b>, <b>dtype_float</b>, and <b>dtype_double</b> respectively.
<br>
<br>
In this example, the agent functions <i>f_numpy</i> and <i>g_numpy</i> operate on StreamArrays of floats though the source thread target <i>thread_target_numpy</i> generates a stream of int.

In [9]:
import numpy as np
from IoTPy.helper_functions.type import dtype_float

def test_multicore_with_arrays():
    def f_numpy(in_streams, out_streams):
        map_window(np.mean, dtype_float(in_streams[0]), out_streams[0], window_size=2, step_size=2)

    def g_numpy(in_streams, out_streams):
        t = StreamArray('t')
        map_window(max, dtype_float(in_streams[0]), t, window_size=2, step_size=2)
        print_stream(t, 't')

    def thread_target_numpy(procs):
        for i in range(3):
            extend_stream(procs, data=list(range(i*10, (i+1)*10)), stream_name='x')
            time.sleep(0.001)
        terminate_stream(procs, stream_name='x')

    multicore_specification = [
        # Streams
        [('x', 'i'), ('y', 'f')],
        # Processes
        [{'name': 'p0', 'agent': f_numpy, 'inputs':['x'], 'outputs': ['y'], 'sources': ['x']},
         {'name': 'p1', 'agent': g_numpy, 'inputs': ['y']}]
    ]

    processes, procs = get_processes_and_procs(multicore_specification)
    thread_0 = threading.Thread(target=thread_target_numpy, args=(procs,))
    procs['p1'].threads = [thread_0]

    for process in processes: process.start()
    for process in processes: process.join()
    for process in processes: process.terminate()

# Run the example; the t[...] lines below are printed by agent g_numpy.
test_multicore_with_arrays()

t[0] = 2.5
t[1] = 6.5
t[2] = 10.5
t[3] = 14.5
t[4] = 18.5
t[5] = 22.5
t[6] = 26.5


## Example of Merging Streams from Multiple Processes
This example shows a slightly more complex process structure. The example has four processes
called <i>coordinator</i>, <i>sine</i>, <i>cosine</i>, and <i>tangent</i>. The <i>coordinator</i> generates a sequence of values that are sent to other processes which compute sines, cosines and tangents of these values and send the results back to the <i>coordinator</i>. The <i>coordinator</i> then computes the square of the error --- the difference between tangent and sine/cosine.
<br>
<br>
This example gives names to agents. This is helpful in debugging because the error statements identify the agent that caused the error. We haven't given names to agents in some examples for brevity.

### Process Structure
<ol>
    <li> A source thread target, <i>source_thread_target</i>, extends stream <i>x</i> with a sequence of 10 values between 0.0 and pi. This function executes in a thread attached to the process called <i>coordinator</i>. Stream <i>x</i> is an input for all processes.
    </li> 
    <li> Agents <i>sine</i>, <i>cosine</i>, and <i>tangent</i> read stream <i>x</i> and output streams <i>sines</i>, <i>cosines</i>, and <i>tangents</i> respectively. These streams are inputs to process <i>coordinator</i>.
    </li>
</ol>

In [10]:
from IoTPy.agent_types.merge import zip_map

def example_merging_streams_from_multiple_processes():
    """Run four processes that compute and compare sin/cos with tan.

    Processes 'sine', 'cosine' and 'tangent' each read stream 'x' and write
    streams 'sines', 'cosines' and 'tangents'. Process 'coordinator' reads
    all four streams and prints, as stream 'error', the squared difference
    between sines/cosines and tangents. Stream 'x' is fed by a thread
    attached to the coordinator with 10 evenly spaced values from 0.0 to pi.
    """
    def sine(in_streams, out_streams):
        map_element(np.sin, dtype_float(in_streams[0]), out_streams[0], name='sine')

    def cosine(in_streams, out_streams):
        map_element(np.cos, dtype_float(in_streams[0]), out_streams[0], name='cosine')

    def tangent(in_streams, out_streams):
        map_element(np.tan, dtype_float(in_streams[0]), out_streams[0], name='tangent')

    def coordinate(in_streams, out_streams):
        # in_streams is ordered as in the specification: x, sines, cosines,
        # tangents. The first stream (x) is not needed for the comparison.
        _, sines, cosines, tangents = in_streams

        def ratio(pair):
            # pair is [sine, cosine]
            return pair[0] / pair[1]

        def squared_error(pair):
            # pair is [sine / cosine, tangent]
            return (pair[0] - pair[1]) ** 2

        ratios = Stream('ratios')
        errors = Stream('errors')
        zip_map(ratio, [sines, cosines], ratios, name='sine / cosine')
        zip_map(squared_error, [ratios, tangents], errors, name='compute error')
        print_stream(errors, 'error')

    # Source thread target.
    def source_thread_target(procs):
        extend_stream(procs, data=np.linspace(0.0, np.pi, 10), stream_name='x')
        terminate_stream(procs, stream_name='x')

    multicore_specification = [
        # Streams
        [('x', 'f'), ('sines', 'f'), ('cosines', 'f'), ('tangents', 'f')],
        # Processes
        [{'name': 'sine', 'agent': sine, 'inputs': ['x'], 'outputs': ['sines']},
         {'name': 'cosine', 'agent': cosine, 'inputs': ['x'], 'outputs': ['cosines']},
         # Process name corrected from the misspelled 'tanget'.
         {'name': 'tangent', 'agent': tangent, 'inputs': ['x'], 'outputs': ['tangents']},
         {'name': 'coordinator', 'agent': coordinate,
          'inputs': ['x', 'sines', 'cosines', 'tangents'],
          'sources': ['x']}]
    ]

    processes, procs = get_processes_and_procs(multicore_specification)
    thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
    procs['coordinator'].threads = [thread_0]

    # Start every process, wait for all of them to finish, then terminate them.
    for process in processes:
        process.start()
    for process in processes:
        process.join()
    for process in processes:
        process.terminate()

# Run the example; the error[...] lines below are printed by agent coordinate.
example_merging_streams_from_multiple_processes()

error[0] = 0.0
error[1] = 2.029099530475452e-17
error[2] = 4.306271662118447e-17
error[3] = 1.0658143404632256e-14
error[4] = 3.546012876784043e-14
error[5] = 2.844317562237704e-14
error[6] = 1.020295274871051e-15
error[7] = 6.133504185867917e-16
error[8] = 8.161840277450297e-16
error[9] = 0.0


## Passing Data to and from Multiprocessing Blocks
This example illustrates how to pass data to a multiprocessing block and get data from the block. This example is the same as the previous one except that the variables <b>total</b> and <b>num</b> are passed to the multiprocessing block which returns updated values of these variables.
<br>
<br>
total = multiprocessing.Value('f')
<br>
num = multiprocessing.Value('i')
<br>
<br>
creates <i>total</i> a wrapper for a float, and <i>num</i> a wrapper for int. 
<br>
<br>
These variables can be passed to any collection of processes. In this example they are passed only to the process <i>coordinator</i>.
These variables are assigned initial values from a computation that is not shown here. The multiprocessing block shown updates these values. For example, the value of <i>num</i> is 25 before the block is executed and 45 after it terminates.

### Passing variables as keyword or positional arguments
In this example, variables are passed to the process <i>coordinator</i> as keyword arguments.
The keyword arguments are specified as a dict with the name of an argument (e.g. 'total') and its initial value (<i>total</i>).
<br>
<br>
{'name': 'coordinator', 'agent': coordinate, 'inputs':['x', 'sines', 'cosines', 'tangents'],
<br>
 'sources': ['x'],
<br>
<b>'keyword_args'</b> : {'total' : total, 'num' : num},}

In [11]:
def test_example_passing_data_to_multicore():
        total = multiprocessing.Value('f')
        num = multiprocessing.Value('i')
        # Values computed from an earlier computation which is not shown.
        # total and num are passed to the multiprocessing block.
        total.value = 4.0e-13
        num.value = 25

        def sine(in_streams, out_streams):
            map_element(np.sin, dtype_float(in_streams[0]), out_streams[0], name='sine')

        def cosine(in_streams, out_streams):
            map_element(np.cos, dtype_float(in_streams[0]), out_streams[0], name='cosine')

        def tangent(in_streams, out_streams):
            map_element(np.tan, dtype_float(in_streams[0]), out_streams[0], name='tangent')

        def coordinate(in_streams, out_streams, total, num):
            x, sines, cosines, tangents = in_streams

            def f(lst): return lst[0]/lst[1]

            def g(lst):
                error_squared= (lst[0] - lst[1])**2
                return error_squared

            ratios = Stream('ratios')
            errors = Stream('errors')
            zip_map(f, [sines, cosines], ratios, name='sine / cosine')
            zip_map(g, [ratios, tangents], errors, name='compute error')
            print_stream(errors, 'error')

        # Source thread target.
        def source_thread_target(procs):
            extend_stream(procs, data=np.linspace(0.0, np.pi, 10), stream_name='x')
            terminate_stream(procs, stream_name='x')

        multicore_specification = [
            # Streams
            [('x', 'f'), ('sines', 'f'), ('cosines', 'f'), ('tangents', 'f')],
            # Processes
            [{'name': 'sine', 'agent': sine, 'inputs':['x'], 'outputs': ['sines']},
             {'name': 'cosine', 'agent': cosine, 'inputs':['x'], 'outputs': ['cosines']},
             {'name': 'tanget', 'agent': tangent, 'inputs':['x'], 'outputs': ['tangents']},
             {'name': 'coordinator', 'agent': coordinate, 'inputs':['x', 'sines', 'cosines', 'tangents'],
              'sources': ['x'], 'keyword_args' : {'total' : total, 'num' : num}}]
        ]

        processes, procs = get_processes_and_procs(multicore_specification)
        thread_0 = threading.Thread(target=source_thread_target, args=(procs,))
        procs['coordinator'].threads = [thread_0]

        for process in processes: process.start()
        for process in processes: process.join()
        for process in processes: process.terminate()

# Run the example; the error[...] lines below are printed by agent coordinate.
test_example_passing_data_to_multicore()

error[0] = 0.0
error[1] = 2.029099530475452e-17
error[2] = 4.306271662118447e-17
error[3] = 1.0658143404632256e-14
error[4] = 3.546012876784043e-14
error[5] = 2.844317562237704e-14
error[6] = 1.020295274871051e-15
error[7] = 6.133504185867917e-16
error[8] = 8.161840277450297e-16
error[9] = 0.0
