In [1]:
! python reset_dask_nb_exec_counts.py   
# Workaround: this notebook hits a minor bug after the Dask cluster is started.
# The command above simply resets the notebook's execution counts to fix it.
# Remember to save the notebook (Cmd+S) after you run the Dask cluster setup.

In [12]:
# The sleeps in this notebook exist only because it is executed automatically as part of CI/CD.
# They are not necessary when a user runs the notebook interactively.
from time import sleep

In [2]:
def dummy_func1(x, workflow_id=None):
    """Return ``x`` multiplied by two.

    Args:
        x: Value to double; anything supporting ``x * 2``.
        workflow_id: Not used in the body. The notebook passes it as a keyword
            on every submit, presumably so Flowcept can associate the task
            with a workflow -- TODO confirm.

    Returns:
        The result of ``x * 2``.
    """
    doubled = x * 2
    return doubled


def dummy_func2(y, workflow_id=None):
    """Return ``y`` added to itself.

    Args:
        y: Value to add to itself; anything supporting ``y + y``.
        workflow_id: Not used in the body. The notebook passes it as a keyword
            on every submit, presumably so Flowcept can associate the task
            with a workflow -- TODO confirm.

    Returns:
        The result of ``y + y``.
    """
    summed = y + y
    return summed


def dummy_func3(z, w, workflow_id=None):
    """Build a one-entry dict whose ``batch_size`` is derived from ``z`` and ``w``.

    Args:
        z: First numeric operand.
        w: Second numeric operand.
        workflow_id: Not used in the body. The notebook passes it as a keyword
            on every submit, presumably so Flowcept can associate the task
            with a workflow -- TODO confirm.

    Returns:
        ``{"batch_size": int(z + w + 16)}``; ``int`` truncates any fractional
        part of the sum.
    """
    total = z + w + 16
    return dict(batch_size=int(total))

In [3]:
# Optional: control Flowcept's log levels through environment variables.
import os

os.environ.update(
    {
        "LOG_STREAM_LEVEL": "error",
        "LOG_FILE_LEVEL": "debug",
    }
)

In [4]:
def setup_local_dask_cluster():
    """Start a two-worker local Dask cluster with Flowcept's Dask plugins attached.

    Returns:
        A ``dask.distributed.Client`` connected to the new cluster's scheduler.
        The ``LocalCluster`` object itself is not returned.
    """
    from dask.distributed import Client, LocalCluster
    from flowcept import FlowceptDaskSchedulerPlugin, FlowceptDaskWorkerPlugin

    local_cluster = LocalCluster(n_workers=2)
    dask_scheduler = local_cluster.scheduler
    connected_client = Client(dask_scheduler.address)

    # The scheduler-side plugin is added directly on the scheduler object,
    # while the worker-side plugin is registered through the client.
    dask_scheduler.add_plugin(FlowceptDaskSchedulerPlugin(dask_scheduler))
    connected_client.register_worker_plugin(FlowceptDaskWorkerPlugin())

    return connected_client

In [5]:
# Start the local cluster and keep the connected client for the cells below.
dask_client = setup_local_dask_cluster()
# Bare last expression: the notebook renders the client's summary as the cell output.
dask_client



0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://127.0.0.1:56387,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:56394,Total threads: 5
Dashboard: http://127.0.0.1:56397/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:56390,
Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-9v8uiklt,Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-9v8uiklt
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.34 MiB,Spilled bytes: 0 B
Read bytes: 0.0 B,Write bytes: 0.0 B

0,1
Comm: tcp://127.0.0.1:56395,Total threads: 5
Dashboard: http://127.0.0.1:56396/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:56391,
Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-4k5yto80,Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-4k5yto80
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 66.5%,Last seen: Just now
Memory usage: 367.33 MiB,Spilled bytes: 0 B
Read bytes: 39.40 kiB,Write bytes: 31.04 kiB


## Starting Flowcept's Consumer

In [6]:
from flowcept import FlowceptConsumerAPI
# Create the consumer API object; `consumer.start()` is invoked in the following cell.
consumer = FlowceptConsumerAPI()

In [7]:
# Start the consumer. The call returns the FlowceptConsumerAPI object, which is
# why its repr is shown as this cell's output.
consumer.start()

<flowcept.flowcept_api.consumer_api.FlowceptConsumerAPI at 0x2840e3ac0>

## Client.Submit-based Workflow

In [8]:
import numpy as np
from uuid import uuid4

# Random scalar used as the input of the first task.
i1 = np.random.random()

# Unique id passed to every task of this run; the query cell later filters on it.
wf_id = f"wf_{uuid4()}"
print(f"Workflow_Id={wf_id}")

# Chain three tasks: the futures o1 and o2 are passed as arguments to the
# downstream submits, so dummy_func3 depends on both earlier results.
o1 = dask_client.submit(dummy_func1, i1, workflow_id=wf_id)
o2 = dask_client.submit(dummy_func2, o1, workflow_id=wf_id)
o3 = dask_client.submit(dummy_func3, o1, o2, workflow_id=wf_id)

# Fetch the final task's result, then print its Dask key.
print(f"Result={o3.result()}")
print(f"Task3_id={o3.key}")

Workflow_Id=wf_d1777dc4-bcb8-4caa-af7d-1fda7aabd35d
Result={'batch_size': 20}
Task3_id=dummy_func3-a5532dbb19f6ee8eb9df16d34fe29101


In [null]:
# CI-only pause (see the note next to the `sleep` import); presumably it gives the
# consumer time to process the submitted tasks before they are queried -- TODO confirm.
sleep(10)

## Starting Flowcept Query API

In [9]:
from flowcept import TaskQueryAPI
# Query API object used below to look up the tasks recorded for this workflow.
query_api = TaskQueryAPI()

In [null]:
sleep(5)  # Only needed for the automated CI/CD runs; not required in interactive use.

In [10]:
# Select the tasks whose workflow_id matches the workflow submitted above.
_filter = {"workflow_id": wf_id}
tasks = query_api.query(_filter)
# Bare last expression: display the returned task records as the cell output.
tasks

[{'task_id': 'dummy_func1-27b109469155692a51dc5beeb73903c4',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:56387',
   'scheduler_id': 'Scheduler-bb138f70-babf-43e5-9b79-abbd359ba445',
   'scheduler_pid': 34265},
  'status': 'FINISHED',
  'utc_timestamp': 1676056417.387205,
  'activity_id': 'dummy_func1',
  'used': {'arg0': 0.7610238851750382},
  'workflow_id': 'wf_d1777dc4-bcb8-4caa-af7d-1fda7aabd35d',
  'plugin_id': 'dask',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True,
  'address': 'tcp://127.0.0.1:56394',
  'generated': {'arg0': 1.5220477703500763},
  'start_time': 1676038417.3887954,
  'end_time': 1676038417.3887985},
 {'task_id': 'dummy_func2-bfb285a3ab939a8a8b8e5de2b30e929a',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:56387',
   'scheduler_id': 'Scheduler-bb138f70-babf-43e5-9b79-abbd359ba445',
 

## Stopping consumer

In [11]:
# Stop the consumer that was started earlier in the notebook.
consumer.stop()