In [1]:
! python reset_dask_nb_exec_counts.py   
! rm -f output.log
# Workaround: this notebook hits a silly bug after the Dask cluster is started.
# The `reset_dask_nb_exec_counts.py` command above just resets the notebook's
# execution counts to fix it; the `rm -f` removes any existing output.log.
# After you run the Dask cluster setup, just save the notebook (cmd+s).

In [2]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

In [3]:
def dummy_func1(x, workflow_id=None):
    """Return ``x`` multiplied by two.

    Args:
        x: Value to double; anything that supports ``x * 2``.
        workflow_id: Not used inside the function body.
            NOTE(review): presumably picked up by the Flowcept Dask plugins
            to tag the task with its workflow - confirm.

    Returns:
        The result of ``x * 2``.
    """
    doubled = x * 2
    return doubled


def dummy_func2(y, workflow_id=None):
    """Return ``y`` added to itself.

    Args:
        y: Value to add to itself; anything that supports ``y + y``.
        workflow_id: Not used inside the function body.
            NOTE(review): presumably picked up by the Flowcept Dask plugins
            to tag the task with its workflow - confirm.

    Returns:
        The result of ``y + y``.
    """
    summed = y + y
    return summed


def calculate_batch_and_epochs(z, w, workflow_id=None):
    """Derive a toy ``batch_size`` / ``epochs`` pair from two numbers.

    Args:
        z: First numeric input.
        w: Second numeric input; used as a divisor, so it must be non-zero.
        workflow_id: Not used inside the function body.
            NOTE(review): presumably picked up by the Flowcept Dask plugins
            to tag the task with its workflow - confirm.

    Returns:
        A dict with two integer entries:
        ``"batch_size"`` is ``z + w + 16`` truncated by ``int``, and
        ``"epochs"`` is ``int(z / w) + 1``, but never less than 2.
    """
    batch_size = int(z + w + 16)
    ratio_based_epochs = int(z / w) + 1
    # Enforce a floor of two epochs.
    epochs = ratio_based_epochs if ratio_based_epochs > 2 else 2
    return dict(batch_size=batch_size, epochs=epochs)

In [4]:
# Optional: environment variables that control Flowcept's log levels
# (one level for the log stream, one for the log file).
import os
os.environ.update(
    {
        "LOG_STREAM_LEVEL": "error",
        "LOG_FILE_LEVEL": "debug",
    }
)

In [5]:
def setup_local_dask_cluster():
    """Start a local two-worker Dask cluster instrumented with Flowcept.

    A ``LocalCluster`` with two workers is created and a ``Client`` is
    connected to its scheduler address. A ``FlowceptDaskSchedulerPlugin`` is
    then added to the scheduler and a ``FlowceptDaskWorkerPlugin`` is
    registered on the workers through the client.

    Returns:
        The connected ``dask.distributed.Client``.
    """
    # Imported lazily so the third-party packages are only loaded when a
    # cluster is actually requested.
    from dask.distributed import Client, LocalCluster
    from flowcept import (
        FlowceptDaskSchedulerPlugin,
        FlowceptDaskWorkerPlugin,
    )

    local_cluster = LocalCluster(n_workers=2)
    dask_scheduler = local_cluster.scheduler
    dask_client = Client(dask_scheduler.address)

    # Flowcept plugins: scheduler side first, then worker side.
    dask_scheduler.add_plugin(FlowceptDaskSchedulerPlugin(dask_scheduler))
    dask_client.register_worker_plugin(FlowceptDaskWorkerPlugin())

    return dask_client

## Start Flowcept's Consumer

In [6]:
from flowcept import FlowceptConsumerAPI
# Only instantiate the consumer here; it is started in the following cell.
consumer = FlowceptConsumerAPI()

In [7]:
# start() returns the FlowceptConsumerAPI object, which is what the cell output shows.
consumer.start()

<flowcept.flowcept_api.consumer_api.FlowceptConsumerAPI at 0x107408f10>

## Start Local Dask Cluster

In [8]:
# Build the local two-worker cluster with the Flowcept plugins registered.
dask_client = setup_local_dask_cluster()
# Bare last expression so the notebook renders the client summary.
dask_client



0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://127.0.0.1:64487,Workers: 2
Dashboard: http://127.0.0.1:8787/status,Total threads: 10
Started: Just now,Total memory: 16.00 GiB

0,1
Comm: tcp://127.0.0.1:64494,Total threads: 5
Dashboard: http://127.0.0.1:64497/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:64490,
Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-s5b769nf,Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-s5b769nf
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 56.59 MiB,Spilled bytes: 0 B
Read bytes: 41.69 MiB,Write bytes: 0.0 B

0,1
Comm: tcp://127.0.0.1:64495,Total threads: 5
Dashboard: http://127.0.0.1:64496/status,Memory: 8.00 GiB
Nanny: tcp://127.0.0.1:64491,
Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-c82d3vmm,Local directory: /var/folders/jx/23j21rtx1czb2tpqht16pz907m8f48/T/dask-worker-space/worker-c82d3vmm
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 56.91 MiB,Spilled bytes: 0 B
Read bytes: 42.11 MiB,Write bytes: 0.0 B


## Client.Submit-based Workflow

In [9]:
import numpy as np
from uuid import uuid4

# Single random float that feeds the three-task chain below.
i1 = np.random.random()
# All three tasks carry the same workflow id so they can be queried together later.
wf_id = f"wf_{uuid4()}"
print(f"Workflow_Id={wf_id}")
# Task chain: o1 = dummy_func1(i1), o2 = dummy_func2(o1),
# o3 = calculate_batch_and_epochs(o1, o2). The futures returned by submit()
# are passed straight into the downstream submits.
o1 = dask_client.submit(dummy_func1, i1, workflow_id=wf_id)
o2 = dask_client.submit(dummy_func2, o1, workflow_id=wf_id)
o3 = dask_client.submit(calculate_batch_and_epochs, o1, o2, workflow_id=wf_id)
print(f"Task3_id={o3.key}")
# result() blocks until the final task has finished.
print(f"Result={o3.result()}")

Workflow_Id=wf_c84857c0-6a95-4eef-a9b2-a0fcad2dc2b4
Task3_id=calculate_batch_and_epochs-6b979ef78f9983cedd596e25a1ac8746
Result={'batch_size': 19, 'epochs': 2}


In [10]:
# Pause needed only for the automated CI/CD runs of this notebook.
# NOTE(review): presumably gives Flowcept time to store the captured tasks
# before they are queried - confirm.
sleep(10)

## Start Flowcept Query API

In [11]:
from flowcept import TaskQueryAPI
# Query interface used further down to look up the captured tasks.
query_api = TaskQueryAPI()

In [12]:
sleep(5)  # These sleeps are only needed because the notebook is tested automatically in CI/CD.

## Query the database

In [13]:
# Select only the tasks that were tagged with this run's workflow id.
_filter = {"workflow_id": wf_id}
tasks = query_api.query(_filter)
# Bare last expression so the notebook renders the returned task records.
tasks

[{'task_id': 'dummy_func1-6b9cbd2dcfab03acf8176ba9e8610893',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:64487',
   'scheduler_id': 'Scheduler-062bf23d-dfe4-444d-88c5-16148e9fac38',
   'scheduler_pid': 52680},
  'submission_time': 1687896427.587981,
  'activity_id': 'dummy_func1',
  'used': {'arg0': 0.5514012560011143},
  'workflow_id': 'wf_c84857c0-6a95-4eef-a9b2-a0fcad2dc2b4',
  'utc_timestamp': 1687896427.590602,
  'plugin_id': 'dask',
  'user': 'root',
  'campaign_id': 'super_campaign',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'login_name',
  'hostname': 'mac132633.ornl.gov',
  'extra_metadata': {'place_holder': ''},
  'debug': True,
  'submitted': True,
  'address': 'tcp://127.0.0.1:64494',
  'running': True,
  'generated': {'arg0': 1.1028025120022287},
  'start_time': 1687882027.5906188,
  'end_time': 1687882027.590622,
  'finished': True},
 {'task_id': 'dummy_func2-1114bce36aa636585aa261dab99c9d81',
  'custom_metadata': {'scheduler': 'tcp://127

In [14]:
# Shut the Dask cluster down before the Flowcept consumer is stopped below.
dask_client.shutdown()

In [15]:
# Pause needed only for the automated CI/CD runs of this notebook.
sleep(3)

## Stop consumer

In [16]:
# Stop the Flowcept consumer that was started near the top of the notebook.
consumer.stop()

Worker process died unexpectedly
Worker process died unexpectedly
Exception in thread Nanny stop queue watch:
Traceback (most recent call last):
  File "/Users/rsr/opt/miniconda3/envs/flowcept/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/Users/rsr/opt/miniconda3/envs/flowcept/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/rsr/opt/miniconda3/envs/flowcept/lib/python3.8/site-packages/distributed/nanny.py", line 897, in watch_stop_q


In [3]:
# The exception above is raised inside Dask's Nannies after client.shutdown() is called. It only happens when the Dask cluster is shut down, and it occurs even if consumer.stop() is not called.

In [17]:
# End the session once the demo is complete.
# NOTE(review): presumably here so the automated CI/CD run terminates cleanly - confirm.
exit()