In [1]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

## Define a simple MLFlow workflow

In [2]:
def run_mlflow_workflow(sqlite_path, batch_size=64, epochs=10):
    """Record one placeholder MLflow training run and return its run id.

    A fresh experiment named ``LinearRegression`` followed by a random UUID is
    created in the SQLite tracking store at ``sqlite_path``. A single run then
    logs ``epochs`` and ``batch_size`` as parameters and a fixed ``loss``
    metric of 0.04. No model is actually trained.

    Args:
        sqlite_path: Path of the SQLite database file used to build the
            ``sqlite:///`` MLflow tracking URI.
        batch_size: Value logged as the ``batch_size`` parameter.
        epochs: Value logged as the ``epochs`` parameter.

    Returns:
        The id of the MLflow run that was created.
    """
    import uuid
    import mlflow

    mlflow.set_tracking_uri(f"sqlite:///{sqlite_path}")
    # The random suffix gives every call its own experiment name.
    experiment_id = mlflow.create_experiment(f"LinearRegression{uuid.uuid4()}")
    with mlflow.start_run(experiment_id=experiment_id) as run:
        # Both hyperparameters are logged in a single call.
        mlflow.log_params({"epochs": epochs, "batch_size": batch_size})
        # Actual training code would go here
        print("Generated training metadata.")
        mlflow.log_metric("loss", 0.04)
        # `run_id` is the supported attribute; `run_uuid` is its deprecated alias.
        return run.info.run_id

In [3]:
# Logging verbosity is configured through environment variables:
# errors only on the stream, debug detail in the log file.
# NOTE(review): presumably these must be set before flowcept is imported — confirm.
import os
os.environ.update(LOG_STREAM_LEVEL="error", LOG_FILE_LEVEL="debug")

## Initialize interceptor

In [4]:
# Build the MLflow interceptor with no arguments; later cells read the MLflow
# database path from `interceptor.settings.file_path`.
from flowcept import MLFlowInterceptor
interceptor = MLFlowInterceptor()

## Initialize consumer API

In [5]:
# Wrap the interceptor in a consumer and start it. `start()` is the last
# expression of the cell, so its return value is what the cell displays.
from flowcept import FlowceptConsumerAPI
consumer = FlowceptConsumerAPI(interceptor)
consumer.start()

<flowcept.flowcept_api.consumer_api.FlowceptConsumerAPI at 0x286fb3c10>

## Run MLFlow workflow

In [6]:
# Show the database file path the interceptor is configured with; the workflow
# runs in this notebook write their MLflow data to this same path.
interceptor.settings.file_path

'mlflow.db'

In [7]:
# Run the workflow once with its default batch_size and epochs, writing to the
# database file the interceptor is configured with, and keep the run id for querying.
mlflow_run_id = run_mlflow_workflow(sqlite_path=interceptor.settings.file_path)
print("MLflow task id={}".format(mlflow_run_id))

Generated training metadata.
MLflow task id=a266251480884c178c2c9f560d0030dd


In [8]:
# Pause 10 seconds before querying. The pause exists for automated CI runs of
# this notebook and is not needed in normal interactive use.
sleep(10)

## Initialize Query API

In [9]:
# Create the query handle that every query cell in this notebook goes through.
from flowcept import TaskQueryAPI
query_api = TaskQueryAPI()

## Query the task executed

In [10]:
# Look up the task whose task_id equals the MLflow run id returned by the workflow.
_filter = dict(task_id=mlflow_run_id)
query_api.query(_filter)

[{'task_id': 'a266251480884c178c2c9f560d0030dd',
  'utc_timestamp': 1687896979.872887,
  'used': {'batch_size': '64', 'epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'campaign_id': 'super_campaign',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'login_name',
  'hostname': 'mac132633.ornl.gov',
  'extra_metadata': {'place_holder': ''},
  'debug': True,
  'finished': True}]

# Inspect more tasks through the query api

In [11]:
from flowcept.commons.utils import get_utc_minutes_ago

### Get the tasks executed in my experiment in the last 60 minutes

In [16]:
# Tasks of the "super_campaign" campaign whose utc_timestamp is at or after
# the value returned by get_utc_minutes_ago(60).
_filter = dict(
    utc_timestamp={"$gte": get_utc_minutes_ago(60)},
    campaign_id="super_campaign",
)
query_api.query(_filter)

[{'task_id': 'dummy_func1-e2a4c6309f2b80d9d8abbadc2b5c8e5d',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:59761',
   'scheduler_id': 'Scheduler-336f5657-4abd-4dbf-99dc-cde742dfd505',
   'scheduler_pid': 45895},
  'submission_time': 1687892509.605954,
  'activity_id': 'dummy_func1',
  'used': {'arg0': 0.3933722210431303},
  'workflow_id': 'wf_2956d8ef-73be-4a74-8bc2-25ec4e000508',
  'utc_timestamp': 1687892509.607035,
  'plugin_id': 'dask',
  'user': 'root',
  'campaign_id': 'super_campaign',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'login_name',
  'hostname': 'mac132633.ornl.gov',
  'extra_metadata': {'place_holder': ''},
  'debug': True,
  'submitted': True,
  'address': 'tcp://127.0.0.1:59768',
  'running': True,
  'generated': {'arg0': 0.7867444420862606},
  'start_time': 1687878109.6109192,
  'end_time': 1687878109.6109223,
  'finished': True},
 {'task_id': 'dummy_func2-c8cf61cd62c17a0da6fb1098706b5dbe',
  'custom_metadata': {'scheduler': 'tcp://12

### Get the tasks executed in my experiment in the last 60 minutes that were executed using either mlflow or dask

In [18]:
# Same time window and campaign as before, restricted to tasks whose plugin_id
# is either 'mlflow' or 'dask' ($in matches any value in the list).
_filter = dict(
    utc_timestamp={"$gte": get_utc_minutes_ago(60)},
    campaign_id="super_campaign",
    plugin_id={"$in": ['mlflow', 'dask']},
)
docs = query_api.query(_filter)
docs

[{'task_id': 'dummy_func1-e2a4c6309f2b80d9d8abbadc2b5c8e5d',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:59761',
   'scheduler_id': 'Scheduler-336f5657-4abd-4dbf-99dc-cde742dfd505',
   'scheduler_pid': 45895},
  'submission_time': 1687892509.605954,
  'activity_id': 'dummy_func1',
  'used': {'arg0': 0.3933722210431303},
  'workflow_id': 'wf_2956d8ef-73be-4a74-8bc2-25ec4e000508',
  'utc_timestamp': 1687892509.607035,
  'plugin_id': 'dask',
  'user': 'root',
  'campaign_id': 'super_campaign',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'login_name',
  'hostname': 'mac132633.ornl.gov',
  'extra_metadata': {'place_holder': ''},
  'debug': True,
  'submitted': True,
  'address': 'tcp://127.0.0.1:59768',
  'running': True,
  'generated': {'arg0': 0.7867444420862606},
  'start_time': 1687878109.6109192,
  'end_time': 1687878109.6109223,
  'finished': True},
 {'task_id': 'dummy_func2-c8cf61cd62c17a0da6fb1098706b5dbe',
  'custom_metadata': {'scheduler': 'tcp://12

### Get the Dask tasks executed in my experiment in the last 60 minutes that generated a batch_size >= 0

In [19]:
# Dask tasks of the campaign within the time window that generated a
# batch_size greater than or equal to 0.
_filter = dict(
    utc_timestamp={"$gte": get_utc_minutes_ago(60)},
    campaign_id="super_campaign",
    plugin_id='dask',
)
# The dotted key cannot be written as a keyword argument, so it is added separately.
_filter["generated.batch_size"] = {"$gte": 0}

docs = query_api.query(filter=_filter)

# Report each matching task and gather its generated batch_size, in query order.
batch_sizes = []
for doc in docs:
    print("task={}, generated batch_size={}".format(doc['task_id'], doc['generated']['batch_size']))
    batch_sizes.append(doc['generated']['batch_size'])
batch_sizes

task=calculate_batch_and_epochs-bf4ebf3fbbc76e7517ea3b4eab46c56e, generated batch_size=18
task=calculate_batch_and_epochs-5af210f426b0a8c42a9302411639c68e, generated batch_size=21
task=calculate_batch_and_epochs-2153e7442c9cb4ec0537bd7f6504f448, generated batch_size=20
task=calculate_batch_and_epochs-4acef3484d0b37571f7656a9d5efd819, generated batch_size=17
task=calculate_batch_and_epochs-cea197d7e360873a91a6bf0e015023a1, generated batch_size=16
task=calculate_batch_and_epochs-fba26a979e3a9ad1d59824712b3b3e32, generated batch_size=20
task=calculate_batch_and_epochs-6b979ef78f9983cedd596e25a1ac8746, generated batch_size=19


[18, 21, 20, 17, 16, 20, 19]

### Now run new MLFlow tasks, one for each batch_size generated by the Dask workflow

In [20]:
# One MLflow run per batch size collected from the Dask tasks. `mlflow_task` is
# rebound on every iteration, so after the loop it holds only the last run id.
for batch_size in batch_sizes:
    mlflow_task = run_mlflow_workflow(
        sqlite_path=interceptor.settings.file_path,
        batch_size=batch_size,
    )
    print(mlflow_task)

Generated training metadata.
65d1ebc23f1d4a649fa5f56bda1a0789
Generated training metadata.
1b411ee574894115aedb2cc47b262673
Generated training metadata.
9618dfcb559f42c5b22e9c5a5df24cdb
Generated training metadata.
d897be05f3c249b896114cfc1d83645f
Generated training metadata.
b711f70593ac409ca87691682ce6a7e2
Generated training metadata.
31ca334391ac47af951d0a5bb848b5e2
Generated training metadata.
15386ab3fce542af86a4b1282fab236c


In [21]:
# Pause 10 seconds before querying. The pause exists for automated CI runs of
# this notebook and is not needed in normal interactive use.
sleep(10)

### Get the last of these tasks

In [22]:
# `mlflow_task` holds a single run id, so this query targets that one task only.
_filter = dict(task_id=mlflow_task)
docs = query_api.query(filter=_filter)
docs

[{'task_id': '15386ab3fce542af86a4b1282fab236c',
  'utc_timestamp': 1687897491.719,
  'used': {'batch_size': '19', 'epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'campaign_id': 'super_campaign',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'login_name',
  'hostname': 'mac132633.ornl.gov',
  'extra_metadata': {'place_holder': ''},
  'debug': True,
  'finished': True}]

## Stop consumers

In [23]:
consumer.stop()