## Define a simple MLFlow workflow

In [1]:
def run_mlflow_workflow(sqlite_path, batch_size=64, number_epochs=10):
    """Record one MLflow run in a SQLite-backed tracking store.

    A new experiment is created on every call (its name is
    "LinearRegression" followed by a random UUID), a single run is started
    in it, the two hyper-parameters are logged as params, and a fixed
    ``loss`` metric of 0.04 is logged. No model is actually trained.

    Args:
        sqlite_path: Path of the SQLite database file used as the MLflow
            tracking store; it is appended to the ``sqlite:///`` scheme.
        batch_size: Value logged as the ``batch_size`` param.
        number_epochs: Value logged as the ``number_epochs`` param.

    Returns:
        The id of the MLflow run that was created.
    """
    import uuid
    import mlflow

    mlflow.set_tracking_uri(f"sqlite:///{sqlite_path}")

    # The random suffix keeps the experiment name unique across calls.
    experiment_name = "LinearRegression"
    experiment_id = mlflow.create_experiment(
        experiment_name + str(uuid.uuid4())
    )
    with mlflow.start_run(experiment_id=experiment_id) as run:
        mlflow.log_params(
            {"number_epochs": number_epochs, "batch_size": batch_size}
        )

        print("Generated training metadata.")
        mlflow.log_metric("loss", 0.04)
        # `run_uuid` is the deprecated alias of `run_id`.
        return run.info.run_id

In [2]:
# Set log levels through environment variables.
# NOTE(review): presumably these must be set before flowcept is imported in
# the following cells — confirm.
import os

os.environ.update(
    LOG_STREAM_LEVEL="error",
    LOG_FILE_LEVEL="debug",
)

## Initialize interceptor

In [3]:
# Create the MLflow interceptor with its default arguments. Later cells pass
# `interceptor.settings.file_path` to run_mlflow_workflow as the SQLite path.
from flowcept import MLFlowInterceptor
interceptor = MLFlowInterceptor()

## Initialize consumer API

In [4]:
# Wrap the interceptor in the consumer API and start it.
# NOTE(review): nothing visible in this notebook stops the consumer again —
# confirm whether a matching shutdown call is needed at the end.
from flowcept import FlowceptConsumerAPI
consumer = FlowceptConsumerAPI(interceptor)
consumer.start()

<flowcept.flowcept_api.consumer_api.FlowceptConsumerAPI at 0x169c68610>

In [5]:
## Run MLFlow workflow

In [6]:
# Run the workflow, using the interceptor's configured file path as the
# SQLite tracking database, and keep the run id for the queries below.
mlflow_run_id = run_mlflow_workflow(interceptor.settings.file_path)
print(f"MLflow task id={mlflow_run_id}")


Trained model


'14123442c2d74b6a87f7b8c15f30b6ad'

## Initialize Query API

In [7]:
# Create the query API object used by every `query_api.query(...)` call below.
from flowcept import TaskQueryAPI
query_api = TaskQueryAPI()

## Query the task executed

In [8]:
# Look up the task whose id equals the MLflow run id obtained earlier.
_filter = dict(task_id=mlflow_run_id)
query_api.query(_filter)

[{'task_id': '14123442c2d74b6a87f7b8c15f30b6ad',
  'utc_timestamp': 1676057669.196684,
  'status': 'FINISHED',
  'used': {'batch_size': '64', 'number_epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True}]

# Inspecting more tasks through the Query API

In [9]:
from flowcept.commons.utils import get_utc_minutes_ago

### Get the tasks executed in my experiment in the last 60 minutes

In [10]:
# Match tasks of "super-experiment" whose utc_timestamp is at or after the
# value returned by get_utc_minutes_ago(60).
_filter = dict(
    utc_timestamp={"$gte": get_utc_minutes_ago(60)},
    experiment_id="super-experiment",
)
query_api.query(_filter)

[{'task_id': 'dummy_func1-ff697be9ec8641033d80d8f01e72b52b',
  'custom_metadata': {'scheduler': 'tcp://127.0.0.1:55960',
   'scheduler_id': 'Scheduler-7cd46b5a-0f7a-40f1-85a9-b403b10d93b8',
   'scheduler_pid': 33398},
  'status': 'SUBMITTED',
  'utc_timestamp': 1676056235.084591,
  'activity_id': 'dummy_func1',
  'used': {'arg0': 0.2980732034559712},
  'workflow_id': 'wf_0d5e4a40-03e6-47ee-9f46-a047a47dfd19',
  'plugin_id': 'dask',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True,
  'address': 'tcp://127.0.0.1:55967',
  'generated': {'arg0': 0.5961464069119424},
  'start_time': 1676038201.8871877,
  'end_time': 1676038201.8871946,
  'dependents': ['dummy_func2-7602538d2e41e97b798f9289735e3e8c',
   'dummy_func3-adda83d53183fb96c2c7abb330fb5b20']},
 {'task_id': 'dummy_func2-7602538d2e41e97b798f9289735e3e8c',
  'custom_me

### Get the tasks executed in my experiment in the last 60 minutes that were executed using mlflow or dask

In [35]:
# Same time window and experiment as before, additionally restricted to
# tasks whose plugin_id is one of 'mlflow' or 'dask'.
_filter = dict(
    utc_timestamp={"$gte": get_utc_minutes_ago(60)},
    experiment_id="super-experiment",
    plugin_id={"$in": ['mlflow', 'dask']},
)
docs = query_api.query(_filter)
docs

[{'task_id': 'dd167381101d46b1a7ec43db36ba52eb',
  'utc_timestamp': 1675796172.583409,
  'status': 'FINISHED',
  'used': {'batch_size': '64', 'number_epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True},
 {'task_id': '9faf2ef9125e4e8a8b33df7edc02879b',
  'utc_timestamp': 1675796220.932569,
  'status': 'FINISHED',
  'used': {'batch_size': '64', 'number_epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True},
 {'task_id': '69dd04cbea944c5b80b723426079b187',
  'utc_timestamp': 1675796229.67979,
  'status': 'FINISHED',
  'u

### Get the tasks executed in my experiment in the last 60 minutes that generated a batch_size > 0

In [44]:
# Recent Dask tasks of "super-experiment" that generated a batch_size
# strictly greater than 0. `$gt` (not `$gte`) is used so that a generated
# value of 0 is excluded, matching the "batch_size > 0" intent of this step.
_filter = {
    "utc_timestamp": {"$gte": get_utc_minutes_ago(60)},
    "experiment_id": "super-experiment",
    "plugin_id": 'dask',
    "generated.batch_size": {"$gt": 0},
}
docs = query_api.query(filter=_filter)

# Collect the generated batch sizes; they are fed to new MLflow runs below.
batch_sizes = []
for doc in docs:
    generated_batch_size = doc['generated']['batch_size']
    print(f"task={doc['task_id']}, generated batch_size={generated_batch_size}")
    batch_sizes.append(generated_batch_size)
batch_sizes

task=dummy_func3-5711dd02e7574d2e484d545a9827112d, generated batch_size=19


[19]

### Now run a new MLFlow task using the batch_sizes generated by the Dask workflow

In [49]:
# Start one MLflow run per batch size found above. Every run id is kept in
# `mlflow_task_ids`; `mlflow_task` still holds the most recent one, as before,
# so that no id is lost when there is more than one batch size.
mlflow_task_ids = []
for batch_size in batch_sizes:
    mlflow_task = run_mlflow_workflow(
        interceptor.settings.file_path, batch_size=batch_size
    )
    mlflow_task_ids.append(mlflow_task)
    print(mlflow_task)


Trained model
7f515374639d4631b1fcaaf4c0365902
We need to intercept this Run: 7f515374639d4631b1fcaaf4c0365902
Going to send to Redis an intercepted message:
	{"task_id": "7f515374639d4631b1fcaaf4c0365902", "utc_timestamp": 1675799022.065446, "status": "FINISHED", "used": {"batch_size": "19", "number_epochs": "10"}, "generated": {"loss": 0.04}, "plugin_id": "mlflow", "user": "root", "experiment_id": "super-experiment", "sys_name": "Darwin", "node_name": "MAC132633", "login_name": "rsr", "public_ip": "2620:0:2b30:e0::4b9", "private_ip": "10.158.26.233"}
An intercepted message was received.
Time to flush!


### Get these tasks

In [50]:
# Fetch the stored record for the run id currently held in `mlflow_task`.
_filter = dict(task_id=mlflow_task)
docs = query_api.query(filter=_filter)
docs

[{'task_id': '7f515374639d4631b1fcaaf4c0365902',
  'utc_timestamp': 1675799022.065446,
  'status': 'FINISHED',
  'used': {'batch_size': '19', 'number_epochs': '10'},
  'generated': {'loss': 0.04},
  'plugin_id': 'mlflow',
  'user': 'root',
  'experiment_id': 'super-experiment',
  'sys_name': 'Darwin',
  'node_name': 'MAC132633',
  'login_name': 'rsr',
  'public_ip': '2620:0:2b30:e0::4b9',
  'private_ip': '10.158.26.233',
  'debug': True}]

An intercepted message was received.
Time to flush!
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
Time to flush!
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
Time to flush!
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
Time to flush!
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
Time to flush!
An intercepted message was received.
An intercepted message was received.
An intercepted message was received.
