In [None]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

## Define a simple MLflow workflow

In [None]:
def run_mlflow_workflow(sqlite_path, batch_size=64, number_epochs=10):
    """Record one minimal MLflow run in a SQLite-backed tracking store.

    Every call creates a new experiment whose name is "LinearRegression"
    followed by a random UUID, then logs a single run in it containing the two
    hyperparameters and a fixed ``loss`` metric of 0.04. No model is trained;
    the function only produces tracking metadata.

    Args:
        sqlite_path: Path of the SQLite database file MLflow should use as its
            tracking store (it is appended to ``sqlite:///``).
        batch_size: Value logged as the ``batch_size`` parameter.
        number_epochs: Value logged as the ``number_epochs`` parameter.

    Returns:
        The ID of the MLflow run that was created.
    """
    # Imported locally so the function is self-contained when shipped elsewhere.
    import uuid

    import mlflow

    mlflow.set_tracking_uri(f"sqlite:///{sqlite_path}")
    # The UUID suffix keeps the experiment name unique across repeated calls.
    experiment_id = mlflow.create_experiment(f"LinearRegression{uuid.uuid4()}")
    with mlflow.start_run(experiment_id=experiment_id) as run:
        mlflow.log_params(
            {"number_epochs": number_epochs, "batch_size": batch_size}
        )

        print("Generated training metadata.")
        mlflow.log_metric("loss", 0.04)
        # run_id replaces the deprecated run_uuid attribute; both hold the same value.
        return run.info.run_id

In [None]:
# Configure logging through environment variables: "error" for the stream
# level and "debug" for the file level.
# NOTE(review): presumably these must be set before flowcept is imported in the
# cells below - confirm.
import os
os.environ.update({
    'LOG_STREAM_LEVEL': "error",
    'LOG_FILE_LEVEL': "debug",
})

## Initialize interceptor

In [None]:
# Build the MLflow interceptor with no arguments. Later cells read
# interceptor.settings.file_path and pass the interceptor to the consumer API.
from flowcept import MLFlowInterceptor
interceptor = MLFlowInterceptor()

## Initialize consumer API

In [None]:
# Wrap the interceptor in a consumer and start it. The consumer is stopped
# again in the final cell of the notebook.
from flowcept import FlowceptConsumerAPI
consumer = FlowceptConsumerAPI(interceptor)
consumer.start()

## Run the MLflow workflow

In [None]:
# Run the workflow with its default batch_size and number_epochs, using the
# file path from the interceptor's settings as the SQLite tracking store.
# The returned run ID is used as the task_id filter in the query further down.
mlflow_run_id = run_mlflow_workflow(interceptor.settings.file_path)
print(f"MLflow task id={mlflow_run_id}")

In [None]:
# Pause for 10 seconds before querying. Per the note in the first cell, sleeps
# exist only for the automated CI/CD runs.
# NOTE(review): presumably this gives the consumer time to persist the task - confirm.
sleep(10)

## Initialize Query API

In [None]:
# Query API object reused by every query cell below.
from flowcept import TaskQueryAPI
query_api = TaskQueryAPI()

## Query the task executed

In [None]:
# Look up the task whose task_id equals the MLflow run ID returned by the
# workflow above; the query result is the cell's displayed output.
_filter = {"task_id": mlflow_run_id}
query_api.query(_filter)

## Inspect more tasks through the Query API

In [None]:
from flowcept.commons.utils import get_utc_minutes_ago

### Get the tasks executed in my experiment in the last 60 minutes

In [None]:
# Two conditions: utc_timestamp must be at or after the value returned by
# get_utc_minutes_ago(60), and experiment_id must equal the literal
# "super-experiment".
# NOTE(review): operators such as $gte look MongoDB-style - confirm.
_filter = {
    "utc_timestamp": { "$gte" : get_utc_minutes_ago(60) },
    "experiment_id": "super-experiment"
}
query_api.query(_filter)

### Get the tasks executed in my experiment in the last 60 minutes that were executed using MLflow or Dask

In [None]:
# Same time and experiment conditions as the previous query, plus a plugin
# restriction: $in accepts a task whose plugin_id is either 'mlflow' or 'dask'
# (MongoDB-style operator semantics assumed - confirm).
_filter = {
    "utc_timestamp": { "$gte" : get_utc_minutes_ago(60) },
    "experiment_id": "super-experiment",
    "plugin_id": {"$in": ['mlflow', 'dask']}
}
docs = query_api.query(_filter)
# Bare expression so the notebook renders the query result.
docs

### Get the Dask tasks executed in my experiment in the last 60 minutes that generated a batch_size >= 0

In [None]:
# Dask tasks of "super-experiment" with a utc_timestamp at or after
# get_utc_minutes_ago(60) whose generated batch_size passes the $gte 0 check
# (so a batch_size of exactly 0 is included).
_filter = {
    "utc_timestamp": {"$gte": get_utc_minutes_ago(60)},
    "experiment_id": "super-experiment",
    "plugin_id": 'dask',
    "generated.batch_size": {"$gte": 0},
}
docs = query_api.query(filter=_filter)

# Report each matching task and collect its batch size; the list feeds the
# MLflow runs started in a later cell.
batch_sizes = []
for doc in docs:
    generated_batch_size = doc['generated']['batch_size']
    print(f"task={doc['task_id']}, generated batch_size={generated_batch_size}")
    batch_sizes.append(generated_batch_size)
batch_sizes

### Now run a new MLflow task for each batch_size generated by the Dask workflow

In [None]:
for batch_size in batch_sizes:
    mlflow_task = run_mlflow_workflow(interceptor.settings.file_path, batch_size=batch_size)
    print(mlflow_task)

In [None]:
sleep(10)

### Get these tasks

In [None]:
_filter = {
    "task_id": mlflow_task
}
docs = query_api.query(filter=_filter)
docs

## Stop consumers

In [None]:
# Stop the consumer that was started earlier in the notebook.
consumer.stop()