In [None]:
# hide
# default_exp experiment_tracking

In [None]:
# export

import tempfile
from pathlib import Path

import pandas as pd

from sciflow.experiment.engine import ExperimentEngine
from sciflow.experiment.tracking import MockTracker
from sciflow.run_flow import check_call_flow

In [None]:
# export

# Module-level tracker used when the notebook is run directly; the narrative
# in this notebook states it is replaced by a `StepTracker` when run within a
# converted flow.
tracker = MockTracker()

# Create a flow and annotate it with tracking

You do this by adding a tracker object that has the same interface as `sciflow.experiment.tracking.SciFlowTracker`. When run within a converted flow this will be replaced by a `StepTracker`.

There are two calls to track entities:

* `tracker.log_metric`: log a metric by name, value and step. The step index lets you record a sequence of values for the same metric over the lifetime of the workflow execution.
* `tracker.add_artifact`: add a file artifact. Save the file to the local filesystem first; it is then uploaded to remote storage and persisted as part of the flow execution.

In [None]:
# exportn_step:first


def first(tracker):
    """Log an example metric and a CSV artifact for the first step.

    Args:
        tracker: Object providing ``log_metric`` and ``add_artifact``. When
            it is falsy (for example ``None``) nothing is tracked.

    Returns:
        dict: An empty dictionary; this step adds no entries to it.
    """
    results = {}
    if not tracker:
        return results

    # The CSV is written into a throwaway directory that is removed when the
    # ``with`` block exits, i.e. right after ``add_artifact`` returns.
    with tempfile.TemporaryDirectory() as scratch_dir:
        tracker.log_metric("rmse", 0.5, 0)
        artifact_path = f"{scratch_dir}/first.csv"
        frame = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
        frame.to_csv(artifact_path)
        tracker.add_artifact(artifact_path)
    return results

In [None]:
# exportn_step:last


def last(tracker):
    """Log an example metric and a PNG histogram artifact for the last step.

    Args:
        tracker: Object providing ``log_metric`` and ``add_artifact``. When
            it is falsy (for example ``None``) nothing is tracked.

    Returns:
        dict: An empty dictionary; this step adds no entries to it.
    """
    results = {}
    if not tracker:
        return results

    # The image is written into a throwaway directory that is removed when the
    # ``with`` block exits, i.e. right after ``add_artifact`` returns.
    with tempfile.TemporaryDirectory() as scratch_dir:
        tracker.log_metric("r2", 0.9, 0)
        frame = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
        # Histogram of column "a"; the owning figure is what gets saved.
        # NOTE(review): the figure is not closed anywhere in this function.
        histogram_axes = frame["a"].plot.hist()
        figure = histogram_axes.figure
        image_path = f"{scratch_dir}/last.png"
        figure.savefig(image_path)
        tracker.add_artifact(image_path)
    return results

In [None]:
# tst
!sciflow_build_lib

Experiment tracking is a parameter on the conversion modules; it is currently enabled by default, but it can be turned off using the `track` param as desired.

In [None]:
# tst
!sciflow_metaflow --track True

In [None]:
# tst
# Invoke the generated Metaflow script with the "run" command and print the
# second element of what check_call_flow returns.
flow_script = Path("flows", "metaflow", "experiment_tracking.py")
flow_result = check_call_flow(flow_script, flow_command="run")
print(flow_result[1])

In [None]:
# Engine keyed on "experiment_tracking".
# NOTE(review): presumably this key matches the flow generated from this
# notebook (flows/metaflow/experiment_tracking.py) — confirm.
engine = ExperimentEngine(base_key="experiment_tracking")

In [None]:
# Take the first entry returned by find_latest().
# NOTE(review): presumably this is the most recent run; the ordering is not
# visible here — confirm. An empty result would fail on the [0] lookup.
ex = engine.find_latest()[0]

In [None]:
# Metrics recorded for the experiment, as a DataFrame.
actual_metrics = ex.metrics_as_df()

# Both steps log a single value at step 0: "rmse" from `first` and "r2" from
# `last`, so one row indexed by step is expected.
expected = pd.DataFrame(
    data={"r2": [0.9], "rmse": [0.5]},
    index=pd.Series([0], name="step"),
)
pd.testing.assert_frame_equal(left=expected, right=actual_metrics)

In [None]:
# Display the metrics frame that was compared against `expected`.
actual_metrics

In [None]:
# Display the artifacts attached to the experiment.
# NOTE(review): presumably these are the files the steps passed to
# tracker.add_artifact — confirm.
ex.artifacts