In [None]:
# default_exp experiment.tracking

# Flow Experiment Tracking

`sacred` ...

An issue that prevents greater adoption of the SIO stack (sacred/incense/omniboard) is its dependence on an external service, namely MongoDB.

> This module adds a data lake observer for `sacred`. The observer stores all data in block storage under a root experiment directory. Each experiment component, e.g. artifacts, metrics, runs, is stored in its own directory. Components like runs and metrics can be queried using a lake-compatible query engine with a client ODBC driver. Files and other nested/unstructured entities can be accessed from the block storage client directly. The goal is to provide the same capability as the MongoDBObserver and hence to be compatible with key downstream libraries like `incense` and `omniboard`.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# export


import datetime
import json
import os
import sys
import tempfile
import time
import traceback as tb
import uuid
from pathlib import Path

import boto3
import pandas as pd
from botocore.exceptions import ClientError
from sacred import metrics_logger
from sacred.host_info import get_host_info
from sacred.serializer import flatten
from sacred.stdout_capturing import get_stdcapturer
from sacred.utils import IntervalTimer

from sciflow.s3_utils import delete_dir, list_bucket, load_json, put_data, s3_join
from sciflow.utils import prepare_env

In [None]:
# Presumably populates the process environment from project config - confirm in sciflow.utils.
prepare_env()
# Raises KeyError when SCIFLOW_BUCKET is not set, so the notebook fails fast.
_bucket_name = os.environ["SCIFLOW_BUCKET"]

In [None]:
# Test fixtures shared by the cells below.
# The base key is date-stamped (UTC) so each day's test runs share one prefix.
today = datetime.datetime.utcnow().strftime("%Y%m%d")
_flow_base_key = f"sciflow-experiment-testing-{today}"
# Last six characters of a random UUID keep run ids distinct between executions.
_flow_run_id = f"sample_flow_instance_{str(uuid.uuid4())[-6:]}"
_flow_run_key = s3_join(_flow_base_key, _flow_run_id)
# Region is taken from the ambient AWS configuration here (none is passed).
_s3_res = boto3.resource("s3")
_steps = ["experiment-test-1", "experiment-test-2"]
_step_name = _steps[0]
# Display the base key as the cell output.
_flow_base_key

In [None]:
# export
# TODO: replace the sacred flatten function and move this to s3_utils - needs a jsonpickle serialiser


def save_json(s3_res, bucket_name, key, filename, obj):
    """Serialise ``obj`` to JSON and store it at ``<key>/<filename>`` in a bucket.

    The object is passed through sacred's ``flatten`` before being dumped with
    sorted keys and a two-space indent; the resulting text is handed to
    ``put_data``.

    Args:
        s3_res: S3 resource handle forwarded to ``put_data``.
        bucket_name: Name of the target bucket.
        key: Key prefix under which the document is stored.
        filename: Final key component of the stored document.
        obj: Object to serialise.
    """
    target_key = s3_join(key, filename)
    document = json.dumps(flatten(obj), sort_keys=True, indent=2)
    put_data(s3_res, bucket_name, target_key, document)

In [None]:
# export


class FlowTracker:
    """Tracks the lifecycle of a whole flow run as JSON documents in S3.

    Two copies of the run document are maintained:

    * ``<flow_base_key>/<flow_run_id>/experiment/runs/run.json`` - the per-run
      entry, stored next to one ``step_<name>.json`` document per step.
    * ``<flow_base_key>/experiments/runs/<flow_run_id>/run.json`` - the copy
      kept under the shared ``experiments`` prefix.

    Args:
        bucket_name: Bucket that holds all tracking documents.
        flow_base_key: Root key prefix of the experiment store.
        flow_run_id: Identifier of this flow run.
        steps: Ordered list of step names belonging to the flow.
        params: Optional flow parameters; used as the run ``config`` when
            ``start`` is called without explicit params.
        run_name: Optional experiment name recorded in the run entry.
        region: AWS region name. When ``None`` the region configured for the
            default boto3 session is used.

    Raises:
        ValueError: If ``region`` is ``None`` and the boto3 session has no
            region configured.
    """

    def __init__(
        self,
        bucket_name,
        flow_base_key,
        flow_run_id,
        steps,
        params=None,
        run_name=None,
        region="eu-west-1",
    ):
        self.bucket_name = bucket_name
        self.flow_base_key = flow_base_key
        self.flow_run_id = flow_run_id
        self.steps = steps
        self.params = params
        self.run_name = run_name

        if region is not None:
            self.region = region
            self.s3_res = boto3.resource("s3", region_name=region)
        else:
            session = boto3.session.Session()
            if session.region_name is None:
                raise ValueError(
                    "You must either pass in an AWS region name, or have a "
                    "region name specified in your AWS config file"
                )
            self.region = session.region_name
            # Every method reads ``self.s3_res``, so the handle must be stored
            # under that name in this branch as well.
            self.s3_res = boto3.resource("s3")

        self.run_entry_key = s3_join(flow_base_key, flow_run_id, "experiment", "runs")
        self.runs_table_key = s3_join(flow_base_key, "experiments", "runs", flow_run_id)

    def start(self, params=None):
        """Write the initial ``RUNNING`` run entry and one document per step.

        Args:
            params: Flow parameters recorded as the run ``config``. Falls back
                to the ``params`` given to the constructor, then to ``{}``.
        """
        if params is None:
            params = self.params
        host_info = get_host_info()
        run_entry = {
            "experiment_id": self.flow_run_id,
            "experiment_name": self.run_name,
            "experiment": {"name": self.run_name},
            "format": None,
            "command": None,
            "host": host_info,
            "start_time": round(time.time()),
            "config": params if params is not None else {},
            "meta": {},
            "status": "RUNNING",
            "resources": [],
            "artifacts": [],
            "captured_out": "",
            "info": {},
            "heartbeat": None,
            "steps": self.steps,
        }

        save_json(
            self.s3_res, self.bucket_name, self.run_entry_key, "run.json", run_entry
        )
        # Separate snapshot of the entry as it looked at flow start.
        save_json(
            self.s3_res,
            self.bucket_name,
            self.run_entry_key,
            "flow_start_run.json",
            run_entry,
        )
        save_json(
            self.s3_res, self.bucket_name, self.runs_table_key, "run.json", run_entry
        )
        # Create each step entry at flow start - in case of step failure, so
        # that merging step entries always finds a document for every step.
        for step in self.steps:
            save_json(
                self.s3_res,
                self.bucket_name,
                self.run_entry_key,
                f"step_{step}.json",
                run_entry,
            )
        print(f"Started tracking flow: {self.flow_run_id}")

    def interrupted(self):
        """Record the flow as ``INTERRUPTED``."""
        self._tracking_event("INTERRUPTED")
        print(f"Flow tracking interrupted: {self.flow_run_id}")

    def failed(self, except_info):
        """Record the flow as ``FAILED`` together with a formatted traceback.

        Args:
            except_info: Dict with the keys ``exc_type``, ``exc_value`` and
                ``trace`` (the three values returned by ``sys.exc_info()``).
        """
        self._tracking_event("FAILED", except_info)
        print(f"Flow tracking failed: {self.flow_run_id}")

    def completed(self):
        """Record the flow as ``COMPLETED``."""
        self._tracking_event("COMPLETED")
        print(f"Flow tracking completed: {self.flow_run_id}")

    def _tracking_event(self, event_status, except_info=None):
        """Load the stored run entry, finalise it and write both copies back.

        Sets the status, stop time and elapsed time, attaches ``fail_trace``
        when ``except_info`` is given and merges in all step documents.
        """
        run_entry = load_json(
            self.s3_res, self.bucket_name, s3_join(self.run_entry_key, "run.json")
        )
        run_entry["status"] = event_status
        run_entry["stop_time"] = round(time.time())
        run_entry["elapsed_time"] = round(
            run_entry["stop_time"] - run_entry["start_time"], 2
        )
        if except_info is not None:
            run_entry["fail_trace"] = tb.format_exception(
                except_info["exc_type"], except_info["exc_value"], except_info["trace"]
            )

        run_entry = self._merge_step_entries(run_entry)
        save_json(
            self.s3_res, self.bucket_name, self.run_entry_key, "run.json", run_entry
        )
        save_json(
            self.s3_res, self.bucket_name, self.runs_table_key, "run.json", run_entry
        )

    def _merge_step_entries(self, run_entry):
        """Fold every step document into ``run_entry`` and return it.

        Adds ``all_hosts`` (host info per step), ``step_entries`` (the full
        step documents) and replaces ``captured_out`` with the concatenation
        of each step's output wrapped in BEGIN/END marker lines.
        """
        all_hosts = {}
        captured_out = ""
        step_entries = {}
        for step in self.steps:
            step_entry = load_json(
                self.s3_res,
                self.bucket_name,
                s3_join(self.run_entry_key, f"step_{step}.json"),
            )
            all_hosts[step] = step_entry["host"]
            # A step that captured nothing stores None rather than "".
            step_out = (
                "" if step_entry["captured_out"] is None else step_entry["captured_out"]
            )
            captured_out += f"******BEGIN step: {step}******\n"
            captured_out += step_out
            captured_out += f"******END step: {step}******\n"
            step_entries[step] = step_entry
        run_entry["all_hosts"] = all_hosts
        run_entry["captured_out"] = captured_out
        run_entry["steps"] = self.steps
        run_entry["step_entries"] = step_entries
        return run_entry

In [None]:
# Flow-level tracker for the sample run; uses the default region of FlowTracker.
flow_tracker = FlowTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps)

In [None]:
# Writes the initial run entry and one document per step to S3.
flow_tracker.start()

# Step-level

In [None]:
# export


class SciFlowTracker:
    """Common tracker interface; both hooks do nothing in this base class."""

    def log_metric(self, metric_name, metric_value, metric_step):
        """Record one metric value for a given step (no-op here)."""
        return None

    def add_artifact(self, artifact_path):
        """Register the file at ``artifact_path`` as an artifact (no-op here)."""
        return None

In [None]:
# export


class TempFileTracker(SciFlowTracker):
    """Tracker intended to write to local temporary files (not implemented yet).

    Args:
        metrics_path: Location for the metrics file; stored as given.
        artifacts_dir: Directory for saved artifacts; stored as given.
    """

    def __init__(self, metrics_path=None, artifacts_dir=None):
        self.metrics_path = metrics_path
        self.artifacts_dir = artifacts_dir

    def log_metric(self, metric_name, metric_value, metric_step):
        """Placeholder: currently does nothing."""
        # Append to csv dataframe
        pass

    def add_artifact(self, artifact_path):
        """Placeholder: currently does nothing."""
        # Save file to tmp directory
        pass

In [None]:
# export


class MockTracker(SciFlowTracker):
    """Tracker stand-in that accepts every call and records nothing."""

    def log_metric(self, metric_name, metric_value, metric_step):
        """Ignore the metric."""
        return None

    def add_artifact(self, artifact_path):
        """Ignore the artifact."""
        return None

In [None]:
# export


class StepTracker(SciFlowTracker):
    """Tracks one step of a flow: metrics, artifacts, captured output, heartbeat.

    Documents are stored under ``<flow_base_key>/<flow_run_id>/experiment``;
    metrics and artifacts are additionally written under
    ``<flow_base_key>/experiments/{metrics,artifacts}/<flow_run_id>``
    (see ``init_keys``).

    The constructor reads ``flow_start_run.json``, so the flow must already
    have been started with ``FlowTracker.start``.

    Args:
        bucket_name: Bucket that holds all tracking documents.
        flow_base_key: Root key prefix of the experiment store.
        flow_run_id: Identifier of the flow run this step belongs to.
        step_name: Name of the tracked step.
        capture_mode: Mode passed to sacred's ``get_stdcapturer``.
        region: AWS region name. When ``None`` the region configured for the
            default boto3 session is used.

    Raises:
        ValueError: If ``region`` is ``None`` and the boto3 session has no
            region configured.
    """

    def __init__(
        self,
        bucket_name,
        flow_base_key,
        flow_run_id,
        step_name,
        capture_mode="sys",
        region="eu-west-1",
    ):
        self.bucket_name = bucket_name
        self.flow_base_key = flow_base_key
        self.flow_run_id = flow_run_id
        self.exp_base_key = s3_join(flow_base_key, flow_run_id, "experiment")
        self.step_name = step_name
        self.capture_mode = capture_mode
        self._stop_heartbeat_event = None
        self._heartbeat = None
        self._output_file = None
        self._metrics = metrics_logger.MetricsLogger()
        self.captured_out = None
        self.info = {}
        self.result = None
        self.start_time = round(time.time())

        if region is not None:
            self.region = region
            self.s3_res = boto3.resource("s3", region_name=region)
        else:
            session = boto3.session.Session()
            if session.region_name is None:
                raise ValueError(
                    "You must either pass in an AWS region name, or have a "
                    "region name specified in your AWS config file"
                )
            self.region = session.region_name
            self.s3_res = boto3.resource("s3")

        # Continue from metrics already stored for this run (e.g. by an
        # earlier step); start empty when no metrics document exists yet.
        try:
            self.saved_metrics = load_json(
                self.s3_res,
                bucket_name,
                s3_join(self.exp_base_key, "metrics", "metrics.json"),
            )
        except ClientError as ex:
            if ex.response["Error"]["Code"] == "NoSuchKey":
                self.saved_metrics = {}
            else:
                raise

        self.flow_start_run_entry = load_json(
            self.s3_res,
            bucket_name,
            s3_join(self.exp_base_key, "runs", "flow_start_run.json"),
        )
        # Note: same dict object, so updates to run_entry are visible through
        # flow_start_run_entry as well.
        self.run_entry = self.flow_start_run_entry
        self.init_keys()

    def start_heartbeat(self, beat_interval=10.0):
        """Start a background timer that calls ``_emit_heartbeat`` periodically.

        Args:
            beat_interval: Interval passed to ``IntervalTimer.create``.
        """
        print("Starting Heartbeat")
        self._stop_heartbeat_event, self._heartbeat = IntervalTimer.create(
            self._emit_heartbeat, beat_interval
        )
        self._heartbeat.start()

    def stop_heartbeat(self):
        """Signal the heartbeat timer to stop and wait up to 2s for it."""
        print("Stopping Heartbeat")
        if self._heartbeat is not None:
            self._stop_heartbeat_event.set()
            self._heartbeat.join(timeout=2)

    def capture_out(self):
        """Return a context manager that captures stdout for this step.

        Use as ``with tracker.capture_out() as tracker._output_file:``.
        """
        # TODO figure out why only "sys" seems to work in Sagemaker? - tee is installed
        _, capture_stdout = get_stdcapturer(self.capture_mode)
        return capture_stdout()

    def get_captured_out(self):
        """Append newly captured output to ``self.captured_out``.

        Returns ``None``; the accumulated text is available on
        ``self.captured_out``. Does nothing when the capture file is closed.

        Raises:
            IOError: If capturing has not been started.
        """
        if self._output_file is None:
            raise IOError(
                "Attempting to get captured out when capturing has not been started. Remember to wrap tracked statements in 'with tracker.capture_out() as tracker._output_file:'"
            )
        if self._output_file.closed:
            return
        text = self._output_file.get()
        if isinstance(text, bytes):
            text = text.decode("utf-8", "replace")
        if self.captured_out:
            text = self.captured_out + text
        self.captured_out = text

    def log_metric(self, metric_name, metric_value, metric_step):
        """Append one metric observation and persist all metrics to S3.

        Args:
            metric_name: Name of the metric series.
            metric_value: Observed value.
            metric_step: Step index the value belongs to.
        """
        if metric_name not in self.saved_metrics:
            self.saved_metrics[metric_name] = {
                "values": [],
                "steps": [],
                "timestamps": [],
            }

        series = self.saved_metrics[metric_name]
        series["values"].append(metric_value)
        series["steps"].append(metric_step)
        series["timestamps"].append(datetime.datetime.utcnow().isoformat())
        save_json(
            self.s3_res,
            self.bucket_name,
            self.metrics_key,
            "metrics.json",
            self.saved_metrics,
        )
        # TODO: handle parallel metric producing steps - requires merge of step entries
        save_json(
            self.s3_res,
            self.bucket_name,
            self.metrics_key,
            f"step_{self.step_name}_metrics.json",
            self.saved_metrics,
        )
        save_json(
            self.s3_res,
            self.bucket_name,
            self.metrics_table_key,
            "metrics.json",
            self.saved_metrics,
        )

    def add_artifact(self, artifact_path):
        """Upload a file as an artifact and record its name in the run entry.

        Args:
            artifact_path: Local path of the file; its base name becomes the
                artifact name.
        """
        name = Path(artifact_path).name
        self.save_file(self.artifacts_key, artifact_path, name)
        self.save_file(self.artifacts_table_key, artifact_path, name)
        self.run_entry["artifacts"].append(name)
        save_json(
            self.s3_res, self.bucket_name, self.runs_key, "run.json", self.run_entry
        )
        save_json(
            self.s3_res,
            self.bucket_name,
            self.runs_key,
            f"step_{self.step_name}.json",
            self.run_entry,
        )

    def _emit_heartbeat(self):
        """Persist a heartbeat timestamp plus the current output and result."""
        beat_time = datetime.datetime.utcnow().isoformat()
        self.run_entry["heartbeat"] = beat_time
        # get_captured_out() returns None and accumulates into
        # self.captured_out, so read the attribute afterwards. Skip the
        # refresh when capturing was never started, so the heartbeat thread
        # is not killed by the IOError.
        if self._output_file is not None:
            self.get_captured_out()
        self.run_entry["captured_out"] = self.captured_out
        self.run_entry["result"] = self.result
        save_json(
            self.s3_res, self.bucket_name, self.runs_key, "run.json", self.run_entry
        )
        save_json(
            self.s3_res,
            self.bucket_name,
            self.runs_key,
            f"step_{self.step_name}.json",
            self.run_entry,
        )

    def completed(self, status="COMPLETED", except_info=None):
        """Stop the heartbeat and write the final state of the step.

        Args:
            status: Final status recorded in the step document.
            except_info: Optional dict with the keys ``exc_type``,
                ``exc_value`` and ``trace``; when given, a formatted traceback
                is stored as ``fail_trace``.
        """
        self.stop_heartbeat()
        self.get_captured_out()
        self.run_entry["captured_out"] = self.captured_out
        self.run_entry["result"] = self.result
        # run.json is written before the step's status/timing fields are set;
        # those fields are only stored in the step document below.
        save_json(
            self.s3_res, self.bucket_name, self.runs_key, "run.json", self.run_entry
        )
        self.run_entry["status"] = status
        self.run_entry["stop_time"] = round(time.time())
        self.run_entry["elapsed_time"] = round(
            self.run_entry["stop_time"] - self.start_time, 2
        )
        self.run_entry["host"] = get_host_info()
        if except_info is not None:
            self.run_entry["fail_trace"] = tb.format_exception(
                except_info["exc_type"], except_info["exc_value"], except_info["trace"]
            )
        save_json(
            self.s3_res,
            self.bucket_name,
            self.runs_key,
            f"step_{self.step_name}.json",
            self.run_entry,
        )

    def save_file(self, file_save_dir, filename, target_name=None):
        """Upload a local file to ``<file_save_dir>/<target_name>``.

        Args:
            file_save_dir: Key prefix to store the file under.
            filename: Local path of the file to upload.
            target_name: Name of the stored object; defaults to the base name
                of ``filename``.
        """
        target_name = target_name or os.path.basename(filename)
        key = s3_join(file_save_dir, target_name)
        # Close the handle once the upload call returns.
        # NOTE(review): assumes put_data reads the stream synchronously - confirm.
        with open(filename, "rb") as file_obj:
            put_data(self.s3_res, self.bucket_name, key, file_obj)

    def init_keys(self):
        """Derive all S3 key prefixes used by this tracker."""
        self.runs_key = s3_join(self.exp_base_key, "runs")
        self.metrics_key = s3_join(self.exp_base_key, "metrics")
        self.artifacts_key = s3_join(self.exp_base_key, "artifacts")
        self.resource_key = s3_join(self.exp_base_key, "resources")
        self.source_key = s3_join(self.exp_base_key, "sources")
        self.metrics_table_key = s3_join(
            self.flow_base_key, "experiments", "metrics", self.flow_run_id
        )
        self.artifacts_table_key = s3_join(
            self.flow_base_key, "experiments", "artifacts", self.flow_run_id
        )

        self.keys = (
            self.runs_key,
            self.metrics_key,
            self.artifacts_key,
            self.resource_key,
            self.source_key,
        )

In [None]:
# Step-level tracker for the first step; requires flow_tracker.start() to have run.
tracker = StepTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps[0])

# Metrics

In [None]:
# Log three metric series of different lengths; each call rewrites the metrics documents in S3.
tracker.log_metric("auc", 0.37, 0)
tracker.log_metric("auc", 0.45, 1)
tracker.log_metric("auc", 0.63, 2)
tracker.log_metric("auc", 0.89, 3)
tracker.log_metric("r2", 0.66, 0)
tracker.log_metric("r2", 0.67, 1)
tracker.log_metric("rmse", 0, 0)

In [None]:
# Read the stored metrics document back and round-trip it through pandas
# into plain Python dicts keyed by metric name.
# NOTE(review): reading an s3:// URL with pandas presumably needs s3fs installed - confirm.
metrics_by_name = json.loads(
    pd.read_json(
        f"s3://{_bucket_name}/{_flow_base_key}/{_flow_run_id}/experiment/metrics/metrics.json"
    ).to_json()
)

In [None]:
def metrics_to_df(metrics_by_name, flow_run_id):
    """Flatten a ``{metric_name: columns}`` mapping into one long DataFrame.

    Args:
        metrics_by_name: Mapping from metric name to column data accepted by
            ``pd.DataFrame`` (as written by ``StepTracker.log_metric``:
            ``values``, ``steps`` and ``timestamps``).
        flow_run_id: Identifier stored in the ``flow_run_id`` column of every row.

    Returns:
        A DataFrame with one row per observation, a ``metric`` column naming
        the series, a ``flow_run_id`` column and a fresh 0..n-1 index. An
        empty mapping yields an empty frame with the expected columns.
    """
    metric_frames = [
        pd.DataFrame(metric_ptr).assign(metric=metric_name)
        for metric_name, metric_ptr in metrics_by_name.items()
    ]
    if not metric_frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame(
            columns=["values", "steps", "timestamps", "metric", "flow_run_id"]
        )
    metrics = pd.concat(metric_frames, ignore_index=True)
    metrics["flow_run_id"] = flow_run_id
    return metrics

In [None]:
# The flattened frame must keep the logging order within and across metric series.
metrics = metrics_to_df(metrics_by_name, _flow_run_id)
assert metrics["steps"].tolist() == [0, 1, 2, 3, 0, 1, 0]
assert metrics["values"].tolist() == [0.37, 0.45, 0.63, 0.89, 0.66, 0.67, 0]
assert metrics["metric"].tolist() == ["auc", "auc", "auc", "auc", "r2", "r2", "rmse"]

In [None]:
# Under the run prefix there must be exactly one metrics.json and one run.json.
# NOTE(review): presumably list_bucket returns the object keys under the prefix - confirm.
contents = list_bucket(_bucket_name, _flow_run_key)
assert len(contents) >= 1
assert (
    len(
        [
            Path(c).name
            for c in contents
            if Path(c).name == "metrics.json" or Path(c).name == "run.json"
        ]
    )
    == 2
)

# Out Capture

In [None]:
# Reading captured output before capture_out() was entered must raise IOError.
raised = False
try:
    tracker.get_captured_out()
except IOError:
    raised = True
assert raised

In [None]:
# Output printed inside the capture context is accumulated on tracker.captured_out.
assert tracker.captured_out is None
with tracker.capture_out() as tracker._output_file:
    print("Some text")
    print("Some text")
    tracker.get_captured_out()
assert tracker.captured_out == "Some text\nSome text\n"
# Reset so that later cells start from a clean capture buffer.
tracker.captured_out = None

# Artifacts

> Support is provided for the same artifact types as found in `sacred`; however we will not be testing the creation, saving or loading of mp4s here as this would require external dependencies for video creation such as ffmpeg. 

Supported artifact types:

* `.txt`: `text/csv`,
* `.csv`: `text/csv`,
* `.png`: `image/png`,
* `.jpg`: `image/jpeg`,
* `.mp4`: `video/mp4`,
* `.pickle`: `application/octet-stream`,

In [None]:
# Small sample frame used to generate artifact files in the cells below.
df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})

In [None]:
%matplotlib auto

In [None]:
# Write one file per artifact type into a temporary directory and upload each of them.
with tempfile.TemporaryDirectory() as temp_dir:
    csv_path = f"{temp_dir}/testfile.csv"
    df.to_csv(csv_path)
    # The .txt artifact deliberately holds the same CSV content.
    txt_path = f"{temp_dir}/testfile.txt"
    df.to_csv(txt_path)
    # One histogram figure is saved twice, as PNG and as PDF.
    fig = df.a.plot.hist().figure
    png_path = f"{temp_dir}/testfile.png"
    fig.savefig(png_path)
    pdf_path = f"{temp_dir}/testfile.pdf"
    fig.savefig(pdf_path)
    pickle_path = f"{temp_dir}/testfile.pkl"
    df.to_pickle(pickle_path)
    artifacts = [csv_path, txt_path, png_path, pdf_path, pickle_path]
    # Uploads must happen inside the context, before the directory is removed.
    for artifact_path in artifacts:
        tracker.add_artifact(artifact_path)

# Heartbeat

In [None]:
# slow

# Run a 1s heartbeat for about 3s while printing, then check both lines were captured.
with tracker.capture_out() as tracker._output_file:
    tracker.start_heartbeat(1.0)
    print("Some text")
    time.sleep(3)
    print("Some text")
    tracker.stop_heartbeat()
    tracker.get_captured_out()
assert len([t for t in tracker.captured_out.split("\n") if t == "Some text"]) == 2

# Finish Tracking

In [None]:
# Before completed() is called, the stored step document still reports RUNNING.
step_entry_key = s3_join(
    _flow_base_key, _flow_run_id, "experiment", "runs", f"step_{_step_name}.json"
)
step_entry = load_json(_s3_res, _bucket_name, step_entry_key)
assert step_entry["status"] == "RUNNING"

In [None]:
# Finalise the first step with the default COMPLETED status.
tracker.completed()

In [None]:
# After completed(), the stored step document reports COMPLETED.
step_entry_key = s3_join(
    _flow_base_key, _flow_run_id, "experiment", "runs", f"step_{_step_name}.json"
)
step_entry = load_json(_s3_res, _bucket_name, step_entry_key)
assert step_entry["status"] == "COMPLETED"

# Add a second step..

In [None]:
# Track the second step of the same flow run: two metric values and one CSV artifact.
tracker = StepTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps[1])
with tempfile.TemporaryDirectory() as temp_dir:
    with tracker.capture_out() as tracker._output_file:
        tracker.start_heartbeat(1.0)
        tracker.log_metric("qini_auc", 0.5, 0)
        tracker.log_metric("qini_auc", 0.6, 1)
        csv_path = f"{temp_dir}/{_steps[1]}_file.csv"
        df.to_csv(csv_path)
        tracker.add_artifact(csv_path)
    # completed() stops the heartbeat; here it runs after the capture context has exited.
    tracker.completed()

In [None]:
# Finalise the flow: merges both step documents into the run entry.
flow_tracker.completed()

In [None]:
# The stored run entry must now report COMPLETED.
run_entry_key = s3_join(_flow_base_key, _flow_run_id, "experiment", "runs", "run.json")
run_entry = load_json(_s3_res, _bucket_name, run_entry_key)
assert run_entry["status"] == "COMPLETED"

In [None]:
# Reload the metrics document now that the second step has added to it.
metrics_by_name = json.loads(
    pd.read_json(
        f"s3://{_bucket_name}/{_flow_base_key}/{_flow_run_id}/experiment/metrics/metrics.json"
    ).to_json()
)
metrics = metrics_to_df(metrics_by_name, _flow_run_id)

In [None]:
# Metrics from both steps end up in the same document.
assert sorted(metrics.metric.unique()) == ["auc", "qini_auc", "r2", "rmse"]

In [None]:
# Names of the artifacts uploaded by the first step.
# NOTE(review): not asserted against in the cells shown here; the check below is still a placeholder.
artifacts = [
    "testfile.csv",
    "testfile.txt",
    "testfile.png",
    "testfile.pdf",
    "testfile.pkl",
]

In [None]:
# check artifacts exist

In [None]:
# check metrics logged

In [None]:
# Sample of merged captured output with BEGIN/END step markers.
# NOTE(review): not referenced by any later cell shown here; each line carries both an
# escaped "\n" and a literal newline, so the value contains blank lines between entries.
captured_out = """******BEGIN step: experiment-test******\n
Starting Heartbeat\n
Some text\n
Emitted heartbeat at: 2022-06-09T11:40:25.112986\n
Emitted heartbeat at: 2022-06-09T11:40:26.223391\n
Some text\n
Stopping Heartbeat\n
Emitted heartbeat at: 2022-06-09T11:40:27.116552\n
******END step: experiment-test******\n"""

In [None]:
# has a positive long stop time
# has a positive float elapsed_time
# has an all_hosts entry with len 1
# captured out has beginning & end text and 2 "Some text" lines

In [None]:
# run_entry

In [None]:
# delete_dir(s3_res, bucket_name, flow_base_key)

# Test Full Flow Tracking

In [None]:
def first_step(tracker):
    """Sample step: log one metric and upload a CSV and a PNG artifact.

    Relies on the notebook globals ``temp_dir`` (directory the files are
    written to) and ``df`` (frame that is saved and plotted). The sleeps
    give a running heartbeat time to fire between actions.
    """
    print("Some text")
    time.sleep(2)
    tracker.log_metric("auroc", 0.5, 0)

    table_file = f"{temp_dir}/testfile.csv"
    df.to_csv(table_file)
    time.sleep(2)
    tracker.add_artifact(table_file)
    time.sleep(2)

    histogram = df.a.plot.hist().figure
    image_file = f"{temp_dir}/testfile.png"
    histogram.savefig(image_file)
    tracker.add_artifact(image_file)
    print("Some text")

In [None]:
def second_step(tracker):
    """Sample step: print a line, wait two seconds, then log one ``auroc`` value."""
    print("More text")
    pause_seconds = 2
    time.sleep(pause_seconds)
    metric_name, metric_value, metric_index = "auroc", 0.8, 0
    tracker.log_metric(metric_name, metric_value, metric_index)

In [None]:
# NOTE(review): the cells shown here use `_steps` (assigned in the next cell), not `steps` - confirm this is still needed.
steps = ["first_step", "second_step"]

In [None]:
# Fresh run id and step names for the full-flow test; this rebinds the earlier fixtures.
_flow_run_id = f"sample_flow_instance_{str(uuid.uuid4())[-6:]}"
_flow_run_key = s3_join(_flow_base_key, _flow_run_id)
_steps = ["first_step", "second_step"]

In [None]:
# slow

# Happy path: run both sample steps under step trackers, then complete the flow.
flow_tracker = FlowTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps)

try:
    flow_tracker.start()

    # The tracker is built outside the step's try block: if construction
    # fails, the error is reported at flow level instead of calling
    # completed() on a tracker left over from an earlier cell.
    tracker = StepTracker(_bucket_name, _flow_base_key, _flow_run_id, "first_step")
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            with tracker.capture_out() as tracker._output_file:
                tracker.start_heartbeat(1.0)
                first_step(tracker)
                tracker.completed()
    except KeyboardInterrupt:
        # Stop the heartbeat, mark the step, then let the flow-level handler
        # below record the interruption.
        tracker.completed(status="INTERRUPTED")
        raise
    except BaseException:
        # A failed step is recorded but does not abort the flow.
        exc_type, exc_value, trace = sys.exc_info()
        except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
        tracker.completed(status="FAILED", except_info=except_info)

    tracker = StepTracker(_bucket_name, _flow_base_key, _flow_run_id, "second_step")
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            with tracker.capture_out() as tracker._output_file:
                tracker.start_heartbeat(1.0)
                second_step(tracker)
                tracker.completed()
    except KeyboardInterrupt:
        tracker.completed(status="INTERRUPTED")
        raise
    except BaseException:
        exc_type, exc_value, trace = sys.exc_info()
        except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
        tracker.completed(status="FAILED", except_info=except_info)

    flow_tracker.completed()
except (KeyboardInterrupt):
    flow_tracker.interrupted()
    print(f"Flow interrupted by user: {_flow_run_id}")
except BaseException:
    exc_type, exc_value, trace = sys.exc_info()
    except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
    flow_tracker.failed(except_info)
    print(f"Flow failed: {_flow_run_id}")
# Exception interrupt handling
# Failure

In [None]:
# Fresh run id for the flow-without-steps test.
_flow_run_id = f"sample_flow_instance_{str(uuid.uuid4())[-6:]}"
_flow_run_key = s3_join(_flow_base_key, _flow_run_id)

In [None]:
# slow

# Start and immediately complete a flow without running any step tracker;
# the step documents merged by completed() are the ones written by start().
flow_tracker = FlowTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps)

try:
    flow_tracker.start()
    flow_tracker.completed()
except (KeyboardInterrupt):
    flow_tracker.interrupted()
    print(f"Flow interrupted by user: {_flow_run_id}")
except BaseException:
    exc_type, exc_value, trace = sys.exc_info()
    except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
    flow_tracker.failed(except_info)
    print(f"Flow failed: {_flow_run_id}")
# Exception interrupt handling
# Failure

In [None]:
# Fresh run id for the failing-step test.
_flow_run_id = f"sample_flow_instance_{str(uuid.uuid4())[-6:]}"
_flow_run_key = s3_join(_flow_base_key, _flow_run_id)

In [None]:
# slow

# Failure path: the first step raises, is recorded as FAILED, and the flow
# itself then fails as well.
flow_tracker = FlowTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps)

try:
    flow_tracker.start()

    # Built outside the step's try block so that a construction error is
    # reported at flow level rather than through a stale tracker.
    tracker = StepTracker(_bucket_name, _flow_base_key, _flow_run_id, "first_step")
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            with tracker.capture_out() as tracker._output_file:
                # Simulated step failure before any tracked work starts.
                raise ValueError()
    except KeyboardInterrupt:
        # Mark the step and let the flow-level handler record the interruption.
        tracker.completed(status="INTERRUPTED")
        raise
    except BaseException:
        exc_type, exc_value, trace = sys.exc_info()
        except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
        tracker.completed(status="FAILED", except_info=except_info)

    # Simulated flow-level failure after the step was handled.
    raise ValueError()
except (KeyboardInterrupt):
    flow_tracker.interrupted()
    print(f"Flow interrupted by user: {_flow_run_id}")
except BaseException:
    exc_type, exc_value, trace = sys.exc_info()
    except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
    flow_tracker.failed(except_info)
    print(f"Flow failed: {_flow_run_id}")
# Exception interrupt handling
# Failure

In [None]:
# Fresh run id for the failing-flow test.
_flow_run_id = f"sample_flow_instance_{str(uuid.uuid4())[-6:]}"
_flow_run_key = s3_join(_flow_base_key, _flow_run_id)

In [None]:
# slow

# Failure path without steps: the flow raises right after start() and is recorded as FAILED.
flow_tracker = FlowTracker(_bucket_name, _flow_base_key, _flow_run_id, _steps)

try:
    flow_tracker.start()

    # Simulated failure; the completed() call below is intentionally never reached.
    raise ValueError()

    flow_tracker.completed()
except (KeyboardInterrupt):
    flow_tracker.interrupted()
    print(f"Flow interrupted by user: {_flow_run_id}")
except BaseException:
    exc_type, exc_value, trace = sys.exc_info()
    except_info = {"exc_type": exc_type, "exc_value": exc_value, "trace": trace}
    flow_tracker.failed(except_info)
    print(f"Flow failed: {_flow_run_id}")
# Exception interrupt handling
# Failure

In [None]:
# slow

# Load the run entry and both step documents of the most recent flow run for inspection.
run_entry_key = s3_join(_flow_base_key, _flow_run_id, "experiment", "runs")
run_entry = load_json(_s3_res, _bucket_name, s3_join(run_entry_key, "run.json"))
first_step_entry = load_json(
    _s3_res, _bucket_name, s3_join(run_entry_key, "step_first_step.json")
)
second_step_entry = load_json(
    _s3_res, _bucket_name, s3_join(run_entry_key, "step_second_step.json")
)

In [None]:
# slow
# Clean up: presumably removes everything under today's test prefix - confirm in sciflow.s3_utils.
delete_dir(_s3_res, _bucket_name, _flow_base_key)