In [1]:
from pprint import pprint

import pandas as pd
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

import mlflow
from utils import fetch_logged_data


def main():
    """Grid-search an SVC on the iris dataset and print what MLflow logged.

    Enables MLflow's scikit-learn autologging, fits a ``GridSearchCV`` over
    the ``kernel`` and ``C`` hyperparameters, then prints:

    * every entry returned by ``fetch_logged_data`` for the last active run
      (treated here as the parent run), and
    * a table of the runs whose ``mlflow.parentRunId`` tag equals that run's
      ID, showing the run ID, the searched parameters and the mean test score.
    """
    mlflow.sklearn.autolog()

    # Hyperparameter grid; its keys are reused below to pick the
    # "params.*" columns out of the child-run table.
    search_space = {"kernel": ("linear", "rbf"), "C": [1, 10]}
    dataset = datasets.load_iris()

    grid_search = GridSearchCV(svm.SVC(), search_space)
    grid_search.fit(dataset.data, dataset.target)

    # The run ID is read right after fit() so it refers to the run that
    # was active during the search.
    parent_run_id = mlflow.last_active_run().info.run_id

    # ---- parent run: dump each category of logged data ----
    print("========== parent run ==========")
    section_header = "\n---------- logged {} ----------"
    for section, payload in fetch_logged_data(parent_run_id).items():
        print(section_header.format(section))
        pprint(payload)

    # ---- child runs: look them up through the parent-run tag ----
    child_filter = "tags.mlflow.parentRunId = '{}'".format(parent_run_id)
    child_runs = mlflow.search_runs(filter_string=child_filter)

    columns = ["run_id"]
    columns.extend("params.{}".format(name) for name in search_space)
    columns.append("metrics.mean_test_score")

    print("\n========== child runs ==========\n")
    # Lift pandas' column limit so the printed table is not truncated.
    pd.set_option("display.max_columns", None)
    print(child_runs[columns])


# Run the example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

2022/07/05 14:30:15 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '8e492616775e46f787f2d5a4bf2d4e2e', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow
2022/07/05 14:30:24 INFO mlflow.sklearn.utils: Logging the 5 best runs, no runs will be omitted.



---------- logged params ----------
{'best_C': '1',
 'best_kernel': 'linear',
 'cv': 'None',
 'error_score': 'nan',
 'estimator': 'SVC()',
 'n_jobs': 'None',
 'param_grid': "{'kernel': ('linear', 'rbf'), 'C': [1, 10]}",
 'pre_dispatch': '2*n_jobs',
 'refit': 'True',
 'return_train_score': 'False',
 'scoring': 'None',
 'verbose': '0'}

---------- logged metrics ----------
{'best_cv_score': 0.9800000000000001,
 'training_accuracy_score': 0.9933333333333333,
 'training_f1_score': 0.9933326665999933,
 'training_precision_score': 0.9934640522875816,
 'training_recall_score': 0.9933333333333333,
 'training_score': 0.9933333333333333}

---------- logged tags ----------
{'estimator_class': 'sklearn.model_selection._search.GridSearchCV',
 'estimator_name': 'GridSearchCV'}

---------- logged artifacts ----------
['best_estimator/MLmodel',
 'best_estimator/conda.yaml',
 'best_estimator/model.pkl',
 'best_estimator/python_env.yaml',
 'best_estimator/requirements.txt',
 'cv_results.csv',
 'model/M