In [1]:
# Set AZURE_EXTENSION_DIR for this kernel process.
# NOTE(review): presumably the directory the Azure CLI loads extensions from - confirm.
# This is a machine-specific absolute path and must be adjusted on other machines.
%env AZURE_EXTENSION_DIR=/home/schrodinger/automl/sdk-cli-v2/src/cli/src
# Set AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED for this kernel process.
# NOTE(review): looks like an opt-in flag for private CLI features - confirm.
%env AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED=true

env: AZURE_EXTENSION_DIR=/home/schrodinger/automl/sdk-cli-v2/src/cli/src
env: AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED=true


# Setup

## Imports

In [20]:
import os

import mlflow
from mlflow.tracking import MlflowClient

import pandas as pd    # For pretty printing some results

## Setting necessary context

In [10]:
# Identifiers used below to construct the Azure ML workspace handle.
subscription_id = "381b38e9-9840-4719-a5a0-61d9585e1e91"
resource_group_name = "gasi_rg_neu"
workspace_name = "gasi_ws_neu"

# MLflow experiment name used by the cells below.
experiment_name = "automl-classification-bmarketing-all"

## Initialize MLFlow Client

The models and artifacts produced by AutoML can be accessed through the MLflow interface. Initialize the MLflow client here and point it at Azure ML as its tracking backend.

In [11]:
################################################################################
# TODO: The API to get the tracking URI is not yet available on the Workspace
# object (to be obtained from MLClient once it is), so it is looked up through
# the v1 SDK workspace instead. The import is kept next to the workaround so the
# whole section can be removed in one piece.
from azureml.core import Workspace as WorkspaceV1

ws = WorkspaceV1(
    workspace_name=workspace_name,
    resource_group=resource_group_name,
    subscription_id=subscription_id,
)
tracking_uri = ws.get_mlflow_tracking_uri()
del ws  # the v1 workspace handle is only needed for the URI lookup
################################################################################

# Point MLflow at the workspace and select the experiment for subsequent calls.
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

print("\nCurrent tracking uri: {}".format(mlflow.get_tracking_uri()))


Current tracking uri: azureml://northeurope.experiments.azureml.net/mlflow/v1.0/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourceGroups/gasi_rg_neu/providers/Microsoft.MachineLearningServices/workspaces/gasi_ws_neu?


# Results

Access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Run.

## Retrieve the Best Trial

In [12]:
# Run id of the AutoML parent job whose results are inspected.
job_name = "AutoML_b120a44d-ecb4-4494-b644-f93c265e1028"

# The client talks to the tracking URI configured earlier in the notebook.
mlflow_client = MlflowClient()
mlflow_parent_run = mlflow_client.get_run(job_name)

# The parent run stores the id of its best child run in a tag.
best_child_run_id = mlflow_parent_run.data.tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)

# Fetch the best child run and display it as the cell output.
best_run = mlflow_client.get_run(best_child_run_id)
best_run

Found best child run id:  AutoML_b120a44d-ecb4-4494-b644-f93c265e1028_0


<Run: data=<RunData: metrics={'AUC_macro': 0.9537056697431252,
 'AUC_micro': 0.9826734687571428,
 'AUC_weighted': 0.9537056697431252,
 'accuracy': 0.9237493929091792,
 'average_precision_score_macro': 0.8430327340366603,
 'average_precision_score_micro': 0.9834011986147951,
 'average_precision_score_weighted': 0.959992859687783,
 'balanced_accuracy': 0.7717056650246306,
 'f1_score_macro': 0.7936078137928921,
 'f1_score_micro': 0.9237493929091792,
 'f1_score_weighted': 0.9205655010653557,
 'log_loss': 0.16640483179348017,
 'matthews_correlation': 0.5909070642243929,
 'norm_macro_recall': 0.5434113300492611,
 'precision_score_macro': 0.8212770320032137,
 'precision_score_micro': 0.9237493929091792,
 'precision_score_weighted': 0.9188551905972525,
 'recall_score_macro': 0.7717056650246306,
 'recall_score_micro': 0.9237493929091792,
 'recall_score_weighted': 0.9237493929091792,
 'weighted_accuracy': 0.9617508999033835}, params={}, tags={'_aml_system_ComputeTargetStatus': '{"AllocationState

## Get best run metrics

In [19]:
# Build a one-row frame from the metrics dict, then transpose it so that each
# metric is shown on its own row.
pd.DataFrame(best_run.data.metrics, index=[0]).transpose()

Unnamed: 0,0
recall_score_micro,0.923749
average_precision_score_micro,0.983401
matthews_correlation,0.590907
AUC_micro,0.982673
AUC_weighted,0.953706
f1_score_weighted,0.920566
norm_macro_recall,0.543411
precision_score_micro,0.923749
average_precision_score_weighted,0.959993
accuracy,0.923749


## Download the best model locally

In [21]:
# Local directory that receives the downloaded artifacts.
local_dir = "/tmp/artifact_downloads"

# makedirs(exist_ok=True) creates missing parent directories and does not fail
# if the directory already exists, avoiding the check-then-create race.
os.makedirs(local_dir, exist_ok=True)

# Download the "outputs" artifact folder of the best run into local_dir.
local_path = mlflow_client.download_artifacts(best_run.info.run_id, "outputs", local_dir)
print("Artifacts downloaded in: {}".format(local_path))
print("Artifacts: {}".format(os.listdir(local_path)))

Artifacts downloaded in: /tmp/artifact_downloads/outputs
Artifacts: ['pipeline_graph.json', 'model_onnx.json', 'env_dependencies.json', 'scoring_file_v_1_0_0.py', 'model.pkl', 'conda.yaml', 'MLmodel', 'scoring_file_v_2_0_0.py', 'requirements.txt', 'conda_env_v_1_0_0.yml', 'model.onnx']


In [23]:
%%sh
# Print the MLmodel file from the artifacts downloaded by the previous cell.
cat /tmp/artifact_downloads/outputs/MLmodel

flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.6.2
  sklearn:
    pickled_model: model.pkl
    serialization_format: pickle
    sklearn_version: 0.22.1
utc_time_created: '2021-08-31 01:07:56.436694'


### Access other runs

Use MLflow search filters to retrieve runs that match custom criteria (e.g. runs that produced an ONNX model, or the best run according to a non-primary metric).

In [34]:
# Search all child runs with a parent id
experiment = mlflow_client.get_experiment_by_name(experiment_name)
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a clear
    # message instead of an AttributeError on the lookup below.
    raise ValueError("Experiment '{}' was not found.".format(experiment_name))

###########################################################################################
# Steps:
# 1. Get all child runs for the parent run, sorted on the primary metric (or any other metric).
#    NOTE: the query below does not yet restrict the results to runs that have an ONNX
#    resource in their properties; that filter still has to be added.
# 2. Take the head of that list - which will be the best run matching the filter.

# TODO: This filter should work - but currently, the child runs don't have this tag set.
# The single quotes around 'mlflow.parentRunId' are required due to a bug in AzureML MLFlow.
# https://msdata.visualstudio.com/Vienna/_queries/edit/1252056/
# https://msdata.visualstudio.com/Vienna/_workitems/edit/1326141
query = "tags.'mlflow.parentRunId' = '{}'".format(mlflow_parent_run.info.run_id)
results = mlflow_client.search_runs(
    # The API documents a list of experiment ids, so wrap the single id.
    experiment_ids=[experiment.experiment_id],
    filter_string=query,
    order_by=["metrics.accuracy DESC"],
)
###########################################################################################

# Display the matching runs (a list of Run objects) as the cell output.
results

[]