In [7]:
# Set two environment variables for this kernel session via the IPython %env magic.
# NOTE(review): the first value is an absolute path on the author's machine —
# adjust it for your environment. Presumably it points the Azure CLI at a local
# extension checkout and the second flag enables private ML CLI features — confirm.
%env AZURE_EXTENSION_DIR=/home/schrodinger/automl/sdk-cli-v2/src/cli/src
%env AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED=true

env: AZURE_EXTENSION_DIR=/home/schrodinger/automl/sdk-cli-v2/src/cli/src
env: AZURE_ML_CLI_PRIVATE_FEATURES_ENABLED=true


# Setup

## Imports

In [22]:
import os

import mlflow
import mlflow.pyfunc
import mlflow.sklearn
from mlflow.tracking import MlflowClient

import pandas as pd

## Setting necessary context

In [9]:
# Identifiers of the Azure ML workspace and the experiment used by the cells below.
subscription_id = "381b38e9-9840-4719-a5a0-61d9585e1e91"
resource_group_name = "gasi_rg_neu"
workspace_name = "gasi_ws_neu"
experiment_name = "automl-classification-bmarketing-all"

## Initialize MLFlow Client

The models and artifacts produced by AutoML can be accessed through the MLflow interface. Initialize the MLflow client here and set Azure ML as its tracking backend.

In [10]:
################################################################################
# TODO: Get the tracking URI from MLClient once that API exists. It is not yet
# available on the Workspace object, so the v1 SDK Workspace is used as a
# temporary workaround. The import is kept here (not in the imports cell) so the
# whole workaround can be deleted in one piece.
from azureml.core import Workspace as WorkspaceV1

ws = WorkspaceV1(
    workspace_name=workspace_name,
    resource_group=resource_group_name,
    subscription_id=subscription_id,
)
tracking_uri = ws.get_mlflow_tracking_uri()
del ws  # the v1 workspace handle is only needed to look up the URI
################################################################################

# Point MLflow at the workspace's tracking URI and select the experiment by name.
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

print("\nCurrent tracking uri: {}".format(mlflow.get_tracking_uri()))


Current tracking uri: azureml://northeurope.experiments.azureml.net/mlflow/v1.0/subscriptions/381b38e9-9840-4719-a5a0-61d9585e1e91/resourceGroups/gasi_rg_neu/providers/Microsoft.MachineLearningServices/workspaces/gasi_ws_neu?


## Load the test data frame

In [19]:
# Read the test CSV, split the "y" column off as the labels,
# and display the shapes of the remaining frame and the label series.
test_data_url = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_test.csv"
test_df = pd.read_csv(test_data_url)
y_actual = test_df.pop("y")
(test_df.shape, y_actual.shape)

((4120, 20), (4120,))

# Load the best model

Access the results (such as Models, Artifacts, Metrics) of a previously completed AutoML Run.

Loading the models locally assumes that you are running the notebook in an environment compatible with the model. The list of dependencies expected by the model is specified in the MLflow model produced by AutoML (in the `conda.yaml` file).

In [12]:
# Fetch the submitted job's run through the MLflow client, then read the id of
# its best trial from the run's "automl_best_child_run_id" tag.
job_name = "AutoML_b120a44d-ecb4-4494-b644-f93c265e1028"

mlflow_client = MlflowClient()
mlflow_parent_run = mlflow_client.get_run(job_name)
parent_run_tags = mlflow_parent_run.data.tags
best_child_run_id = parent_run_tags["automl_best_child_run_id"]

mlflow.pyfunc.loaded_model:
  flavor: mlflow.sklearn

## Using MLFlow PyFunc

In [20]:
# Load the best child run's "outputs" artifact with the pyfunc flavor
# and predict on the test frame.
pyfunc_model_uri = f"runs:/{best_child_run_id}/outputs"
pyfunc_model = mlflow.pyfunc.load_model(pyfunc_model_uri)
y_preds = pyfunc_model.predict(test_df)
y_preds

array(['yes', 'no', 'no', ..., 'yes', 'no', 'no'], dtype=object)

## Using MLFlow Sklearn Flavor

In [23]:
# Load the same "outputs" artifact with the sklearn flavor, which exposes
# predict_proba in addition to predict.
sklearn_model_uri = f"runs:/{best_child_run_id}/outputs"
sklearn_model = mlflow.sklearn.load_model(sklearn_model_uri)
y_preds_proba = sklearn_model.predict_proba(test_df)
y_preds_proba

Unnamed: 0,no,yes
0,0.17,0.83
1,0.98,0.02
2,0.90,0.10
3,1.00,0.00
4,1.00,0.00
...,...,...
4115,0.99,0.01
4116,1.00,0.00
4117,0.32,0.68
4118,1.00,0.00


# Model Interpretation & Analysis

In [24]:
# Take the "datatransformer" step from the loaded pipeline and show its
# featurization summary as a table.
custom_featurizer = sklearn_model.named_steps["datatransformer"]
df = custom_featurizer.get_featurization_summary()
pd.DataFrame(df)

Unnamed: 0,RawFeatureName,TypeDetected,Dropped,EngineeredFeatureCount,Transformations
0,age,Numeric,No,1,[MeanImputer]
1,duration,Numeric,No,1,[MeanImputer]
2,emp.var.rate,Numeric,No,1,[MeanImputer]
3,cons.price.idx,Numeric,No,1,[MeanImputer]
4,cons.conf.idx,Numeric,No,1,[MeanImputer]
5,euribor3m,Numeric,No,1,[MeanImputer]
6,nr.employed,Numeric,No,1,[MeanImputer]
7,job,Categorical,No,12,[StringCast-CharGramCountVectorizer]
8,marital,Categorical,No,4,[StringCast-CharGramCountVectorizer]
9,education,Categorical,No,8,[StringCast-CharGramCountVectorizer]


Set `is_user_friendly=False` to get a more detailed summary for the transforms being applied.

In [25]:
# Request the summary again with is_user_friendly=False and show it as a table.
df = custom_featurizer.get_featurization_summary(is_user_friendly=False)
pd.DataFrame(df)

Unnamed: 0,RawFeatureName,TypeDetected,Dropped,EngineeredFeatureCount,Transformations,TransformationParams
0,age,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['age'], 'Transform..."
1,duration,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['duration'], 'Tran..."
2,emp.var.rate,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['emp.var.rate'], '..."
3,cons.price.idx,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['cons.price.idx'],..."
4,cons.conf.idx,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['cons.conf.idx'], ..."
5,euribor3m,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['euribor3m'], 'Tra..."
6,nr.employed,Numeric,No,1,[MeanImputer],"{'Transformer1': {'Input': ['nr.employed'], 'T..."
7,job,Categorical,No,12,[StringCast-CharGramCountVectorizer],"{'Transformer1': {'Input': ['job'], 'Transform..."
8,marital,Categorical,No,4,[StringCast-CharGramCountVectorizer],"{'Transformer1': {'Input': ['marital'], 'Trans..."
9,education,Categorical,No,8,[StringCast-CharGramCountVectorizer],"{'Transformer1': {'Input': ['education'], 'Tra..."


## \<Link to Model Explanation examples>

# \<Link to Model test examples>