In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient


# Authenticate against Azure. Requesting a management-plane token up front surfaces an
# authentication problem in this cell rather than on the first MLClient call.
try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as auth_error:
    # Broad catch on purpose: whatever the reason the default credential cannot produce
    # a token, fall back to an interactive browser sign-in instead of aborting.
    print(f"DefaultAzureCredential failed ({auth_error}); falling back to interactive browser login.")
    credential = InteractiveBrowserCredential()


# Workspace-scoped client built from the local workspace configuration file.
ml_client = MLClient.from_config(credential=credential)

In [18]:
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# read training and test dataset
df_training = pd.read_csv("diabetes-train-data/diabetes-train.csv")
df_test = pd.read_csv("diabetes-test-data/diabetes-test.csv")

# convert data to table
table_training = pa.Table.from_pandas(df_training)
table_test = pa.Table.from_pandas(df_test)

# write tables out to parquet
pq.write_table(table_training, "diabetes-train-data/diabetes-training.parquet", version="1.0")
pq.write_table(table_test, "diabetes-test-data/diabetes-test.parquet", version="1.0")

# display the first few rows of the training dataset
# (kept as the cell's last expression: a bare expression in the middle of a cell
# is evaluated but never rendered by the notebook)
df_training.head()

In [2]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
import mltable
from azure.ai.ml._artifacts._artifact_utilities import download_artifact_from_aml_uri
import os, pandas as pd

# Names of the train/test data assets and the single version string used to
# reference both of them
input_train_data, input_test_data = (
    "diabetes_train_rai_mltable",
    "diabetes_test_rai_mltable",
)
data_version = "4"

# Local folders for the training and test data
train_data_path, test_data_path = "diabetes-train-data/", "diabetes-test-data/"


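# NOTE: registration of the data assets is left commented out. Later cells reference
# the assets as `azureml:<name>:<data_version>`, so both must already exist in the
# workspace; on a fresh workspace (or after bumping `data_version`) uncomment the
# blocks below and run them once. Presumably an existing name/version cannot be
# re-created with different content — TODO confirm.
# train_data = Data(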
#     path=train_data_path,
#     type=AssetTypes.MLTABLE,
#     description="RAI diabetes training data",
#     name=input_train_data,
#     version=data_version,
# )
# ml_client.data.create_or_update(train_data)

# test_data = Data(
#     path=test_data_path,
#     type=AssetTypes.MLTABLE,
#     description="RAI diabetes test data",
#     name=input_test_data,
#     version=data_version,
# )

# ml_client.data.create_or_update(test_data)





In [None]:
# Second client, scoped to the "azureml" registry that provides the RAI built-in
# components; it reuses the credential and subscription/resource group of ml_client.
registry_name = "azureml"

registry_client_kwargs = dict(
    credential=credential,
    subscription_id=ml_client.subscription_id,
    resource_group_name=ml_client.resource_group_name,
    registry_name=registry_name,
)
ml_client_registry = MLClient(**registry_client_kwargs)

print(ml_client_registry)

In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Register model
# file_model = Model(
#     path="model",
#     type=AssetTypes.MLFLOW_MODEL,
#     name="local-mlflow-diabetes",
#     description="Model created from local file.",
# )

# model = Model(
#     name="model-rai-new",
#     path="./model-rai",
#     type=AssetTypes.MLFLOW_MODEL,
#     description="RAI model",
# )
# model = ml_client.models.create_or_update(model) 

# Name and version of the registered model to analyse. Defined once so that the
# lookup, the model ids below and the experiment name built from `model_name`
# all refer to the same model.
model_name = "model-rai-new"
model_version = "1"

model = ml_client.models.get(name=model_name, version=model_version)

print(model)

# Model parameters
# "<name>:<version>" is passed to the RAI constructor as model_info; the
# "azureml:"-prefixed form is used as the path of the model input.
expected_model_id = f"{model.name}:{model.version}"
azureml_model_id = f"azureml:{expected_model_id}"

print(azureml_model_id)

In [4]:
label = "latest"

# Resolve the constructor by label first: whatever version it comes back with is
# then used for every other RAI component so they all match.
rai_constructor_component = ml_client_registry.components.get(
    name="rai_tabular_insight_constructor", label=label
)
version = rai_constructor_component.version
print("The current version of RAI built-in components is: " + version)


def _rai_component(component_name):
    """Fetch a registry component by name at the constructor's version."""
    return ml_client_registry.components.get(name=component_name, version=version)


rai_erroranalysis_component = _rai_component("rai_tabular_erroranalysis")
rai_explanation_component = _rai_component("rai_tabular_explanation")
rai_gather_component = _rai_component("rai_tabular_insight_gather")

The current version of RAI built-in components is: 0.21.0


In [11]:
from azure.ai.ml import Input, dsl
from azure.ai.ml.constants import AssetTypes

# Name of the compute target handed to the pipeline decorator below.
compute_name = "ml-compute-ntb"

# Pipeline definition: build the RAI insights from the model and the train/test
# data, add error analysis and explanations, then gather everything into a single
# "dashboard" output.
#
# Parameters:
#   target_column_name - forwarded to the constructor as target_column_name
#   train_data         - forwarded to the constructor as train_dataset
#   test_data          - forwarded to the constructor as test_dataset
# Returns a dict with one entry, "dashboard", bound to the gather step's output.
#
# Reads these notebook globals: model_name, expected_model_id, azureml_model_id and
# the four rai_*_component handles.
#
# NOTE(review): documented with `#` comments instead of a docstring, and local
# variable names left untouched, because the dsl decorator may read both (docstring
# as description, variable names as node names) — confirm before changing either.
@dsl.pipeline(
    compute=compute_name,
    description="RAI insights on diabetes data",
    experiment_name=f"RAI_insights_{model_name}",
)
def rai_decision_pipeline(
    target_column_name, train_data, test_data
):
    # Initiate the RAIInsights
    create_rai_job = rai_constructor_component(
        title="RAI dashboard diabetes",
        task_type="classification",
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=train_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        # use_model_dependency=True,
    )
    # Same limit is applied to every step below (presumably seconds — TODO confirm).
    create_rai_job.set_limits(timeout=300)

    # Add error analysis (consumes the constructor's rai_insights_dashboard output)
    error_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    error_job.set_limits(timeout=300)

    # Add explanations (also consumes the constructor's rai_insights_dashboard output)
    explanation_job = rai_explanation_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        comment="add explanation", 
    )
    explanation_job.set_limits(timeout=300)

    # Combine everything: error analysis goes to insight_3, explanations to insight_4
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_3=error_job.outputs.error_analysis,
        insight_4=explanation_job.outputs.explanation,
    )
    rai_gather_job.set_limits(timeout=300)

    # Switch the gathered dashboard output to "upload" mode
    rai_gather_job.outputs.dashboard.mode = "upload"

    return {
        "dashboard": rai_gather_job.outputs.dashboard,
    }

In [12]:
from azure.ai.ml import Input
target_feature = "Diabetic"


def _mltable_asset_input(asset_name):
    """Build a download-mode mltable Input for `asset_name` at `data_version`."""
    return Input(
        type="mltable",
        path=f"azureml:{asset_name}:{data_version}",
        mode="download",
    )


# Pipeline inputs for the train and test data assets
diabetes_train_pq = _mltable_asset_input(input_train_data)
diabetes_test_pq = _mltable_asset_input(input_test_data)

In [13]:
import uuid
from azure.ai.ml import Output

# Pipeline to construct the RAI Insights
# (the target column comes from `target_feature` rather than a repeated literal,
# so the column name is defined in exactly one place)
insights_pipeline_job = rai_decision_pipeline(
    target_column_name=target_feature,
    train_data=diabetes_train_pq,
    test_data=diabetes_test_pq,
)

# Workaround to enable the download: point the dashboard output at an explicit
# folder on workspaceblobstore; the random UUID gives each instantiation its own path.
rand_path = str(uuid.uuid4())
insights_pipeline_job.outputs.dashboard = Output(
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/dashboard/",
    mode="upload",
    type="uri_folder",
)

In [None]:
from azure.ai.ml.entities import PipelineJob
from IPython.core.display import HTML
from IPython.display import display
import time

def submit_and_wait(ml_client, pipeline_job, poll_interval_seconds=30) -> "PipelineJob":
    """Submit a pipeline job and block until it reaches a terminal status.

    Args:
        ml_client: Client whose ``jobs`` operations are used to submit and poll.
        pipeline_job: The pipeline job to submit.
        poll_interval_seconds: Seconds to sleep between two status polls.
            Defaults to 30, the previously hard-coded interval.

    Returns:
        The job as last fetched; its status is "Completed".

    Raises:
        AssertionError: If the submission returns no job, or the job stops in
            any status other than "Completed".
    """
    # Statuses after which polling stops.
    terminal_statuses = ("Completed", "Failed", "Canceled", "NotResponding")

    created_job = ml_client.jobs.create_or_update(pipeline_job)
    assert created_job is not None, "Job submission returned no job"

    print("Pipeline job can be accessed in the following URL:")
    # Actually emit the URL announced by the line above.
    print(created_job.studio_url)

    while created_job.status not in terminal_statuses:
        time.sleep(poll_interval_seconds)
        # Re-fetch by name to pick up the current status.
        created_job = ml_client.jobs.get(created_job.name)
        print("Latest status : {0}".format(created_job.status))
    assert created_job.status == "Completed", (
        "Pipeline job {0} ended with status {1}".format(
            created_job.name, created_job.status
        )
    )
    return created_job


# Submit the insights pipeline and block until it has finished
insights_job = submit_and_wait(
    ml_client=ml_client,
    pipeline_job=insights_pipeline_job,
)
