# Build a Responsible AI (RAI) Dashboard with the Azure ML Python SDK

In [1]:
# Make sure you run this notebook in the azureml_py38 conda environment (a.k.a. "Python 3.8 - AzureML", one of the default conda environments on AML compute instances).
# If you are running this notebook for the first time, uncomment the line below to install the dependency.
# !pip install azure-ai-ml

In [2]:
from azure.ai.ml import MLClient,Input, dsl, Output
from azure.identity import DefaultAzureCredential
from azureml.core import Workspace, Dataset
import json
import uuid
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import PipelineJob
import time
import toml

In [3]:
# Read the Azure workspace coordinates from the local TOML-formatted file
# "config.txt"; the three values are used to build the MLClient instances.
config = toml.load("config.txt")
subscription_id, resource_group, workspace_name = (
    config["subscription_id"],
    config["resource_group"],
    config["workspace_name"],
)

In [4]:
# --- Registered model and compute target -----------------------------------
MODEL_NAME = "ProxyNominationRF"
MODEL_VERSION = "22"
COMPUTE = "demo-test"
model_label = "RF"

# --- Registered train / test data assets (name + version) -------------------
train_dataset_name = "dataset-RAI_MainData_train_numeric"
train_dataset_version = "25"
test_dataset_name = "dataset-RAI_MainData_test_sample_numeric"
test_dataset_version = "25"

# --- Column roles used by the RAI components and the score card -------------
target_column = "ProxyNomination"
sensitive_features = ["EconomicDisadvantageSTAS_Y", "Disability_Y"]
data_explorer_features = ["EconomicDisadvantageSTAS_Y", "sum_absentStreak"]
treatment_features = [
    "EconomicDisadvantageSTAS_Y",
    "Action_cnt_2019",
    "sum_absentStreak",
]
# Both start out empty; they are two independent list objects.
categorical_features, filter_columns = [], []

In [5]:
# Asset id of the registered model, in "azureml:<name>:<version>" form.
azureml_model_id = "azureml:{}:{}".format(MODEL_NAME, MODEL_VERSION)

# Train and test data are referenced as registered MLTable assets using the
# same "azureml:<name>:<version>" form.
train_pq = Input(
    path="azureml:{}:{}".format(train_dataset_name, train_dataset_version),
    mode="direct",
    type="mltable",
)
test_pq = Input(
    path="azureml:{}:{}".format(test_dataset_name, test_dataset_version),
    mode="direct",
    type="mltable",
)

# Client scoped to the "azureml" registry; used to look up the RAI components.
registry_name = "azureml"
ml_client_registry = MLClient(
    credential=DefaultAzureCredential(),
    registry_name=registry_name,
    resource_group_name=resource_group,
    subscription_id=subscription_id,
)

# Client scoped to the workspace; used to submit the job and download outputs.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    workspace_name=workspace_name,
    resource_group_name=resource_group,
    subscription_id=subscription_id,
)

In [6]:

def submit_and_wait(ml_client, pipeline_job, poll_interval: float = 30) -> "PipelineJob":
    """Submit a pipeline job and block until it reaches a terminal status.

    Args:
        ml_client: Client whose ``jobs`` attribute provides ``create_or_update``
            and ``get``.
        pipeline_job: The job definition to submit.
        poll_interval: Seconds to sleep between status polls. Defaults to 30,
            the value that was previously hard-coded.

    Returns:
        The job object from the final poll; its status is "Completed".

    Raises:
        AssertionError: If submission returns ``None``, or if the job stops in
            any terminal status other than "Completed". The message names the
            job, its final status and its studio URL.
    """
    terminal_statuses = ("Completed", "Failed", "Canceled", "NotResponding")

    created_job = ml_client.jobs.create_or_update(pipeline_job)
    # Raised explicitly (rather than via `assert`) so the check still runs when
    # Python is started with -O; the exception type is unchanged for callers.
    if created_job is None:
        raise AssertionError("Pipeline submission did not return a job object")

    print("Pipeline job can be accessed in the following URL:")
    print(f"{created_job.studio_url}")

    while created_job.status not in terminal_statuses:
        time.sleep(poll_interval)
        created_job = ml_client.jobs.get(created_job.name)
        print("Latest status : {0}".format(created_job.status))

    if created_job.status != "Completed":
        raise AssertionError(
            f"Pipeline job '{created_job.name}' ended with status "
            f"'{created_job.status}'; see {created_job.studio_url}"
        )
    return created_job

In [7]:
# Look up the constructor component by the "latest" label first; its version
# is then reused so that every RAI component comes from the same release.
label = "latest"
rai_constructor_component = ml_client_registry.components.get(
    label=label, name="microsoft_azureml_rai_tabular_insight_constructor"
)

version = rai_constructor_component.version
print("The current version of RAI built-in components is: " + version)

# Fetch the remaining components, in this fixed order, pinned to `version`.
(
    rai_explanation_component,
    rai_causal_component,
    rai_counterfactual_component,
    rai_erroranalysis_component,
    rai_gather_component,
    rai_scorecard_component,
) = [
    ml_client_registry.components.get(name=component_name, version=version)
    for component_name in (
        "microsoft_azureml_rai_tabular_explanation",
        "microsoft_azureml_rai_tabular_causal",
        "microsoft_azureml_rai_tabular_counterfactual",
        "microsoft_azureml_rai_tabular_erroranalysis",
        "microsoft_azureml_rai_tabular_insight_gather",
        "microsoft_azureml_rai_tabular_score_card",
    )
]

The current version of RAI built-in components is: 0.5.0


In [8]:
import json
# Settings for the score card step: model summary, metric thresholds, number
# of top features, data-explorer features and fairness configuration.
score_card_config_dict = dict(
    Model=dict(
        ModelName=f"{MODEL_NAME}",
        ModelType="Classification",
        ModelSummary="This is a Classification model.",
    ),
    # the thresholds here are for illustration purpose only and should be set by user's preference
    Metrics=dict(
        accuracy_score=dict(threshold=">=0.75"),
        recall_score=dict(threshold=">=0.70"),
    ),
    FeatureImportance=dict(top_n=5),
    DataExplorer=dict(features=data_explorer_features),
    Fairness=dict(
        metric=["accuracy_score", "recall_score", "precision_score", "f1_score"],
        sensitive_features=sensitive_features,
        fairness_evaluation_kind="difference",
    ),
)

# Serialise the settings to a local JSON file ...
score_card_config_filename = "RAI_Score_Card_Config.json"
with open(score_card_config_filename, "w") as f:
    f.write(json.dumps(score_card_config_dict))

# ... and hand that file to the pipeline as a uri_file input.
score_card_config_path = Input(
    path=score_card_config_filename,
    type="uri_file",
    mode="download",
)

In [9]:
import json
from azure.ai.ml import Input
from azure.ai.ml.constants import AssetTypes

# List-valued settings are passed to the RAI components as JSON-encoded
# strings. Each encoded value gets its own name so that the source lists are
# left untouched and re-running this cell never encodes an already-encoded
# string a second time.
categorical_columns = json.dumps(categorical_features)
treatment_features_json = json.dumps(treatment_features)
filter_columns_json = json.dumps(filter_columns)


# Pipeline definition: build the RAI insights for the registered model, run the
# explanation / causal / counterfactual / error-analysis steps on top of them,
# gather the results into a dashboard and generate the score card.
#
# Inputs:
#   target_column_name     - name of the label column.
#   train_data, test_data  - dataset inputs forwarded to the constructor step.
#   score_card_config_path - input forwarded to the score card step as its
#                            pdf_generation_config.
# Outputs (returned dict): "dashboard", "ux_json" and "scorecard".
@dsl.pipeline(
    compute=COMPUTE,
    # NOTE(review): this description looks carried over from a sample
    # notebook; confirm the wording matches the data used here.
    description="Example RAI computation on programmers data",
    experiment_name="RAI_dashboard",
)
def rai_pipeline(
    target_column_name,
    train_data,
    test_data,
    score_card_config_path,
):
    # Initiate the RAIInsights
    create_rai_job = rai_constructor_component(
        title="RAI_job",
        task_type="classification",
        model_info=f"{MODEL_NAME}:{MODEL_VERSION}",
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=train_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_columns,
    )
    create_rai_job.set_limits(timeout=120)

    # Every analysis step below consumes the constructor's insights output.
    explain_job = rai_explanation_component(
        comment="Explanation for the dataset",
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    explain_job.set_limits(timeout=120)

    causal_job = rai_causal_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        treatment_features=treatment_features_json,
    )
    causal_job.set_limits(timeout=180)

    counterfactual_job = rai_counterfactual_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        total_cfs=10,
        desired_class="opposite",
    )
    counterfactual_job.set_limits(timeout=600)

    erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        filter_features=filter_columns_json,
    )
    erroranalysis_job.set_limits(timeout=120)

    # NOTE(review): the counterfactual step is still defined above, but its
    # output is not passed to the gather step because insight_3 is commented
    # out - confirm this is intentional.
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_1=explain_job.outputs.explanation,
        insight_2=causal_job.outputs.causal,
        # insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=erroranalysis_job.outputs.error_analysis,
    )
    rai_gather_job.set_limits(timeout=800)

    rai_gather_job.outputs.dashboard.mode = "upload"
    rai_gather_job.outputs.ux_json.mode = "upload"

    rai_scorecard_job = rai_scorecard_component(
        dashboard=rai_gather_job.outputs.dashboard,
        pdf_generation_config=score_card_config_path,
    )

    return {
        "dashboard": rai_gather_job.outputs.dashboard,
        "ux_json": rai_gather_job.outputs.ux_json,
        "scorecard": rai_scorecard_job.outputs.scorecard,
    }

In [10]:
# Instantiate the pipeline that constructs the RAI insights.
insights_pipeline_job = rai_pipeline(
    score_card_config_path=score_card_config_path,
    test_data=test_pq,
    train_data=train_pq,
    target_column_name=target_column,
)

# Workaround to enable the download: point each pipeline output at its own
# folder under a fresh random prefix in the workspace blob store.
rand_path = str(uuid.uuid4())
for _output_name in ("dashboard", "ux_json", "scorecard"):
    setattr(
        insights_pipeline_job.outputs,
        _output_name,
        Output(
            type="uri_folder",
            mode="upload",
            path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/{_output_name}/",
        ),
    )

In [11]:
# Submit the pipeline and block until it finishes; the returned job object is
# what the download step refers to by name.
insights_job = submit_and_wait(
    pipeline_job=insights_pipeline_job,
    ml_client=ml_client,
)

Pipeline job can be accessed in the following URL:
https://ml.azure.com/runs/quirky_forest_3hwhycspr3?wsid=/subscriptions/3ad04f94-581c-468f-b2e4-966452b69cec/resourcegroups/tasedu-oea-aso-prod-rg/workspaces/tasedu-oea-ase-prod-mlw&tid=a9be3ac7-0c60-491e-8b3b-a32f8f46aec8
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Running
Latest status : Completed


## Download RAI scorecard

In [12]:
# Download only the "scorecard" named output of the finished job into the
# current working directory.
target_directory = "."

ml_client.jobs.download(
    insights_job.name,
    output_name="scorecard",
    download_path=target_directory,
)


Downloading artifact azureml://subscriptions/3ad04f94-581c-468f-b2e4-966452b69cec/resourcegroups/tasedu-oea-aso-prod-rg/workspaces/tasedu-oea-ase-prod-mlw/datastores/workspaceblobstore/paths/3ad921b8-78d7-46ea-9b62-9529e55db20c/scorecard/ to named-outputs/scorecard
