In [66]:
# Identify the Azure ML workspace to run against.
# All three values are empty placeholders and are passed to MLClient below,
# so they must be filled in before running the rest of the notebook.
subscription_id = ""
resource_group = ""
workspace = ""

# Version string used when looking up and registering the train/test data assets.
rai_diabetes_regression_example_version_string = "1"

In [67]:
import json
from azure.ai.ml import dsl, Input
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

In [68]:
# Authenticate once; the same credential object is reused by every client below.
credential = DefaultAzureCredential()

# Workspace-scoped client: credential, subscription and resource group are the
# leading constructor arguments; the workspace is named explicitly.
ml_client = MLClient(
    credential,
    subscription_id,
    resource_group,
    workspace_name=workspace,
)
print(ml_client)

MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f17980eb4f0>,
         subscription_id=27a05524-7a59-453b-88d7-df3ebaa2bcc1,
         resource_group_name=azuremlytate081922,
         workspace_name=azuremlwsytate081922)


In [69]:
# RAI 組み込みコンポーネントの azureml レジストリへのハンドルを取得
# Registry-scoped client used to fetch the built-in RAI components from the
# "azureml" registry (no workspace is passed for this client).
registry_name = "azureml"
ml_client_registry = MLClient(
    credential,
    subscription_id,
    resource_group,
    registry_name=registry_name,
)
print(ml_client_registry)

MLClient(credential=<azure.identity._credentials.default.DefaultAzureCredential object at 0x7f17980eb4f0>,
         subscription_id=6c6683e9-e5fe-4038-8519-ce6ebec2ba15,
         resource_group_name=registry-builtin-prod-eastus-01,
         workspace_name=None)


In [70]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local MLTable folders and the names under which they are registered.
train_data_path = "../data/mltable/train/"
test_data_path = "../data/mltable/test/"
input_train_data = "nyc_taxi_train_mltable_dataset"
input_test_data = "nyc_taxi_test_mltable_dataset"


def _get_or_register_mltable(name, path, description):
    """Return the registered MLTable data asset `name`, creating it if missing.

    The asset is looked up at version
    `rai_diabetes_regression_example_version_string`. Only a "not found"
    response triggers registration; any other failure (authentication,
    network, permissions) propagates instead of being mistaken for a
    missing asset.

    Args:
        name: Data asset name in the workspace.
        path: Local folder containing the MLTable definition to upload.
        description: Description stored on a newly created asset.

    Returns:
        The data asset as returned by the workspace (existing or newly created).
    """
    # Function-scope import keeps this cell self-contained.
    from azure.core.exceptions import ResourceNotFoundError

    version = rai_diabetes_regression_example_version_string
    try:
        return ml_client.data.get(name=name, version=version)
    except ResourceNotFoundError:
        asset = Data(
            path=path,
            type=AssetTypes.MLTABLE,
            description=description,
            name=name,
            version=version,
        )
        # Keep the object returned by the service rather than the local draft.
        return ml_client.data.create_or_update(asset)


# Each asset is handled independently, so an already-registered training asset
# is not re-registered just because the test asset is missing (or vice versa).
# NOTE(review): the descriptions mention "diabetes" although the assets are
# named nyc_taxi_* — looks like a copy-paste leftover; confirm before changing.
train_data = _get_or_register_mltable(
    input_train_data,
    train_data_path,
    "RAI diabetes regression example training data",
)
test_data = _get_or_register_mltable(
    input_test_data,
    test_data_path,
    "RAI diabetes regression example test data",
)

In [71]:
# Registered model to analyse and the compute target the pipeline runs on.
model_name = "nyc_taxi_mlflow_rai"
compute_name = "cpu-cluster"
target_column_name = "totalAmount"
# Used as the part after the colon in "<model_name>:<suffix>".
model_name_suffix = 4
expected_model_id = f"{model_name}:{model_name_suffix}"
azureml_model_id = f"azureml:{expected_model_id}"

# Pipeline inputs referencing the data assets by name and version.
# The paths are derived from the names/version defined in the data-asset cell
# so they cannot drift apart from what that cell registers.
programmers_train_mltable = Input(
    type=AssetTypes.MLTABLE,
    path=f"azureml:{input_train_data}:{rai_diabetes_regression_example_version_string}",
    mode="download",
)
programmers_test_mltable = Input(
    type=AssetTypes.MLTABLE,
    path=f"azureml:{input_test_data}:{rai_diabetes_regression_example_version_string}",
    mode="download",
)

In [72]:
label = "latest"


def _registry_component(name, **selector):
    """Fetch component `name` from the registry client.

    `selector` is forwarded unchanged and is either `label=...` or `version=...`.
    """
    return ml_client_registry.components.get(name=name, **selector)


# Resolve the constructor by label first; its version then pins every other
# RAI component so that all of them come from the same release.
rai_constructor_component = _registry_component(
    "microsoft_azureml_rai_tabular_insight_constructor", label=label
)

version = rai_constructor_component.version
print("The current version of RAI built-in components is: " + version)

rai_explanation_component = _registry_component(
    "microsoft_azureml_rai_tabular_explanation", version=version
)
rai_causal_component = _registry_component(
    "microsoft_azureml_rai_tabular_causal", version=version
)
rai_counterfactual_component = _registry_component(
    "microsoft_azureml_rai_tabular_counterfactual", version=version
)
rai_erroranalysis_component = _registry_component(
    "microsoft_azureml_rai_tabular_erroranalysis", version=version
)
rai_gather_component = _registry_component(
    "microsoft_azureml_rai_tabular_insight_gather", version=version
)
rai_scorecard_component = _registry_component(
    "microsoft_azureml_rai_tabular_score_card", version=version
)

The current version of RAI built-in components is: 0.4.0


In [73]:
# Build the RAI pipeline.
# The column lists and ranges below are serialised to JSON strings before
# being handed to the RAI components.

# Columns the constructor should treat as categorical.
categorical_columns = json.dumps(["vendorID", "passengerCount"])

# Features used as treatments by the causal analysis step.
treatment_features = json.dumps(
    [
        "lpepPickupDatetime",
        "tripDistance",
        "pickupLongitude",
        "pickupLatitude",
        "dropoffLongitude",
        "dropoffLatitude",
    ]
)

# Range passed to the counterfactual step as `desired_range`.
desired_range = json.dumps([0, 290])

# No explicit filter features for the error analysis step.
filter_columns = json.dumps([])


# Pipeline definition: builds a Responsible AI dashboard for the registered
# model by chaining the built-in RAI components fetched from the registry.
# Inputs: the target column name plus the train and test datasets.
# Returns: a dict exposing the gather step's `dashboard` and `ux_json` outputs.
# NOTE(review): documented with comments rather than a docstring or type
# annotations, in case the dsl.pipeline decorator consumes those as job
# metadata — confirm before adding either.
@dsl.pipeline(
    compute=compute_name,
    experiment_name=f"rai_pipeline_nyc_taxi",
)
def rai_pipeline_nyc_taxi(
    target_column_name,
    train_data,
    test_data
):
    # Start the RAIInsights construction: binds the model and both datasets
    # and produces the rai_insights_dashboard output every later step consumes.
    create_rai_job = rai_constructor_component(
        title="RAI Dashboard Example",
        task_type="regression",
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=train_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_columns,
    )
    # Per-step timeout; presumably in seconds — TODO confirm against the SDK docs.
    create_rai_job.set_limits(timeout=120)

    # Add model explanation
    explain_job = rai_explanation_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        comment="My comment",
    )
    explain_job.set_limits(timeout=120)

    # Add causal analysis
    causal_job = rai_causal_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        treatment_features=treatment_features,
    )
    causal_job.set_limits(timeout=180)

    # Add counterfactual analysis
    counterfactual_job = rai_counterfactual_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        total_cfs=10,
        desired_range=desired_range,
    )
    counterfactual_job.set_limits(timeout=600)

    # Add error analysis
    erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        filter_features=filter_columns,
    )
    erroranalysis_job.set_limits(timeout=120)

    # Combine everything: the gather step takes the constructor output plus
    # the four insight outputs produced above.
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_1=explain_job.outputs.explanation,
        insight_2=causal_job.outputs.causal,
        insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=erroranalysis_job.outputs.error_analysis,
    )
    rai_gather_job.set_limits(timeout=120)

    # Set both gather outputs to "upload" mode before exposing them.
    rai_gather_job.outputs.dashboard.mode = "upload"
    rai_gather_job.outputs.ux_json.mode = "upload"

    return {
        "dashboard": rai_gather_job.outputs.dashboard,
        "ux_json": rai_gather_job.outputs.ux_json
    }

In [74]:
# Instantiate the pipeline with the target column and the train/test MLTable
# inputs defined earlier; the result is submitted in the next cell.
insights_pipeline_job = rai_pipeline_nyc_taxi(
    target_column_name=target_column_name,
    train_data=programmers_train_mltable,
    test_data=programmers_test_mltable
)

In [75]:
# Submit the pipeline job through the workspace client and keep the returned job object.
created_job = ml_client.jobs.create_or_update(insights_pipeline_job)