# Defining Model Id

In [23]:
# Identifier of the model to analyze -- replace with your own model.
expected_model_id = "rai_programmer_example_reg_1666334612:1"
# The same identifier with the "azureml:" prefix; used later as the model Input path.
azureml_model_id = "azureml:" + expected_model_id

# Defining the Versions

In [24]:
# Version requested for every RAI component fetched from the workspace.
version_string = "1"

In [25]:
# Version suffix appended to the train/test MLTable names when the pipeline inputs are built.
rai_programmer_example_version_string = "5"

# Defining the Compute

In [26]:
# Compute target handed to the pipeline decorator -- replace with your Compute Name.
compute_name = "aiml-cpu"

# Importing Necessary Packages

In [5]:
import os
import time

import pandas as pd

# Defining the Train Data Path

In [27]:
# Folder path for the training split (not referenced by the other cells shown in this notebook).
train_data_path = "data-programmer-regression/train/"

# Defining the Test Data Path

In [28]:
# Folder path for the test split (not referenced by the other cells shown in this notebook).
test_data_path = "data-programmer-regression/test/"

# Defining Target Columns

In [29]:
# Column passed to the pipeline as the prediction target.
target_column_name = "score"

# Configuring Workspace connections

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
# Build the workspace client from the local Azure ML config, authenticating with
# DefaultAzureCredential (shared token cache excluded) and with logging enabled.
ml_client = MLClient.from_config(
    credential=DefaultAzureCredential(exclude_shared_token_cache_credential=True),
    logging_enable=True,
)

# Defining Input train and test files

In [31]:
# Names of the train/test MLTable assets; combined with a version suffix later
# to form the pipeline input paths.
input_train_data = "Programmers_Train_MLTable"
input_test_data = "Programmers_Test_MLTable"


# Loading RAI Components

In [32]:
# Fetch each Responsible AI component from the workspace, all at `version_string`.

# Constructor: starts the RAI insights that the analysis steps attach to.
rai_constructor_component = ml_client.components.get(name="rai_insights_constructor", version=version_string)

# Individual analyses.
rai_explanation_component = ml_client.components.get(name="rai_insights_explanation", version=version_string)
rai_causal_component = ml_client.components.get(name="rai_insights_causal", version=version_string)
rai_counterfactual_component = ml_client.components.get(name="rai_insights_counterfactual", version=version_string)
rai_erroranalysis_component = ml_client.components.get(name="rai_insights_erroranalysis", version=version_string)

# Gather: combines the constructor output with the individual insights.
rai_gather_component = ml_client.components.get(name="rai_insights_gather", version=version_string)

# Scorecard: produces the summary report from the gathered dashboard.
rai_scorecard_component = ml_client.components.get(name="rai_score_card", version=version_string)

# Score card generation config

In [33]:
import json

# Settings for the scorecard step; written to a JSON file below so the file can
# be handed to the pipeline as an input.
score_card_config_dict = {
    # Descriptive model metadata.
    "Model": {
        "ModelName": "GPT2 Access",
        "ModelType": "Regression",
        "ModelSummary": "This is a regression model to analyze how likely a programmer is given access to gpt 2",
    },
    # Metrics to report; mean_absolute_error additionally carries a threshold.
    "Metrics": {
        "mean_absolute_error": {"threshold": "<=20"},
        "mean_squared_error": {},
    },
    "FeatureImportance": {"top_n": 6},
    "DataExplorer": {"features": ["YOE", "age"]},
    "Fairness": {
        "metric": ["mean_squared_error", "mean_absolute_error"],
        "sensitive_features": ["IDE", "style"],
        "fairness_evaluation_kind": "difference",
    },
}

score_card_config_filename = "rai_programmer_regression_score_card_config.json"

# Explicit encoding keeps the written file independent of the platform default.
with open(score_card_config_filename, "w", encoding="utf-8") as config_file:
    json.dump(score_card_config_dict, config_file)

# Converting Parameters to JSON and Defining the Pipeline

In [34]:
import json
from azure.ai.ml import dsl,Input
from azure.ai.ml.constants import AssetTypes

# Pipeline input that points at the scorecard config JSON file written earlier.
score_card_config_path = Input(
    path=score_card_config_filename,
    type="uri_file",
    mode="download",
)

# These values are handed to the RAI components as JSON-encoded strings.

# Categorical columns, passed to the constructor component.
categorical_columns = json.dumps(
    [
        "location",
        "style",
        "job title",
        "OS",
        "Employer",
        "IDE",
        "Programming language",
    ]
)
# Treatment features, passed to the causal component.
treatment_features = json.dumps(
    [
        "Number of github repos contributed to",
        "YOE",
    ]
)
# Desired range, passed to the counterfactual component.
desired_range = json.dumps([5, 10])
# Filter features, passed to the error analysis component.
filter_columns = json.dumps(["style", "Employer"])

# Pipeline that builds the Responsible AI dashboard and a scorecard for the
# programmers regression model.
#
# Inputs:
#   target_column_name     -- forwarded to the constructor step as the target column.
#   train_data, test_data  -- forwarded to the constructor step as train/test datasets.
#   score_card_config_path -- forwarded to the scorecard step as its PDF generation config.
# Returns a dict of pipeline outputs: "dashboard" and "ux_json" from the gather
# step and "scorecard" from the scorecard step.
#
# NOTE(review): the step variable names and their order are kept exactly as they
# were; the DSL may derive node names from them -- confirm before renaming.
@dsl.pipeline(
    compute=compute_name,
    description="Example RAI computation on programmers data",
    experiment_name="RAI_Programmers_Example_RAIInsights_Computation",
)
def rai_programmer_regression_pipeline(
    target_column_name,
    train_data,
    test_data,
    score_card_config_path,
):
    # Step 1: construct the RAI insights for the registered model.
    create_rai_job = rai_constructor_component(
        title="RAI Dashboard Example",
        task_type="regression",
        model_info=expected_model_id,
        model_input=Input(type=AssetTypes.MLFLOW_MODEL, path=azureml_model_id),
        train_dataset=train_data,
        test_dataset=test_data,
        target_column_name=target_column_name,
        categorical_column_names=categorical_columns,
    )
    create_rai_job.set_limits(timeout=120)

    # Step 2: explanation, fed by the constructor's dashboard output.
    explain_job = rai_explanation_component(
        comment="Explanation for the programmers dataset",
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
    )
    explain_job.set_limits(timeout=120)

    # Step 3: causal analysis over the configured treatment features.
    causal_job = rai_causal_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        treatment_features=treatment_features,
    )
    causal_job.set_limits(timeout=180)

    # Step 4: counterfactual analysis (given the largest timeout of all steps).
    counterfactual_job = rai_counterfactual_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        total_cfs=10,
        desired_range=desired_range,
    )
    counterfactual_job.set_limits(timeout=600)

    # Step 5: error analysis restricted to the configured filter features.
    erroranalysis_job = rai_erroranalysis_component(
        rai_insights_dashboard=create_rai_job.outputs.rai_insights_dashboard,
        filter_features=filter_columns,
    )
    erroranalysis_job.set_limits(timeout=120)

    # Step 6: gather the constructor output and the four insights.
    rai_gather_job = rai_gather_component(
        constructor=create_rai_job.outputs.rai_insights_dashboard,
        insight_1=explain_job.outputs.explanation,
        insight_2=causal_job.outputs.causal,
        insight_3=counterfactual_job.outputs.counterfactual,
        insight_4=erroranalysis_job.outputs.error_analysis,
    )
    rai_gather_job.set_limits(timeout=120)

    # Both gathered outputs use upload mode.
    rai_gather_job.outputs.dashboard.mode = "upload"
    rai_gather_job.outputs.ux_json.mode = "upload"

    # Step 7: generate the score card in pdf format for a summary report on model
    # performance, and to observe the distribution of error between prediction and
    # ground truth.
    rai_scorecard_job = rai_scorecard_component(
        dashboard=rai_gather_job.outputs.dashboard,
        pdf_generation_config=score_card_config_path,
    )

    return {
        "dashboard": rai_gather_job.outputs.dashboard,
        "ux_json": rai_gather_job.outputs.ux_json,
        "scorecard": rai_scorecard_job.outputs.scorecard,
    }

# Defining Train and Test Files

In [35]:
# MLTable inputs for the pipeline; each path is "<asset name>:<version string>".
programmers_train_mltable = Input(
    path=f"{input_train_data}:{rai_programmer_example_version_string}",
    type="mltable",
    mode="download",
)
programmers_test_mltable = Input(
    path=f"{input_test_data}:{rai_programmer_example_version_string}",
    type="mltable",
    mode="download",
)

# Defining Pipeline Objects

In [36]:
import uuid
from azure.ai.ml import Output

# Instantiate the pipeline with the concrete inputs defined in earlier cells.
insights_pipeline_job = rai_programmer_regression_pipeline(
    target_column_name=target_column_name,
    train_data=programmers_train_mltable,
    test_data=programmers_test_mltable,
    score_card_config_path=score_card_config_path,
)

# A fresh UUID is used as the folder name, so each run of this cell gets its own
# output location on the workspace blob store.
rand_path = str(uuid.uuid4())

# Bind each pipeline output to its own sub-folder under that location.
insights_pipeline_job.outputs.dashboard = Output(
    type="uri_folder",
    mode="upload",
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/dashboard/",
)
insights_pipeline_job.outputs.ux_json = Output(
    type="uri_folder",
    mode="upload",
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/ux_json/",
)
insights_pipeline_job.outputs.scorecard = Output(
    type="uri_folder",
    mode="upload",
    path=f"azureml://datastores/workspaceblobstore/paths/{rand_path}/scorecard/",
)

# Submitting the pipeline to AzureML for execution

In [None]:
from azure.ai.ml.entities import PipelineJob
from azure.ai.ml import Input

def submit_and_wait(ml_client, pipeline_job, poll_interval_seconds=30) -> "PipelineJob":
    """Submit ``pipeline_job`` and block until it reaches a terminal status.

    Args:
        ml_client: Client whose ``jobs.create_or_update`` and ``jobs.get`` are
            used to submit the job and to poll it.
        pipeline_job: The pipeline job to submit.
        poll_interval_seconds: Seconds to sleep between status polls
            (defaults to 30, the interval this function has always used).

    Returns:
        The job object as last fetched, with status ``'Completed'``.

    Raises:
        AssertionError: If submission returns no job, or the job ends in any
            terminal status other than ``'Completed'``.
    """
    terminal_statuses = ("Completed", "Failed", "Canceled", "NotResponding")

    created_job = ml_client.jobs.create_or_update(pipeline_job)
    assert created_job is not None, "Job submission did not return a job"

    while created_job.status not in terminal_statuses:
        # Relies on the module-level `import time`; the original cell never
        # imported it, so the first poll raised NameError.
        time.sleep(poll_interval_seconds)
        created_job = ml_client.jobs.get(created_job.name)
        print("Latest status : {0}".format(created_job.status))

    assert created_job.status == "Completed", (
        "Job {0} finished with status {1}".format(created_job.name, created_job.status)
    )
    return created_job
    
# Submit the RAI pipeline and block until it finishes; the returned job is used
# by later cells.
insights_job = submit_and_wait(
    ml_client=ml_client,
    pipeline_job=insights_pipeline_job,
)

# Expected URI

In [None]:
# Workspace coordinates, read through MLClient's public properties rather than
# the private `_operation_scope` attribute.
sub_id = ml_client.subscription_id
rg_name = ml_client.resource_group_name
ws_name = ml_client.workspace_name

# Link to the model analysis page for the analyzed model in this workspace.
expected_uri = (
    f"https://ml.azure.com/model/{expected_model_id}/model_analysis"
    f"?wsid=/subscriptions/{sub_id}/resourcegroups/{rg_name}/workspaces/{ws_name}"
)

print(f"Please visit {expected_uri} to see your analysis")

# Downloading the Scorecard PDF

In [None]:
# Download the "scorecard" output of the finished pipeline job into the current
# working directory.
target_directory = "."

ml_client.jobs.download(
    insights_job.name,
    download_path=target_directory,
    output_name="scorecard",
)