In [None]:
# Had to run from the shell:
# ipython kernel install --name "local-conda-env" --user
#import required libraries
import os

from azure.ml import MLClient
from azure.ml.entities import ComponentJob, Code, PipelineJob, Dataset, InputDatasetEntry, CommandJob

This notebook is for use with a workspace to which the build automation has already uploaded the components and datasets. The build automation uses `epoch_seconds` as the version, so define those values once here for reuse:

In [None]:
# Epoch-seconds version stamps: the first is embedded in the resource group and
# workspace names, the second pins the component and dataset versions.
workspace_version_suffix = "1636365777"
component_dataset_version_suffix = "1636389121"

In [None]:
# Coordinates of the AML workspace. The resource group and workspace names both
# end in the workspace version suffix.
subscription_id = "589c7ae9-223e-45e3-a191-98433e0821a9"
resource_group = "amlisdkv2-rg-{}".format(workspace_version_suffix)
workspace = "amlisdkv2{}".format(workspace_version_suffix)

In [None]:
# Client handle for the workspace identified by the three values above.
ml_client = MLClient(
    subscription_id,
    resource_group,
    workspace,
)

The next few cells aren't necessary; they were written before I learned how to connect to a Dataset.

In [None]:
# Point at the training parquet by its datastore path.
# Alternative left disabled: reference a dataset by name and version instead,
# i.e. name="Boston_Train_PQ", version="1635933774".
train_data = Dataset(
    paths=[
        "azureml:workspaceblobstore:LocalUpload/7686dd6cb00e860d4ab820252bb8d456/adult_train.parquet",
    ],
)

In [None]:
# A SAS URL carries a signature (a credential), so it must not be stored in the
# notebook. Supply it through the ADULT_TRAIN_SAS_URL environment variable.
data_sas = os.environ.get('ADULT_TRAIN_SAS_URL', '')
if not data_sas:
    # Warn rather than raise so that Restart & Run All still completes when the
    # variable is absent; the job built below is only usable once it is set.
    print('ADULT_TRAIN_SAS_URL is not set; get_data_job cannot download the data until it is.')

# ${{outputs.adultcensus}} is kept verbatim in the command string
# (presumably substituted by the service when the job runs - TODO confirm).
get_data_cmd = 'bash fetch.bash "' + data_sas + '" adult_census.parquet ${{outputs.adultcensus}}'
# Show the command with the URL masked so the signature never ends up in the
# saved cell output.
print(get_data_cmd.replace(data_sas, '<SAS URL>') if data_sas else get_data_cmd)

# Command job that runs fetch.bash from the local `fetch_script` folder.
get_data_job = CommandJob(
    command=get_data_cmd,
    outputs={'adultcensus': None},
    environment = 'AzureML-Minimal:18',
    compute = 'cpucluster',
    code=Code(local_path='fetch_script')
)

These are the global pipeline inputs

In [None]:
# Pipeline-level inputs: the label column plus the train/test datasets, each
# referenced as "<name>:<component/dataset version>".
pipeline_inputs = dict(
    target_column_name="income",
    my_training_data=InputDatasetEntry(
        dataset="Adult_Train_PQ:{}".format(component_dataset_version_suffix)
    ),
    my_test_data=InputDatasetEntry(
        dataset="Adult_Test_PQ:{}".format(component_dataset_version_suffix)
    ),
)

Create the training job, which creates a logistic regressor for the Adult Census dataset:

In [None]:
# Bind the training component's inputs to the pipeline-level inputs.
train_job_inputs = dict(
    target_column_name="${{inputs.target_column_name}}",
    training_data="${{inputs.my_training_data}}",
)
# Single declared output; no value is supplied for it here.
train_job_outputs = dict(model_output=None)

# Component reference is "<name>:<version>".
train_job = ComponentJob(
    component="TrainLogisticRegressionForRAI:{}".format(component_dataset_version_suffix),
    inputs=train_job_inputs,
    outputs=train_job_outputs,
)

And a job to register the model and put out the JSON file which Model Analysis can read:

In [None]:
# The model path is taken from the `model_output` of the job keyed
# 'train-model-job' in the pipeline's `jobs` mapping.
register_job_inputs = dict(
    model_input_path="${{jobs.train-model-job.outputs.model_output}}",
    model_base_name="notebook_registered_logreg",
)
# Single declared output; no value is supplied for it here.
register_job_outputs = dict(model_info_output_path=None)

register_job = ComponentJob(
    component="RegisterModel:{}".format(component_dataset_version_suffix),
    inputs=register_job_inputs,
    outputs=register_job_outputs,
)

This is the top-level Model Analysis job, which declares that we want to create an analysis. This job will also take the data snapshots.

In [None]:
# Inputs for the Model Analysis component. The two column lists are passed as
# JSON-formatted strings, not as Python lists.
create_ma_inputs = dict(
    title="Experimenting from a Notebook",
    task_type="classification",
    # Taken from the output of the job keyed 'register-model-job'.
    model_info_path="${{jobs.register-model-job.outputs.model_info_output_path}}",
    train_dataset="${{inputs.my_training_data}}",
    test_dataset="${{inputs.my_test_data}}",
    target_column_name="${{inputs.target_column_name}}",
    X_column_names='["Age", "Workclass", "Education-Num", "Marital Status", "Occupation", "Relationship", "Race", "Sex", "Capital Gain", "Capital Loss", "Hours per week", "Country"]',
    datastore_name="workspaceblobstore",
    categorical_column_names='["Race", "Sex", "Workclass", "Marital Status", "Country", "Occupation"]',
)
# Single declared output; no value is supplied for it here.
create_ma_outputs = dict(model_analysis_info=None)

create_ma_job = ComponentJob(
    component="AzureMLModelAnalysis:{}".format(component_dataset_version_suffix),
    inputs=create_ma_inputs,
    outputs=create_ma_outputs,
)

Create an explanation for the Model Analysis:

In [None]:
# The explanation job takes the analysis info from the job keyed 'create-ma-job'
# and declares no outputs of its own.
explain_inputs = dict(
    comment="Insert text here",
    model_analysis_info="${{jobs.create-ma-job.outputs.model_analysis_info}}",
)

explain_job = ComponentJob(
    component="AzureMLModelAnalysisExplanation:{}".format(component_dataset_version_suffix),
    inputs=explain_inputs,
)

Put all the jobs into a pipeline (note that several of them have already been referencing the keys in the `jobs` section).

In [None]:
# Assemble the pipeline. The keys of `jobs` are the names that the
# ${{jobs.<key>.outputs...}} references in the job input dictionaries point to,
# so they must stay in sync with those strings.
pipeline_job = PipelineJob(
    description="Notebook submitted Adult",
    jobs = {
        'train-model-job': train_job,
        'register-model-job': register_job,
        'create-ma-job': create_ma_job,
        'explain-ma-job': explain_job,
    },
    inputs=pipeline_inputs,
    # The pipeline's outputs reuse the training job's output declaration.
    outputs=train_job_outputs,
    # Previously misspelled `commpute`, so "cpucluster" was never passed as the
    # `compute` argument.
    compute="cpucluster"
)

And the actual submission:

In [None]:
# Send the assembled pipeline to the workspace.
returned_job = ml_client.jobs.create_or_update(pipeline_job)
# Leave the "Studio" service endpoint as the last expression so the cell
# displays it; this is where the job's status can be followed.
returned_job.services['Studio'].endpoint