
# Prerequisite

In [None]:

# Import required libraries
from azure.ml import MLClient
from azure.ml.entities import Code, Dataset
from azure.identity import InteractiveBrowserCredential

In [None]:
# Enter the details of your AML workspace: replace each <PLACEHOLDER> value
# below before running the cells that follow. These three names are passed
# to MLClient when the workspace handle is created.
subscription_id = '<SUBSCRIPTION_ID>'
resource_group = '<RESOURCE_GROUP>'
workspace = '<AML_WORKSPACE_NAME>'

In [None]:
# Get a handle to the workspace, authenticating with an interactive
# browser credential and the workspace details entered above.
ml_client = MLClient(
    InteractiveBrowserCredential(),
    subscription_id,
    resource_group,
    workspace,
)

# Build pipeline

In [None]:
from azure.ml import dsl
from azure.ml.dsl import Pipeline
from azure.ml.entities import Dataset, Component
from pathlib import Path

# Directory that holds the component YAML specs (and the data folder used
# when the pipeline is instantiated). An empty string means "relative to
# the current working directory".
parent_dir = ""

# 1. Get component definition and transfer component to functions
#
# Paths are built with pathlib rather than string concatenation so that a
# non-empty parent_dir works with or without a trailing separator
# ("my_dir" + "./prep.yml" would otherwise become "my_dir./prep.yml").
_spec_dir = Path(parent_dir)

prep_component = Component.load(path=str(_spec_dir / "prep.yml"))
prep_func = dsl.load_component(component=prep_component)

transform_component = Component.load(path=str(_spec_dir / "transform.yml"))
transform_func = dsl.load_component(component=transform_component)

train_component = Component.load(path=str(_spec_dir / "train.yml"))
train_func = dsl.load_component(component=train_component)

predict_component = Component.load(path=str(_spec_dir / "predict.yml"))
predict_func = dsl.load_component(component=predict_component)

score_component = Component.load(path=str(_spec_dir / "score.yml"))
score_func = dsl.load_component(component=score_component)

# 2. Construct pipeline
# Pipeline definition: prep -> transform -> train -> predict -> score.
#
# The decorator arguments select "cpu-cluster" as the compute and
# "workspaceblobstore" as the default datastore for the pipeline.
#
# Parameter:
#   pipeline_job_input: forwarded to the prep step as its `raw_data` input.
# Returns:
#   A dict mapping pipeline-level output names to the step outputs backing
#   them (prepped data, transformed data, trained model, test data,
#   predictions and score report).
#
# NOTE(review): documented with comments instead of a docstring, and the
# local step variable names are left untouched, because the DSL decorator
# may use the docstring / local names as pipeline metadata -- confirm
# against the SDK before changing either.
@dsl.pipeline(compute="cpu-cluster", default_datastore="workspaceblobstore")
def sample_pipeline(pipeline_job_input):
    # Clean the raw input data.
    prep_job = prep_func(raw_data=pipeline_job_input)
    # Transform the cleaned data produced by the prep step.
    transform_job = transform_func(clean_data=prep_job.outputs.prep_data)
    # Train on the transformed data; this step exposes both a model and a
    # test data output, which the predict step consumes below.
    train_job = train_func(training_data=transform_job.outputs.transformed_data)
    predict_job = predict_func(
        model_input=train_job.outputs.model_output, test_data=train_job.outputs.test_data
    )
    # Score the predictions against the trained model.
    score_job = score_func(predictions=predict_job.outputs.predictions, model=train_job.outputs.model_output)
    # Expose every intermediate and final artifact as a pipeline-level output.
    return {
        "pipeline_job_prepped_data": prep_job.outputs.prep_data,
        "pipeline_job_transformed_data": transform_job.outputs.transformed_data,
        "pipeline_job_trained_model": train_job.outputs.model_output,
        "pipeline_job_test_data": train_job.outputs.test_data,
        "pipeline_job_predictions": predict_job.outputs.predictions,
        "pipeline_job_score_report": score_job.outputs.score_report,
    }

# Create the pipeline instance, feeding it the local data folder as input.
# The folder path is built with pathlib so that a non-empty parent_dir works
# with or without a trailing separator (plain concatenation would turn
# "my_dir" + "./data/" into "my_dir./data/").
pipeline = sample_pipeline(
    Dataset(local_path=str(Path(parent_dir) / "data"))
)

# Storage path assigned to each pipeline-level output. Keys must match the
# names returned by sample_pipeline.
output_paths = {
    "pipeline_job_prepped_data": "/prepped_data",
    "pipeline_job_transformed_data": "/transformed_data",
    "pipeline_job_trained_model": "/trained-model",
    "pipeline_job_test_data": "/test_data",
    "pipeline_job_predictions": "/predictions",
    "pipeline_job_score_report": "/report",
}

# Every output gets its path and the same "rw_mount" mode.
for output_name, output_path in output_paths.items():
    pipeline_output = getattr(pipeline.outputs, output_name)
    pipeline_output.data = output_path
    pipeline_output.mode = "rw_mount"

## Submit pipeline job

In [None]:
# Submit the pipeline job to the workspace under the named experiment.
returned_job = ml_client.jobs.create_or_update(
    pipeline,
    experiment_name="nyc_taxi_data_regression",
    continue_run_on_step_failure=True,
)
# Last expression of the cell: displays the job's "Studio" service endpoint.
returned_job.services["Studio"].endpoint