
# Prerequisites

In [None]:

#import required libraries
from azure.ml import MLClient
from azure.ml.entities import Code, Dataset

In [None]:

# Azure ML workspace coordinates: replace each placeholder with your own value.
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AML_WORKSPACE_NAME>"

In [None]:
# Get a handle to the workspace. The later cells register components and
# submit the pipeline job through this client.
ml_client = MLClient(subscription_id, resource_group, workspace)

# Pipeline job with registered component
## Register components

In [None]:
# `dsl` is imported here because this is the first cell that calls
# `dsl.load_component`; without it the cell fails with a NameError when the
# notebook is run from top to bottom.
from azure.ml import dsl
from azure.ml.entities import CommandComponent

# Folder that holds the component YAML specs (train/score/eval) and the
# sample data directory used when the pipeline is built.
parent_dir = './basic/1b_e2e_registered_components'
# NOTE(review): not referenced by any visible cell -- presumably kept as a
# reminder of the curated environment the component specs target; confirm.
environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5"

# Load the training component from its YAML spec and register it in the workspace.
train_component = dsl.load_component(parent_dir + '/train.yaml')
ml_client.create_or_update(train_component)

In [None]:
# Load the scoring component from its YAML spec, then register it in the workspace.
score_component = dsl.load_component(f"{parent_dir}/score.yaml")
ml_client.create_or_update(score_component)

In [None]:
# Load the evaluation component from its YAML spec, then register it in the workspace.
eval_component = dsl.load_component(f"{parent_dir}/eval.yaml")
ml_client.create_or_update(eval_component)

## Build pipeline

In [None]:
from azure.ml import dsl, MLClient
from azure.ml.dsl import Pipeline
from azure.ml.entities import Component as ComponentEntity, Dataset
from pathlib import Path

def generate_dsl_pipeline(
        client: MLClient,
        train_component: ComponentEntity,
        score_component: ComponentEntity,
        eval_component: ComponentEntity,
    ) -> Pipeline:
    """Assemble the train -> score -> eval sample pipeline.

    Each component is looked up through ``client`` by its ``name`` and
    ``version`` (so it is presumably expected to be registered already),
    wired into a DSL pipeline that targets the "cpu-cluster" compute, and
    instantiated with fixed sample inputs. The data folder is derived from
    the module-level ``parent_dir``.

    Args:
        client: Workspace client used to resolve the components.
        train_component: Entity whose name/version identify the train step.
        score_component: Entity whose name/version identify the score step.
        eval_component: Entity whose name/version identify the eval step.

    Returns:
        The pipeline instance, with all three pipeline outputs set to
        "upload" mode.
    """
    def _load_registered(component):
        # Resolve a component function from its name and version via the client.
        return dsl.load_component(
            client=client,
            name=component.name,
            version=component.version,
        )

    # 1. Load component funcs
    train_func = _load_registered(train_component)
    score_func = _load_registered(score_component)
    eval_func = _load_registered(eval_component)

    # 2. Construct pipeline
    # The function name, its parameter names and the local node names are
    # kept stable on purpose: the DSL may derive pipeline input and node
    # names from them.
    @dsl.pipeline(
        compute="cpu-cluster",
        description="E2E dummy train-score-eval pipeline with registered components",
    )
    def sample_pipeline(
            pipeline_job_training_input,
            pipeline_job_test_input,
            pipeline_job_training_max_epocs,
            pipeline_job_training_learning_rate,
            pipeline_job_learning_rate_schedule,
    ):
        # Train on the training input with the supplied hyper-parameters.
        train_job = train_func(
            training_data=pipeline_job_training_input,
            max_epocs=pipeline_job_training_max_epocs,
            learning_rate=pipeline_job_training_learning_rate,
            learning_rate_schedule=pipeline_job_learning_rate_schedule,
        )
        # Score the test input with the model produced by the train step.
        score_job = score_func(
            model_input=train_job.outputs.model_output,
            test_data=pipeline_job_test_input,
        )
        score_job.outputs.score_output.mode = "upload"
        # Evaluate the scoring result.
        evaluate_job = eval_func(scoring_result=score_job.outputs.score_output)
        return {
            "pipeline_job_trained_model": train_job.outputs.model_output,
            "pipeline_job_scored_data": score_job.outputs.score_output,
            "pipeline_job_evaluation_report": evaluate_job.outputs.eval_output,
        }

    # 3. Instantiate the pipeline; the same folder feeds both the training
    # and the test input.
    data_dir = parent_dir + "/data/"
    pipeline = sample_pipeline(
        Dataset(local_path=data_dir),
        Dataset(local_path=data_dir),
        20,
        1.8,
        "time-based",
    )

    # Every pipeline-level output uses "upload" mode.
    for output_name in (
        "pipeline_job_trained_model",
        "pipeline_job_scored_data",
        "pipeline_job_evaluation_report",
    ):
        getattr(pipeline.outputs, output_name).mode = "upload"
    return pipeline

## Submit pipeline job

In [None]:
# Build the pipeline job from the train, score and eval components.
pipeline = generate_dsl_pipeline(ml_client, train_component, score_component, eval_component)
# Submit the job to the workspace under the "e2e_registered_components" experiment.
ml_client.jobs.create_or_update(
    pipeline,
    experiment_name="e2e_registered_components",
    continue_run_on_step_failure=True,
)