
# Prerequisite

In [None]:

#import required libraries
from azure.ml import MLClient, dsl
from azure.ml.entities import CommandJob, Code, JobInput, JobOutput, Dataset
from azure.identity import InteractiveBrowserCredential

In [None]:
# Coordinates of the Azure ML workspace to connect to.
# Replace these with the details of your own workspace.
subscription_id, resource_group, workspace = (
    "15ae9cb6-95c1-483d-a0e3-b1a1a3b06324",  # subscription ID
    "user_studies",  # resource group
    "CLIV1Pipelines",  # workspace name
)

In [None]:
# Get a handle to the workspace. The client is built from an interactive
# browser credential plus the subscription, resource group and workspace
# names configured earlier in the notebook.
ml_client = MLClient(
    InteractiveBrowserCredential(),
    subscription_id,
    resource_group,
    workspace,
)

# Pipeline job with command jobs
## Create command jobs

In [None]:
# Settings shared by the command jobs in this notebook.
parent_dir = "."  # prefix for the data/ and *_src/ local paths
environment = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:5"  # passed to every CommandJob

# Train job: runs train.py from <parent_dir>/train_src on "cpu-cluster".
# The command is spelled out line by line so the embedded newlines and the
# trailing space after --learning_rate_schedule are explicit.
train_cmd = (
    "python train.py --training_data ${{inputs.training_data}} --max_epocs ${{inputs.max_epocs}}\n"
    "        --learning_rate ${{inputs.learning_rate}} --learning_rate_schedule \n"
    "        ${{inputs.learning_rate_schedule}} --model_output ${{outputs.model_output}}"
)

# Default input values; each key matches an ${{inputs.<key>}} placeholder above.
train_inputs = {
    "training_data": JobInput(dataset=Dataset(local_path=f"{parent_dir}/data/")),
    "max_epocs": 20,
    "learning_rate": 1.8,
    "learning_rate_schedule": "time-based",
}

# Single output, referenced in the command as ${{outputs.model_output}}.
train_outputs = {"model_output": JobOutput()}

train_job = CommandJob(
    display_name="my-train-job",
    command=train_cmd,
    code=Code(local_path=f"{parent_dir}/train_src"),
    environment=environment,
    compute="cpu-cluster",
    inputs=train_inputs,
    outputs=train_outputs,
)

# Score job: runs score.py from <parent_dir>/score_src.
# No compute is set on this job itself.
score_cmd = (
    "python score.py --model_input ${{inputs.model_input}}\n"
    "        --test_data ${{inputs.test_data}} --score_output ${{outputs.score_output}}"
)

# Both inputs default to the local data folder; each key matches an
# ${{inputs.<key>}} placeholder in the command.
score_inputs = {
    "model_input": JobInput(dataset=Dataset(local_path=f"{parent_dir}/data/")),
    "test_data": JobInput(dataset=Dataset(local_path=f"{parent_dir}/data/")),
}

# Single output, referenced in the command as ${{outputs.score_output}}.
score_outputs = {"score_output": JobOutput()}

score_job = CommandJob(
    display_name="my-score-job",
    command=score_cmd,
    code=Code(local_path=f"{parent_dir}/score_src"),
    environment=environment,
    inputs=score_inputs,
    outputs=score_outputs,
)

# Eval job: runs eval.py from <parent_dir>/eval_src.
# No compute is set on this job itself.

# Both inputs default to the local data folder; each key matches an
# ${{inputs.<key>}} placeholder in the command.
eval_inputs = {
    "model_path": JobInput(dataset=Dataset(local_path=parent_dir + "/data/")),
    "scoring_result": JobInput(dataset=Dataset(local_path=parent_dir + "/data/")),
}

# Single output, referenced in the command as ${{outputs.eval_output}}.
eval_outputs = {"eval_output": JobOutput()}

# Fix: --model_path previously read "$${{inputs.model_path}}". The stray "$"
# is not part of the ${{...}} placeholder syntax used everywhere else, so it
# would have been left in front of the substituted path. The rest of the
# string (newlines, indentation, trailing spaces) is unchanged.
eval_cmd = (
    "python eval.py \n"
    "    --model_path ${{inputs.model_path}} \n"
    "    --scoring_result ${{inputs.scoring_result}} \n"
    "    --eval_output ${{outputs.eval_output}}"
)

eval_job = CommandJob(
    inputs=eval_inputs,
    outputs=eval_outputs,
    display_name="my-evaluate-job",
    code=Code(local_path=parent_dir + "/eval_src"),
    environment=environment,
    command=eval_cmd,
)

In [None]:
# Convert each command job into a component function that can be called
# inside a pipeline definition. Order: train, score, eval.
train_func, score_func, eval_func = (
    dsl.load_component(component=job)
    for job in (train_job, score_job, eval_job)
)

## Build pipeline

In [None]:
# Construct a pipeline out of the three command-job components.
#
# sample_pipeline chains train -> score -> eval:
#   * train_func receives the training data and the three training
#     hyper-parameters passed to the pipeline;
#   * score_func receives the trained model (train's model_output) and the
#     pipeline's test data;
#   * eval_func receives the trained model and score's score_output.
#
# It returns a dict that maps pipeline-level output names to the
# corresponding node outputs.
#
# NOTE(review): compute="cpu-cluster" on the decorator is presumably the
# default compute for steps that do not set their own (the score and eval
# jobs do not) -- confirm against the SDK documentation.
# NOTE(review): comments are used instead of a docstring/annotations on
# purpose; the decorator may read those from the function -- confirm before
# adding any.
@dsl.pipeline(
    compute="cpu-cluster",
)
def sample_pipeline(
        pipeline_job_training_input,
        pipeline_job_test_input,
        pipeline_job_training_max_epocs,
        pipeline_job_training_learning_rate,
        pipeline_job_learning_rate_schedule,
):
    
    # Step 1: training, fed entirely from pipeline-level inputs.
    train_node = train_func(
        training_data=pipeline_job_training_input,
        max_epocs=pipeline_job_training_max_epocs,
        learning_rate=pipeline_job_training_learning_rate,
        learning_rate_schedule=pipeline_job_learning_rate_schedule,
    )

    # Step 2: scoring, using the model produced by the training step.
    score_node = score_func(
        model_input=train_node.outputs.model_output,
        test_data=pipeline_job_test_input,
    )

    # Step 3: evaluation, using the trained model and the scoring result.
    eval_node = eval_func(model_path=train_node.outputs.model_output,scoring_result=score_node.outputs.score_output)
    # Expose one output per step under a pipeline-level name.
    return {
        "pipeline_job_trained_model": train_node.outputs.model_output,
        "pipeline_job_scored_data": score_node.outputs.score_output,
        "pipeline_job_evaluation_report": eval_node.outputs.eval_output,
    }

# Instantiate the pipeline. Arguments are positional, in the order declared
# by sample_pipeline.
pipeline = sample_pipeline(
    Dataset(local_path=f"{parent_dir}/data/"),  # pipeline_job_training_input
    Dataset(local_path=f"{parent_dir}/data/"),  # pipeline_job_test_input
    20,  # pipeline_job_training_max_epocs
    1.8,  # pipeline_job_training_learning_rate
    "time-based",  # pipeline_job_learning_rate_schedule
)

# Set every pipeline-level output to "upload" mode.
for _pipeline_output in (
    pipeline.outputs.pipeline_job_trained_model,
    pipeline.outputs.pipeline_job_scored_data,
    pipeline.outputs.pipeline_job_evaluation_report,
):
    _pipeline_output.mode = "upload"

In [None]:
# Submit the pipeline job to the workspace under the
# "command_job_in_pipeline" experiment.
# NOTE(review): continue_run_on_step_failure=True presumably lets the run
# carry on after a step fails -- confirm against the SDK documentation.
ml_client.jobs.create_or_update(
    pipeline,
    experiment_name="command_job_in_pipeline",
    continue_run_on_step_failure=True,
)