In [None]:
#import required libraries
from azure.ml import MLClient, dsl
from azure.ml.entities import CommandJob, Code, Dataset, JobInput, JobOutput
from azure.identity import InteractiveBrowserCredential

In [None]:
# Azure ML workspace coordinates: replace each placeholder with your own value.
# All three are handed to MLClient when the workspace handle is created.
subscription_id = "<SUBSCRIPTION_ID>"   # passed as subscription_id
resource_group = "<RESOURCE_GROUP>"     # passed as resource_group_name
workspace = "<AML_WORKSPACE_NAME>"      # passed as workspace_name

In [None]:
# Build the workspace client from the settings above; authentication uses an
# InteractiveBrowserCredential instance created inline.
ml_client = MLClient(
    credential=InteractiveBrowserCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [None]:
# Pipeline-level inputs: a dataset created from the local ./data folder and a
# max-epochs value. The keys match the parameter names of sample_pipeline.
prep_data = Dataset(local_path="./data")
pipeline_job_inputs = {
    # NOTE(review): this is the string "20", while the prep job declares the
    # int 20 for max_epocs -- confirm the string form is intended.
    "max_epocs_from_pipeline": "20",
    "raw_data_from_pipeline": JobInput(dataset=prep_data),
}

In [None]:
# Prep step: lists the installed packages, echoes max_epocs, then runs prep.py
# with the raw-data input and the prep-data output location.
prep_cmd = "pip freeze && echo ${{inputs.max_epocs}} && python prep.py --raw_data ${{inputs.raw_data}} --prep_data ${{outputs.prep_data}}"
prep_job_inputs = {
    "max_epocs": 20,         # value declared here; the pipeline passes its own
    "raw_data": JobInput(),  # placeholder, bound to the pipeline's raw data
}
prep_job_outputs = {"prep_data": JobOutput()}

prep_job = CommandJob(
    command=prep_cmd,                    # command line executed by the step
    code=Code(local_path="./src/prep"),  # local code folder for this step
    inputs=prep_job_inputs,
    outputs=prep_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute="<override with some other compute if needed>",
)

In [None]:
# Transform step: runs transform.py on the clean-data input and writes to the
# transformed-data output location.
transform_cmd = "python transform.py --clean_data ${{inputs.clean_data}} --transformed_data ${{outputs.transformed_data}}"
transform_job_inputs = {
    "clean_data": JobInput(),  # placeholder, bound to the prep step's output
}
transform_job_outputs = {
    "transformed_data": JobOutput(),
}

transform_job = CommandJob(
    command=transform_cmd,                    # command line executed by the step
    code=Code(local_path="./src/transform"),  # local code folder for this step
    inputs=transform_job_inputs,
    outputs=transform_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute="<override with some other compute if needed>",
)

In [None]:
# Training step: runs train.py on the training data and produces two outputs,
# a test-data split and the model.
train_cmd = "python train.py --training_data ${{inputs.training_data}} --test_data ${{outputs.test_data}} --model_output ${{outputs.model_output}}"
train_job_inputs = {
    "training_data": JobInput(),  # bound to the transform job's output data
}
train_job_outputs = {
    "model_output": JobOutput(),
    "test_data": JobOutput(),
}

train_job = CommandJob(
    command=train_cmd,                    # command line executed by the step
    code=Code(local_path="./src/train"),  # local code folder for this step
    inputs=train_job_inputs,
    outputs=train_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute="<override with some other compute if needed>",
)

In [None]:
# Predict step: runs predict.py with the model and the test data, writing to
# the predictions output location.
predict_cmd = "python predict.py --model_input ${{inputs.model_input}} --test_data ${{inputs.test_data}} --predictions ${{outputs.predictions}}"
predict_job_inputs = {
    "model_input": JobInput(),  # bound to the training job's model output
    "test_data": JobInput(),    # bound to the training job's test-data output
}
predict_job_outputs = {
    "predictions": JobOutput(),
}

predict_job = CommandJob(
    command=predict_cmd,                    # command line executed by the step
    code=Code(local_path="./src/predict"),  # local code folder for this step
    inputs=predict_job_inputs,
    outputs=predict_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute="<override with some other compute if needed>",
)

In [None]:
# Score step: runs score.py with the predictions and the model, writing to the
# score-report output location.
score_cmd = "python score.py --predictions ${{inputs.predictions}} --model ${{inputs.model}} --score_report ${{outputs.score_report}}"
score_job_inputs = {
    "predictions": JobInput(),  # bound to the predict job's output
    "model": JobInput(),        # bound to the training job's model output
}
score_job_outputs = {
    "score_report": JobOutput(),
}

score_job = CommandJob(
    command=score_cmd,                    # command line executed by the step
    code=Code(local_path="./src/score"),  # local code folder for this step
    inputs=score_job_inputs,
    outputs=score_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute="<override with some other compute if needed>",
)

In [None]:
# Turn each CommandJob into a component function that the pipeline definition
# can call. Jobs are loaded in the same order as the names they are bound to.
(
    prep_func,
    transform_func,
    train_func,
    predict_func,
    score_func,
) = (
    dsl.load_component(component=job)
    for job in (prep_job, transform_job, train_job, predict_job, score_job)
)

In [None]:
# construct pipeline
# Chains the five component functions: prep -> transform -> train -> predict -> score.
# The compute target, default datastore and description are supplied through the
# decorator arguments. Returns a dict exposing two step outputs as pipeline outputs.
# NOTE(review): the local names below shadow the module-level CommandJob objects of
# the same name inside this function only. They are left untouched because some
# pipeline DSLs derive step names from local variable names -- confirm before renaming.
@dsl.pipeline(compute="cpu-cluster", default_datastore="workspaceblobstore", description = 'nyc-taxi-pipeline-example')
def sample_pipeline(
        max_epocs_from_pipeline,
        raw_data_from_pipeline
    ):

    # prep consumes both pipeline-level inputs
    prep_job = prep_func(
        max_epocs=max_epocs_from_pipeline,
        raw_data=raw_data_from_pipeline
        )
    # each later step is wired to the outputs of an earlier one
    transform_job = transform_func(clean_data=prep_job.outputs.prep_data)
    train_job = train_func(training_data=transform_job.outputs.transformed_data)
    # predict reads both training outputs: the model and the test data
    predict_job = predict_func(model_input=train_job.outputs.model_output, test_data=train_job.outputs.test_data)
    # score reads the predictions and, again, the model from training
    score_job = score_func(predictions=predict_job.outputs.predictions, model=train_job.outputs.model_output)
    # these keys become attributes on pipeline.outputs (configured after instantiation)
    return {
        "pipeline_job_predictions": predict_job.outputs.predictions,
        "pipeline_job_score_report": score_job.outputs.score_report
        }

In [None]:
# Instantiate the pipeline, passing the pipeline-level inputs positionally in
# the order sample_pipeline declares them.
pipeline = sample_pipeline(
    pipeline_job_inputs["max_epocs_from_pipeline"],
    pipeline_job_inputs["raw_data_from_pipeline"],
)

# Set the data path and the mode on each of the two pipeline outputs.
pipeline.outputs.pipeline_job_predictions.data = "/predictions"
pipeline.outputs.pipeline_job_predictions.mode = "rw_mount"

pipeline.outputs.pipeline_job_score_report.data = "/report"
pipeline.outputs.pipeline_job_score_report.mode = "rw_mount"

In [None]:
#submit the pipeline job through the workspace client and keep the returned job object
returned_job = ml_client.jobs.create_or_update(pipeline)
#get a URL for the status of the job
#(last expression of the cell, so the "Studio" service endpoint is shown as the cell output)
returned_job.services["Studio"].endpoint