In [None]:
#import required libraries
from azure.ml import MLClient
from azure.ml.entities import CommandJob, Code, PipelineJob, Dataset, JobInput
from azure.identity import InteractiveBrowserCredential

In [None]:
# Azure ML workspace coordinates: replace each placeholder with your own values
subscription_id = "<SUBSCRIPTION_ID>"
resource_group = "<RESOURCE_GROUP>"
workspace = "<AML_WORKSPACE_NAME>"

In [None]:
# Build the workspace client from the identifiers above;
# the credential object is an InteractiveBrowserCredential from azure.identity
ml_client = MLClient(
    InteractiveBrowserCredential(),
    subscription_id,
    resource_group,
    workspace,
)

In [None]:
# Top-level pipeline inputs: an epoch setting (passed as a string) and a
# Dataset built from the local ./data folder, wrapped in a JobInput
prep_data = Dataset(local_path="./data")
pipeline_job_inputs = {
    "max_epocs_from_pipeline": "20",
    "raw_data_from_pipeline": JobInput(dataset=prep_data),
}

In [None]:
# Prep step: the command chains three shell commands with "&&" --
# list installed packages, echo the epoch input, then run prep.py
prep_cmd = " && ".join(
    [
        "pip freeze",
        "echo ${{inputs.max_epocs}}",
        "python prep.py --raw_data ${{inputs.raw_data}} --prep_data ${{outputs.prep_data}}",
    ]
)
# Both step inputs are bound to the pipeline's top-level inputs
prep_job_inputs = {
    "max_epocs": "${{inputs.max_epocs_from_pipeline}}",
    "raw_data": "${{inputs.raw_data_from_pipeline}}",
}
# Output is declared by name only; no value is supplied here
prep_job_outputs = {"prep_data": None}

prep_job = CommandJob(
    code=Code(local_path="./src/prep"),  # local folder holding the step's code
    command=prep_cmd,
    inputs=prep_job_inputs,
    outputs=prep_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute = "<override with some other compute if needed>"
)

In [None]:
# Transform step: runs transform.py on the data produced by the prep step
transform_cmd = (
    "python transform.py"
    " --clean_data ${{inputs.clean_data}}"
    " --transformed_data ${{outputs.transformed_data}}"
)
# Input is bound to the 'prep_data' output of the job registered as 'prep-job'
transform_job_inputs = {"clean_data": "${{jobs.prep-job.outputs.prep_data}}"}
transform_job_outputs = {"transformed_data": None}

transform_job = CommandJob(
    code=Code(local_path="./src/transform"),  # local folder holding the step's code
    command=transform_cmd,
    inputs=transform_job_inputs,
    outputs=transform_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute = "<override with some other compute if needed>"
)

In [None]:
# Training step: runs train.py; it declares two outputs (test data and the model)
train_cmd = (
    "python train.py"
    " --training_data ${{inputs.training_data}}"
    " --test_data ${{outputs.test_data}}"
    " --model_output ${{outputs.model_output}}"
)
# Input is bound to the 'transformed_data' output of the job registered as 'transform-job'
train_job_inputs = {
    "training_data": "${{jobs.transform-job.outputs.transformed_data}}",
}
train_job_outputs = {"model_output": None, "test_data": None}

train_job = CommandJob(
    code=Code(local_path="./src/train"),  # local folder holding the step's code
    command=train_cmd,
    inputs=train_job_inputs,
    outputs=train_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute = "<override with some other compute if needed>"
)

In [None]:
# Predict step: runs predict.py with the model and test data from the training step
predict_cmd = (
    "python predict.py"
    " --model_input ${{inputs.model_input}}"
    " --test_data ${{inputs.test_data}}"
    " --predictions ${{outputs.predictions}}"
)
# Both inputs are bound to outputs of the job registered as 'train-job'
predict_job_inputs = {
    "model_input": "${{jobs.train-job.outputs.model_output}}",
    "test_data": "${{jobs.train-job.outputs.test_data}}",
}
predict_job_outputs = {"predictions": None}

predict_job = CommandJob(
    code=Code(local_path="./src/predict"),  # local folder holding the step's code
    command=predict_cmd,
    inputs=predict_job_inputs,
    outputs=predict_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute = "<override with some other compute if needed>"
)

In [None]:
# Score step: runs score.py on the predictions and the trained model
score_cmd = (
    "python score.py"
    " --predictions ${{inputs.predictions}}"
    " --model ${{inputs.model}}"
    " --score_report ${{outputs.score_report}}"
)
# Predictions come from 'predict-job'; the model comes from 'train-job'
score_job_inputs = {
    "predictions": "${{jobs.predict-job.outputs.predictions}}",
    "model": "${{jobs.train-job.outputs.model_output}}",
}
score_job_outputs = {"score_report": None}

score_job = CommandJob(
    code=Code(local_path="./src/score"),  # local folder holding the step's code
    command=score_cmd,
    inputs=score_job_inputs,
    outputs=score_job_outputs,
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cuda11-gpu:9",
    # compute = "<override with some other compute if needed>"
)

In [None]:
# Assemble the pipeline. The dictionary keys are the job names that the
# '${{jobs.<name>.outputs...}}' input bindings of the individual steps refer to.
pipeline_job = PipelineJob(
    description="nyc-taxi-pipeline-example",
    jobs={
        "prep-job": prep_job,
        "transform-job": transform_job,
        "train-job": train_job,
        "predict-job": predict_job,
        "score-job": score_job,
    },
    inputs=pipeline_job_inputs,  # top-level inputs to the pipeline
    # NOTE(review): the pipeline's outputs reuse the prep step's outputs dict -- confirm this is intended
    outputs=prep_job_outputs,
    compute="gpu-cluster",  # compute target named at the pipeline level
)

In [None]:
# Send the pipeline definition to the workspace through the client
returned_job = ml_client.create_or_update(pipeline_job)
# Final expression of the cell, so the notebook displays the "Studio" service endpoint
returned_job.services["Studio"].endpoint