Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/CMK8s-Samples/sample_notebooks/008%20SDK%202.0/sample_pipeline_SDK2.0.png)

In [None]:
# Install the Azure ML (AML) SDK, upgrading it if an older version is present.
pip install --upgrade azureml-sdk

In [None]:
# Install the CMAKS SDK packages.
# --extra-index-url adds the test package feed as an additional index to search;
# --disable-pip-version-check suppresses pip's own "newer pip available" check;
# --upgrade upgrades any of the listed packages that are already installed.
pip install --disable-pip-version-check --extra-index-url https://azuremlsdktestpypi.azureedge.net/CmAks-Compute-Test/D58E86006C65 azureml-pipeline-steps azureml-contrib-pipeline-steps azureml_contrib_itp --upgrade

In [None]:
# Install SDK 2.0 (the azureml-pipeline-wrapper package).
# The version is pinned to the exact build 0.1.0.15254362, with the `steps` and
# `notebooks` extras, and is resolved via the extra index URL for that build.
# --user installs into the per-user site-packages directory.
pip install azureml-pipeline-wrapper[steps,notebooks]==0.1.0.15254362 --extra-index-url https://azuremlsdktestpypi.azureedge.net/CLI-SDK-Runners-Validation/15254362 --user --upgrade

In [None]:
# Settings that identify the Azure ML workspace and the CMAKS compute target
# used throughout this notebook. Edit these to point at your own resources.

# Azure subscription and resource group that contain the workspace.
sub_id = "06237282-1ddd-4677-9878-5bc19cc0001a"
resource_group = "ITPTestingWorkers"

# Name of the Azure ML workspace.
workspace_name = "cmaksphillywestcentralus"

# Name of the CMAKS compute target attached to that workspace.
compute_name = "cmakstestgpu2"

In [None]:
from azureml.core import Workspace, Run, Dataset
from azureml.pipeline.wrapper import Pipeline, dsl, Module

# Initialize the workspace handle from workspace_name, sub_id and resource_group.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=sub_id,
    resource_group=resource_group,
)

In [None]:
# Create (or reuse) the dummy datasets consumed by the sample pipeline.
training_data_name = 'training_data'
test_data_name = 'test_data'

# Resolve the default datastore up front: both registration branches below need
# it, and either one may run without the other (e.g. the training dataset is
# already registered but the test dataset is not).
datastore = ws.get_default_datastore()

if training_data_name not in ws.datasets:
    print('Registering a training dataset for sample pipeline ...')
    # upload the local files from src_dir to the target_path in the datastore
    datastore.upload(src_dir='dummy_data', target_path='dummy_data')
    train_data = Dataset.File.from_files(path=(datastore, 'dummy_data/hello.json'))
    train_data.register(
        workspace=ws,
        name=training_data_name,
        description='Training data (just for illustrative purpose)',
    )
    print('Registered')
else:
    train_data = ws.datasets[training_data_name]
    print('Training dataset found in workspace')

if test_data_name not in ws.datasets:
    print('Registering a test dataset for sample pipeline ...')
    # Make sure the dummy file exists in the datastore even when the training
    # branch above was skipped. upload() is called with its default
    # overwrite=False, so files that are already present are not re-uploaded.
    datastore.upload(src_dir='dummy_data', target_path='dummy_data')
    test_data = Dataset.File.from_files(path=(datastore, 'dummy_data/hello.json'))
    test_data.register(
        workspace=ws,
        name=test_data_name,
        description='Test data (just for illustrative purpose)',
    )
    print('Registered')
else:
    test_data = ws.datasets[test_data_name]
    print('Test dataset found in workspace')

In [None]:
# Load the four module definitions from their YAML specs. The specs are loaded
# in the order listed and bound to the corresponding *_module_func names.
(
    train_module_func,
    score_module_func,
    eval_module_func,
    compare_module_func,
) = (
    Module.from_yaml(ws, spec_path)
    for spec_path in (
        './train-score-eval/train.yaml',
        './train-score-eval/score.yaml',
        './train-score-eval/eval.yaml',
        './train-score-eval/compare2.yaml',
    )
)

# Fetch the registered datasets by name from the workspace.
train_data = Dataset.get_by_name(workspace=ws, name=training_data_name)
test_data = Dataset.get_by_name(workspace=ws, name=test_data_name)

In [None]:
# Define a sub pipeline: train -> score -> eval.
#
# Parameters:
#   input_data    -- passed to the train module as `training_data`.
#   learning_rate -- passed to the train module as `learning_rate`.
#
# Returns a dict exposing two outputs of the sub pipeline:
#   'eval_output'  -- the eval module's `eval_output`
#   'model_output' -- the train module's `model_output`
#
# Note: the score step reads `test_data` from the notebook's global scope; it is
# not a parameter of this function.
@dsl.pipeline(name = 'Train-Score-Eval', 
              description = 'train model and evaluate model perf')
def training_pipeline(input_data, learning_rate):
    # Train with a fixed number of epochs (5) and the caller-supplied learning rate.
    train = train_module_func(
        training_data=input_data, 
        max_epochs=5, 
        learning_rate=learning_rate)
   
    # Score the trained model against the global test dataset.
    score = score_module_func(
        model_input=train.outputs.model_output, 
        test_data=test_data)

    # NOTE(review): `eval` shadows the Python builtin inside this function. It is
    # left as-is here because the pipeline DSL may derive step names from local
    # variable names -- confirm before renaming.
    eval = eval_module_func(scoring_result=score.outputs.score_output)

    return {'eval_output': eval.outputs.eval_output, 'model_output': train.outputs.model_output}

In [None]:
# Import CmAksCompute even though it is not referenced directly below: without
# this import, the CMAKS compute target cannot be found.
from azureml.contrib.core.compute.cmakscompute import CmAksCompute
from azureml.core.compute import ComputeTarget
# Look up the existing compute target named `compute_name` in the workspace.
cmaks_compute = ComputeTarget(workspace=ws, name=compute_name)

In [None]:
# Define the top-level pipeline, which is composed of two instances of the
# `training_pipeline` sub pipeline followed by a compare step.
# `cmaks_compute` is set as the pipeline's default compute target.
@dsl.pipeline(name = 'Dummy automl pipeline - aks', 
              description = 'select best model trained with different learning rate',
              default_compute_target = cmaks_compute)
def dummy_automl_pipeline():
    # Run the sub pipeline twice on the same training data, once per learning rate.
    train_and_evalute_model1 = training_pipeline(train_data, 0.01)
    train_and_evalute_model2 = training_pipeline(train_data, 0.02)
    
    # Feed both models and their evaluation outputs into the compare module.
    compare = compare_module_func(
        model1=train_and_evalute_model1.outputs.model_output, 
        eval_result1=train_and_evalute_model1.outputs.eval_output,
        model2=train_and_evalute_model2.outputs.model_output,
        eval_result2=train_and_evalute_model2.outputs.eval_output
    )

    # Expose every output of the compare step as an output of this pipeline.
    return {**compare.outputs}

# Create a pipeline instance; it is submitted in a later cell.
pipeline = dummy_automl_pipeline()

In [None]:
# Optional: uncomment the next line to call pipeline.validate() before submitting.
# pipeline.validate()

In [None]:
# Submit the pipeline under the experiment named 'samples'. The call is left as
# the cell's final expression so that the notebook displays its result.
pipeline.submit(experiment_name='samples')