Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# Develop your own Azure Machine Learning component using dsl component decorator

In this notebook, you learn how to create a simple machine learning component using the dsl component decorator and use it in an ML pipeline.

* Create a component with the dsl component decorator
* Run a local test (optional) to make sure the code works correctly
* Consume the component in dsl.pipeline
* Register the component to your Machine Learning workspace

## Prerequisites
* Install azure cli with azure-cli-ml extension following the [instructions here](setup-environment.ipynb).


## Initialize workspace

Initialize a workspace object from persisted configuration file.

In [None]:
from azureml.core import Workspace

# Load the workspace from the persisted configuration file. If that fails and
# the placeholders below have been filled in, write a fresh configuration file
# at `config_path` and load the workspace from it instead.
config_path = "../../../../../.azureml/config.json"
try:
    ws = Workspace.from_config(path=config_path)
except Exception:
    # NOTE: Update the following workspace information if the configuration
    # file was not set up correctly beforehand.
    subscription_id = "<your subscription ID>"
    resource_group = "<your resource group>"
    workspace_name = "<your workspace name>"

    if subscription_id.startswith("<"):
        # Placeholders are untouched, so there is nothing to fall back on:
        # surface the original failure with its traceback intact.
        raise

    # Imported here because they are only needed on this fallback path.
    import json
    import os

    # Write the workspace scope to the config file and reload from it.
    config = {
        "Scope": "/subscriptions/"
        + subscription_id
        + "/resourceGroups/"
        + resource_group
        + "/providers/Microsoft.MachineLearningServices/workspaces/"
        + workspace_name
        + "/projects/samples"
    }
    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    with open(config_path, "w", encoding="utf-8") as fo:
        json.dump(config, fo)
    ws = Workspace.from_config(path=config_path)

## Prepare Datasets

In [None]:
# from azureml.core import Dataset, Datastore
# from azureml.data.datapath import DataPath

# datastore = Datastore.get(ws, 'workspaceblobstore')


# # get dataset
# dataset_mltable_data = "dataset_mltable_data"
# dataset_mltable_src = "dataset_mltable_src"


# if dataset_mltable_data not in ws.datasets:
#     print('Registering dataset_mltable_data...')
#     # train_data = Dataset.File.from_files(
#     #     path="../test_create_and_link/sdk/data/"
#     # )
    
#     data = Dataset.File.upload_directory(src_dir='../test_create_and_link/sdk/data/',
#            target=DataPath(datastore,  'hod/mltable_test_data/'),
#            show_progress=True)
#     data.register(
#         workspace=ws,
#         name=dataset_mltable_data,
#         description='mltable test data',
#     )
#     print('Registered')

# if dataset_mltable_src not in ws.datasets:
#     print('Registering dataset_mltable_src...')
#     # test_data = Dataset.File.from_files(
#     #     path="../test_create_and_link/convert2ss/"
#     # )
#     data = Dataset.File.upload_directory(src_dir="../test_create_and_link/convert2ss/",
#            target=DataPath(datastore,  'hod/mltable_test_src/'),
#            show_progress=True)
#     data.register(
#         workspace=ws,
#         name=dataset_mltable_src,
#         description='mltable test src',
#     )
#     print('Registered')

# mltable_data = Dataset.get_by_name(ws, name=dataset_mltable_data)
# mltable_src = Dataset.get_by_name(ws, name=dataset_mltable_src)

## Create components using dsl component decorator

We defined three sample components using dsl.command_component in [components/dsl_components.py](components/dsl_components.py).

In [None]:
# Print the source of the sample dsl components so the reader can inspect them.
# The encoding is pinned to UTF-8 (Python's default source encoding) so the
# output does not depend on the platform's locale encoding.
with open("components/dsl_components.py", encoding="utf-8") as fin:
    print(fin.read())

In [None]:
from components.dsl_components import (
    create_multi_path_dataset,
    consume_dataset,
)

# see the function help
# help(create_multi_path_dataset)

## Run the component in local (optional)

After developing the component, we can load it and run it locally to make sure it works correctly.

In [None]:
# component.run(experiment_name='local-run-with-train-component', mode='host', track_run_history=True, workspace=ws)

In [None]:
from azureml.core import Dataset, Datastore, Run

# Look up the "workspaceblobstore" datastore registered in the workspace.
datastore = Datastore.get(ws, "workspaceblobstore")
relative_path_1, relative_path_2 = "/hod/mltable_test_data/", "/hod/mltable_test_src/"
# Quoted, comma-separated rendering of both relative paths.
relative_path = ", ".join(f'"{p}"' for p in (relative_path_1, relative_path_2))

# Build a single file dataset that spans both datastore folders.
multi_path_sources = [(datastore, p) for p in (relative_path_1, relative_path_2)]
file_dataset = Dataset.File.from_files(path=multi_path_sources, validate=True)
# NOTE(review): private SDK call; presumably saves the dataset definition to
# the workspace so it can be used as a pipeline input — confirm.
file_dataset._ensure_saved_internal(ws)

## Consume the component in dsl pipeline

The example below demonstrates how to consume dsl component in dsl pipeline.

In [None]:
from azure.ml.component import dsl

# Name of the compute target passed to the pipeline as its default.
# NOTE(review): presumably this compute target must already exist in the
# workspace — confirm.
cluster_name = "cpu-cluster"
# define a pipeline with dsl component
@dsl.pipeline(
    name='test_multi_path_dataset_pipeline',
    default_compute_target=cluster_name,
)
def test_multi_path_dataset_pipeline():
    # Disabled variant: produce the data with create_multi_path_dataset and
    # feed its `data_output` into consume_dataset.
    # node_0 = create_multi_path_dataset()
    # node_1 = consume_dataset(data_path=node_0.outputs.data_output)
    # Active variant: pass the multi-path `file_dataset` (built in an earlier
    # cell) directly as consume_dataset's `data_path` input.
    node_1 = consume_dataset(data_path=file_dataset)

After defining the pipeline, call the pipeline function to create a pipeline instance.

In [None]:
# Call the decorated pipeline function; the result is the `pipeline` object
# that the following cells validate and submit.
pipeline = test_multi_path_dataset_pipeline()

Or validate and submit it to workspace.

In [None]:
# validate pipeline
# Validation is performed against the workspace `ws` loaded earlier.
pipeline.validate(workspace=ws)

In [None]:
# submit pipeline
# Submit to workspace `ws` under the experiment name
# 'test_multi_path_dataset_pipeline' and keep the returned object in `run`.
run = pipeline.submit(experiment_name='test_multi_path_dataset_pipeline', workspace=ws)
# A bare expression as the last line of a cell makes the notebook display it.
run

In [None]:
# NOTE(review): presumably blocks until the submitted run finishes — confirm
# against the SDK documentation.
run.wait_for_completion()

## Create the component

The component can be created in workspace with the following scripts.

In [None]:
from azure.ml.component import Component

# Name and version used to look up (or register) the component in the workspace.
train_component_name = "dsl_train_component"
component_version = "0.0.1"
# Load or register the dsl component
# First try to load the component with this name/version from workspace `ws`;
# if loading raises, fall back to creating it.
try:
    registered_train_component_func = Component.load(
        ws, name=train_component_name, version=component_version
    )
except Exception:
    # NOTE(review): `train_component_func` is not defined or imported in any
    # visible cell of this notebook (only `create_multi_path_dataset` and
    # `consume_dataset` are imported), so this branch would raise NameError
    # as written — confirm which component is meant to be registered here.
    registered_train_component_func = Component.create(
        train_component_func,
        version=component_version,
        set_as_default=True,
        workspace=ws,
    )

In [None]:
# Check whether the component is successfully registered
# Load by name only (no explicit version is passed here) and print the help
# text of the loaded component function.
component_func = Component.load(ws, name=train_component_name)
help(component_func)