In [3]:
# MLClient is the client class used to get a handle to the workspace
from azure.ml import MLClient

# Authentication package
from azure.identity import InteractiveBrowserCredential

In [4]:
# Get a handle to the workspace.
#
# The workspace coordinates default to the values this notebook was written
# against. Each one can be overridden without editing the cell by setting the
# corresponding environment variable before starting the kernel:
#   AML_SUBSCRIPTION_ID  -> subscription_id      ('<SUBSCRIPTION_ID>')
#   AML_RESOURCE_GROUP   -> resource_group_name  ('<RESOURCE_GROUP>')
#   AML_WORKSPACE_NAME   -> workspace_name       ('<AML_WORKSPACE_NAME>')
import os

ml_client = MLClient(
    # Opens a browser window for interactive sign-in.
    InteractiveBrowserCredential(),
    subscription_id = os.environ.get('AML_SUBSCRIPTION_ID', '48bbc269-ce89-4f6f-9a12-c6f91fcb772d'),
    resource_group_name = os.environ.get('AML_RESOURCE_GROUP', 'aml1p-rg'),
    workspace_name = os.environ.get('AML_WORKSPACE_NAME', 'aml1p-ml-eus2'),
)

In [5]:
from azure.ml.entities import Dataset

# Public URL of the COCO 2017 train/val annotations archive.
coco_trainval_path = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

# Describe the remote zip as a single-file Dataset entity, tagged with its origin.
# NOTE: this only constructs the entity object; no call that registers it with
# the workspace is visible in this part of the notebook.
coco_trainval_dataset = Dataset(
    name="coco_trainval2017_zip",
    description="annotations_trainval2017.zip",
    paths=[{"file": coco_trainval_path}],
    tags={"source_type": "web", "source": "cocodataset.org"},
)

In [45]:
from azure.ml.entities import CommandComponent, JobInput, JobOutput

# Command-only component: extracts a zip archive into an output folder.
unzip_component = CommandComponent(
    name="Unzip",

    # This component has no code, just a shell command:
    #   1. `ls -lr` logs the resolved input path (useful when troubleshooting inputs)
    #   2. `unzip <archive> -d <dir>` extracts the archive into the output folder
    # Do NOT add `-t` here: `unzip -t` only tests the archive's integrity and
    # extracts nothing, which would leave `extracted_data` empty.
    command = "ls -lr ${{inputs.archive_path}}; unzip ${{inputs.archive_path}} -d ${{outputs.extracted_data}}",

    # Input and output names must match the ${{inputs.*}} / ${{outputs.*}}
    # placeholders referenced in the command above.
    inputs = {
        'archive_path': { 'type': 'path' }
    },
    outputs = {
        'extracted_data': { 'type': 'path' }
    },

    # We're using a curated environment, referenced as name:version.
    environment = 'AzureML-sklearn-0.24-ubuntu18.04-py37-cpu:9',
)

In [46]:
from azure.ml import dsl

# Wrap the in-memory `unzip_component` definition into a callable; each call
# inside a pipeline function below creates one step that runs the unzip command.
unzip_component_func = dsl.load_component(component=unzip_component)

# The dsl decorator tells the SDK that the function below defines an AML pipeline.
# NOTE(review): the recorded output of the submit cell shows the warning
# "compute is not a known attribute of class ... PipelineJob and will be ignored";
# confirm the compute target set here is actually applied to the run.
@dsl.pipeline(
    compute="cpu-d14-v2", # commented-out alternative value: "cpu-cluster"
    description="e2e images preparation",
)
def coco_preparation_pipeline():
    # Pipeline with three independent unzip steps, one per COCO 2017 archive
    # (annotations, train images, validation images). Takes no parameters.
    # Each archive is passed to the component as a file input given by its URL.
    # NOTE(review): the local step names are kept as-is on purpose; the dsl may
    # derive step names from them — confirm before renaming.
    annotations_unzip_step = unzip_component_func(
        archive_path=JobInput(file="http://images.cocodataset.org/annotations/annotations_trainval2017.zip")
    )
    train_unzip_step = unzip_component_func(
        archive_path=JobInput(file="http://images.cocodataset.org/zips/train2017.zip")
    )
    valid_unzip_step = unzip_component_func(
        archive_path=JobInput(file="http://images.cocodataset.org/zips/val2017.zip")
    )

    # Only the annotations step's output is returned from the pipeline function;
    # the outputs of the train and validation steps are not returned here.
    return {
        "annotations_data": annotations_unzip_step.outputs.extracted_data
    }

# Calling the decorated function builds the pipeline object that is submitted
# in the next cell; nothing is submitted by this call itself.
pipeline_instance = coco_preparation_pipeline()

In [47]:
# Send the pipeline built above to the workspace as a job.
returned_job = ml_client.jobs.create_or_update(
    pipeline_instance,
    experiment_name="e2e_image_preparation",  # project's name: runs are filed under this experiment
    continue_run_on_step_failure=True,        # steps with no dependency on a failed step keep running
)

# Last expression of the cell, so the notebook displays it:
# the Studio URL where the job's status can be followed.
returned_job.services["Studio"].endpoint

compute is not a known attribute of class <class 'azure.ml._restclient.v2021_10_01.models._models_py3.PipelineJob'> and will be ignored


'https://ml.azure.com/runs/860a9fc2-c3ac-4528-8477-972ece117cfa?wsid=/subscriptions/48bbc269-ce89-4f6f-9a12-c6f91fcb772d/resourcegroups/aml1p-rg/workspaces/aml1p-ml-eus2&tid=72f988bf-86f1-41af-91ab-2d7cd011db47'