## Initializing Azure Machine Learning Workspace

In [1]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Run, Experiment, Datastore
from azureml.widgets import RunDetails

# Report which azureml.core build this kernel is using
print("SDK version:", azureml.core.VERSION)

# Obtain the workspace via Workspace.from_config() and echo its identifying
# fields (name, resource group, location, subscription id), one per line
ws = Workspace.from_config()
print("\n".join(str(field) for field in (ws.name, ws.resource_group, ws.location, ws.subscription_id)))

# Look up the datastore registered under the name "workspaceblobstore"
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))

SDK version: 0.1.0.0
pudixit-ws
pudixit-rg
westus2
b8c23406-f9b5-4ccb-8a65-a8cb5dcd6a5a
Blobstore's name: workspaceblobstore


## Using Azure Machine Learning Compute

In [2]:
from azureml.core.compute import AmlCompute, ComputeTarget

# Imported here so this cell stays self-contained; the exception type is needed
# to tell "lookup failed" apart from unrelated failures.
from azureml.core.compute_target import ComputeTargetException

# Name of the compute cluster this notebook looks up or creates
aml_compute_target = "cpu-cluster"
try:
    # Reuse the cluster if one with this name is already attached to the workspace
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("Found existing compute target: {}".format(aml_compute_target))
except ComputeTargetException:
    # Catch only the SDK's compute-target lookup error instead of a bare
    # `except:`, so KeyboardInterrupt, NameError, etc. are no longer mistaken
    # for "cluster missing" and do not trigger provisioning.
    print("Creating new compute target: {}".format(aml_compute_target))

    # NOTE(review): min_nodes=1 presumably keeps one node allocated while idle — confirm this is intended
    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                                min_nodes=1,
                                                                max_nodes=4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning finishes, up to 20 minutes, streaming progress output
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

Found existing compute target: cpu-cluster


## Run Configuration for Training Step

In [3]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Start from a fresh run configuration and work on its environment section
run_config = RunConfiguration()
environment = run_config.environment

# Run inside Docker, using the SDK's default CPU image as the base image
environment.docker.enabled = True
environment.docker.base_image = DEFAULT_CPU_IMAGE

# Dependencies are not user-managed: a conda environment is created in the
# Docker image for execution from the specification assigned below
environment.python.user_managed_dependencies = False

# Conda specification: scikit-learn from conda, the Custom Vision client from pip
environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'],
    pip_packages=['azure-cognitiveservices-vision-customvision'],
    pin_sdk_version=False,
)

## Build and Publish Pipeline For Training

Build a pipeline to train on comics images, publish it and add a schedule to run it.

### Preparing pipeline parameters and inputs

These are pipeline parameters that can be overridden when submitting a new run of the pipeline. To keep it simple, we provide one tag and an Azure Blob path, `image_data`, pointing to the images related to that tag. We will then read the files that are prefixed with the `tag_name`.

In [24]:
from azureml.pipeline.core import PipelineParameter
from azureml.data.datapath import DataPath, DataPathComputeBinding

# Tag to train on; "Dog" is the default used when a submission does not override it
tag_name = PipelineParameter(name="image_tag", default_value="Dog")

# Default location of the images for that tag on the workspace blob datastore
datapath = DataPath(
    datastore=def_blob_store,
    path_on_datastore='comics_images/Dog',
)

# Expose the location as an overridable pipeline parameter, paired with a
# binding that requests mode='mount' for the step that consumes it
datapath_param = PipelineParameter(
    name="image_data",
    default_value=datapath,
)
image_data = (
    datapath_param,
    DataPathComputeBinding(mode='mount'),
)

### Define the training step

In [46]:
from azureml.pipeline.steps import PythonScriptStep

# Command-line arguments passed to train.py: the tag parameter and the image
# data path parameter, each preceded by its flag
train_arguments = [
    "--train_image_tag", tag_name,
    "--train_image_folder", image_data,
]

# Single training step: runs train.py from ./train_step on the compute target
# named by aml_compute_target, using the run configuration built earlier
trainStep = PythonScriptStep(
    name="Training_Step",
    source_directory='./train_step',
    script_name="train.py",
    arguments=train_arguments,
    inputs=[image_data],
    runconfig=run_config,
    compute_target=aml_compute_target,
    allow_reuse=True,
)

print("TrainStep created")

TrainStep created


### Build the pipeline

In [47]:
from azureml.pipeline.core import Pipeline

# Assemble the pipeline from its only step, the training step
pipeline_steps = [trainStep]
pipeline1 = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built")

Pipeline is built


In [48]:
# Pipeline.validate() reports the problems it finds as its return value.
# The success message is printed only when that result is empty; otherwise
# each problem is shown and the cell stops instead of claiming success.
validation_errors = pipeline1.validate()
if validation_errors:
    for validation_error in validation_errors:
        print("Pipeline validation error: {}".format(validation_error))
    raise RuntimeError(
        "Pipeline validation failed with {} error(s)".format(len(validation_errors))
    )
print("Pipeline is validated")

Step Training_Step is ready to be created [841f8686]
Pipeline is validated


### Run the pipeline 

In [49]:
# Submit the pipeline under the 'comic_reader_training' experiment.
# regenerate_outputs=False is passed through unchanged from the original call.
training_experiment = Experiment(ws, 'comic_reader_training')
pipeline_run1 = training_experiment.submit(
    pipeline1,
    regenerate_outputs=False,
)
print("Pipeline is submitted for execution")

Created step Training_Step [841f8686][ca7ad78b-f484-4626-82aa-75dc6f1bc3d5], (This step will run and generate new outputs)
Using data reference workspaceblobstore_1bcf793a for StepId [fee8faac][03158666-aa4c-4ce7-9222-d85b7aa9bb17], (Consumers of this data are eligible to reuse prior runs.)
Submitted pipeline run: 5b23c8c4-24a1-4e9e-8237-3955526b6633
Pipeline is submitted for execution


In [50]:
# Render the run-details widget for the submitted pipeline run
run_details_widget = RunDetails(pipeline_run1)
run_details_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

## Publish and Set Up a Schedule for the Pipeline

In [51]:
from datetime import datetime

# The same string is used as both the published pipeline's name and its description.
# NOTE(review): "trainining" looks like a typo for "training"; left unchanged
# because it is the published name — confirm before renaming.
pipeline_name = "comics_trainining_pipeline_v1"
print(pipeline_name)

# Publish the pipeline and report the id assigned to the published version
publish_kwargs = dict(name=pipeline_name, description=pipeline_name)
published_pipeline1 = pipeline1.publish(**publish_kwargs)
print("Newly published pipeline id: {}".format(published_pipeline1.id))

comics_trainining_pipeline_v1
Newly published pipeline id: cb85ad6d-0052-4797-bf64-9d5a7874e678
