In [1]:
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# `from azureml.core import ...` does not bind the name `azureml` itself, so the
# package is imported explicitly here; without it this cell raises NameError
# when the notebook is run on a fresh kernel.
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.53


# Register/Reference a Datastore

In [6]:
# Workspace handle, built from the local JSON config file.
# NOTE: per the original author's note, point this at the '-seth-config.json'
# variant of the file when running against HAL.
ws = Workspace.from_config(path='./azureml-config.json')

# Show the datastores registered in this workspace.
ws.datastores

{'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x11ee36c18>,
 'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x11dc96668>,
 'damoseerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x11dd75978>}

In [7]:
# One-time setup, intentionally left commented out: registers the blob container
# 'seer-container' in the workspace under the datastore name 'damoseerdata'.
# NOTE(review): a literal storage account key was embedded on the account_key
# line; it is replaced by an environment lookup below. Treat the old key as
# compromised and rotate it. Re-enabling this cell also needs `import os`.
#datastore = Datastore.register_azure_blob_container(workspace=ws, 
#                                             datastore_name='damoseerdata', 
#                                             container_name='seer-container',
#                                             account_name='damoaimlstorage', 
#                                             account_key=os.environ['AZURE_STORAGE_ACCOUNT_KEY'],
#                                             create_if_not_exists=True)

In [8]:
# Look up the already-registered datastore that holds the data.
datastore = ws.datastores['damoseerdata']

# Look up the existing compute target the pipeline steps will run on.
compute = ws.compute_targets['damoseercompute']

# Display the compute target so its provisioning state is visible.
compute

AmlCompute(workspace=Workspace.create(name='damo-mlworkspace', subscription_id='bc202ec2-54ef-4576-b7fb-a961c983398e', resource_group='damo-aiml'), name=damoseercompute, id=/subscriptions/bc202ec2-54ef-4576-b7fb-a961c983398e/resourceGroups/damo-aiml/providers/Microsoft.MachineLearningServices/workspaces/damo-mlworkspace/computes/damoseercompute, type=AmlCompute, provisioning_state=Succeeded, location=australiaeast, tags=None)

# Define Pipeline!
The following will be created and then run:
1. Pipeline Parameters
2. Data Fetch Step
3. Data Process Step
4. Training Step
5. Model Registration Step


## Pipeline Parameters
We need to tell the pipeline where to find the data it should learn from. The `data` parameter defaults to the `hardware` folder on the datastore.

In [9]:
# Default location of the input data: the 'hardware' folder on the datastore.
datapath = DataPath(
    datastore=datastore,
    path_on_datastore='hardware',
)

# The pipeline parameter "data" paired with a compute binding in 'mount' mode;
# steps take this (parameter, binding) tuple as an input.
data_path_pipeline_param = (
    PipelineParameter(name="data", default_value=datapath),
    DataPathComputeBinding(mode='mount'),
)
data_path_pipeline_param

(<azureml.pipeline.core.graph.PipelineParameter at 0x11ee60898>,
 <azureml.data.datapath.DataPathComputeBinding at 0x11ee60e10>)

## Data Process Step

In [10]:
# Directory-typed intermediate output named "tfrecords_set", stored on the datastore.
seer_tfrecords = PipelineData("tfrecords_set", datastore=datastore, is_directory=True)

# Estimator that runs parse.py from the current directory with the packages
# listed in requirements.txt.
prep = Estimator(
    source_directory='.',
    entry_script='parse.py',
    compute_target=compute,
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Pipeline step: reads the "data" pipeline parameter and writes to seer_tfrecords.
prepStep = EstimatorStep(
    name='Data Preparation',
    estimator=prep,
    compute_target=compute,
    inputs=[data_path_pipeline_param],
    outputs=[seer_tfrecords],
    estimator_entry_script_arguments=[
        "--source_path", data_path_pipeline_param,
        "--target_path", seer_tfrecords,
    ],
)

prepStep

<azureml.pipeline.steps.estimator_step.EstimatorStep at 0x11ddaf3c8>

## Training Step

In [11]:
# Directory-typed intermediate output named "train", stored on the datastore.
seer_training = PipelineData("train", datastore=datastore, is_directory=True)

# Estimator that runs train.py from the current directory with the packages
# listed in requirements.txt.
train = Estimator(
    source_directory='.',
    entry_script='train.py',
    compute_target=compute,
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Pipeline step: consumes seer_tfrecords, writes to seer_training, and passes
# the epoch count, batch size and learning rate to the script as arguments.
trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    compute_target=compute,
    inputs=[seer_tfrecords],
    outputs=[seer_training],
    estimator_entry_script_arguments=[
        "--source_path", seer_tfrecords,
        "--target_path", seer_training,
        "--epochs", 5,
        "--batch", 10,
        "--lr", 0.001,
    ],
)

trainStep

<azureml.pipeline.steps.estimator_step.EstimatorStep at 0x11ee4e400>

## Register Model Step

In [12]:
# Directory-typed output named "model", stored on the datastore.
seer_model = PipelineData("model", datastore=datastore, is_directory=True)

# Estimator that runs register.py from the current directory
# (no pip requirements file is passed for this one).
register = Estimator(
    source_directory='.',
    entry_script='register.py',
    compute_target=compute,
    use_gpu=True,
)

# Pipeline step: consumes seer_training and writes to seer_model.
registerStep = EstimatorStep(
    name='Model Registration',
    estimator=register,
    compute_target=compute,
    inputs=[seer_training],
    outputs=[seer_model],
    estimator_entry_script_arguments=[
        "--source_path", seer_training,
        "--target_path", seer_model,
    ],
)

registerStep

<azureml.pipeline.steps.estimator_step.EstimatorStep at 0x11ddc3550>

## Test Run

In [13]:
# Assemble the three steps into a single pipeline in this workspace.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[
        prepStep,
        trainStep,
        registerStep,
    ],
)

pipeline1

<azureml.pipeline.core.pipeline.Pipeline at 0x11ee36be0>

In [14]:
# Submit the pipeline as a run of the 'seer' experiment.
pipeline_run1 = Experiment(workspace=ws, name='seer').submit(pipeline1)

# Show the run-details widget for the submitted run.
RunDetails(pipeline_run1).show()

Created step Data Preparation [5ded6486][464792fb-daf7-4e14-85a7-89e211d50d32], (This step will run and generate new outputs)
Created step Model Training [679c3418][b8a5bab6-0b3b-4edb-aecc-1c183694b13d], (This step will run and generate new outputs)
Created step Model Registration [dbab6cf8][721de658-8e1a-4937-920b-b5ff242dc423], (This step will run and generate new outputs)
Created data reference damoseerdata_312cbca7 for StepId [c7220217][244139f4-deb1-47ee-a7c6-f5c2b756b327], (Consumers of this data will generate new runs.)
Submitted pipeline run: 3366ffe6-8d2a-499f-be7e-dea54124962b


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [None]:
# Left commented out on purpose: uncomment and run to cancel the pipeline run
# held in `pipeline_run1`.
#pipeline_run1.cancel()

In [None]:
# Publish the pipeline under a name and description; the returned object is
# kept in `published_pipeline1`.
published_pipeline1 = pipeline1.publish(
    name="Seer Pipeline",
    description="Transfer learned image classifier. Uses folders as labels.",
)