In [2]:
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# The `from azureml.core import ...` lines above do not bind the bare name
# `azureml`, so import the package explicitly before reading its VERSION;
# otherwise this cell raises NameError on a fresh kernel.
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.53


# Register/Reference a Datastore

In [3]:
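# One-time setup, left commented out: registers the blob container as the 'seerdata' datastore.
# Fill in the placeholders locally and never commit a real storage account key.
#datastore = Datastore.register_azure_blob_container(workspace=ws, 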
#                                             datastore_name='seerdata', 
#                                             container_name='your azure blob container name',
#                                             account_name='your storage account name', 
#                                             account_key='your storage account key',
#                                             create_if_not_exists=True)

In [4]:
# workspace: load it with Workspace.from_config(), then display the
# datastores registered on it (name -> datastore object)
ws = Workspace.from_config()
ws.datastores

{'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x2218148bba8>,
 'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x221814c2cc0>,
 'halworkspacestorage__datasets': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x221814c2f98>,
 'seerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x221814d0320>,
 'halworkspacestorage__azureml_blobstore_12257f97_0fe1_48cf_b3e7_ba17edaed331': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x221814d0630>}

In [5]:
# data: the 'seerdata' datastore, looked up by name on the workspace;
# it backs the pipeline's input path and every intermediate output
datastore = ws.datastores['seerdata']

# compute target: the workspace compute named 'gandalf' that every step runs on
compute = ws.compute_targets['gandalf']

# Define Pipeline!
The following will be created and then run:
1. Pipeline Parameters (path on datastore)
2. Data Prep Step
3. Training Step
4. Model Registration Step


## Pipeline Parameters
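We need to tell the Pipeline what it needs to learn to see! The `data` parameter is a path on the datastore; it defaults to the `burrito_tacos` folder and is mounted on the compute target.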

In [16]:
# Default location of the input data on the datastore.
datapath = DataPath(datastore=datastore, path_on_datastore='burrito_tacos')

# Two-element tuple: the "data" pipeline parameter (defaulting to `datapath`)
# paired with a compute binding that mounts the path.
data_path_pipeline_param = (
    PipelineParameter(name="data", default_value=datapath),
    DataPathComputeBinding(mode='mount'),
)

## Data Prep Step

In [17]:
# Estimator that runs parse.py from the current directory on the compute
# target, installing the packages listed in requirements.txt.
prep = Estimator(
    source_directory='.',
    entry_script='parse.py',
    compute_target=compute,
    pip_requirements_file='requirements.txt',
    use_gpu=True,
)

# Directory-shaped output of the prep step, stored on the datastore.
seer_tfrecords = PipelineData("tfrecords_set", datastore=datastore, is_directory=True)

# Step wiring: the data path parameter is the input, the tfrecords directory
# is the output, and both are also passed to the script as CLI arguments.
prepStep = EstimatorStep(
    name='Data Preparation',
    estimator=prep,
    compute_target=compute,
    inputs=[data_path_pipeline_param],
    outputs=[seer_tfrecords],
    estimator_entry_script_arguments=[
        "--source_path", data_path_pipeline_param,
        "--target_path", seer_tfrecords,
    ],
)

## Training Step

In [18]:
# Estimator that runs train.py from the current directory on the compute
# target, installing the packages listed in requirements.txt.
train = Estimator(
    source_directory='.',
    entry_script='train.py',
    compute_target=compute,
    pip_requirements_file='requirements.txt',
    use_gpu=True,
)

# Directory-shaped output of the training step, stored on the datastore.
seer_training = PipelineData("train", datastore=datastore, is_directory=True)

# Step wiring: consumes the tfrecords directory produced by the prep step and
# writes to the training output; epochs, batch and lr are passed to train.py.
trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    compute_target=compute,
    inputs=[seer_tfrecords],
    outputs=[seer_training],
    estimator_entry_script_arguments=[
        "--source_path", seer_tfrecords,
        "--target_path", seer_training,
        "--epochs", 5,
        "--batch", 10,
        "--lr", 0.001,
    ],
)

## Model Registration Step

In [19]:
# Estimator that runs register.py from the current directory on the compute
# target (no pip requirements file is supplied for this step).
register = Estimator(
    source_directory='.',
    entry_script='register.py',
    compute_target=compute,
    use_gpu=True,
)

# Directory-shaped output of the registration step, stored on the datastore.
seer_model = PipelineData("model", datastore=datastore, is_directory=True)

# Step wiring: consumes the training output directory and writes to the
# model output; both paths are also passed to register.py as CLI arguments.
registerStep = EstimatorStep(
    name='Model Registration',
    estimator=register,
    compute_target=compute,
    inputs=[seer_training],
    outputs=[seer_model],
    estimator_entry_script_arguments=[
        "--source_path", seer_training,
        "--target_path", seer_model,
    ],
)

## Test Run

In [20]:
# Assemble the prep, training and registration steps into one pipeline.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[prepStep, trainStep, registerStep],
)

In [21]:
# Submit the pipeline to be run under the 'seer_next' experiment,
# then show the run-details widget for the submitted run
pipeline_run1 = Experiment(ws, 'seer_next').submit(pipeline1)
RunDetails(pipeline_run1).show()

Created step Data Preparation [bfc55750][5f72260c-c098-4fd6-9a71-66121ce82290], (This step will run and generate new outputs)
Created step Model Training [ec4ed7d5][1da12d8a-aa42-4fc8-ba51-f780b6958d3e], (This step will run and generate new outputs)
Created step Model Registration [8c804dee][8ba99f6a-d7a8-4f36-bd8b-996bc643fc40], (This step will run and generate new outputs)
Created data reference seerdata_7b12afab for StepId [09590dec][8fc2591c-1e3f-4656-8a38-a3241d2a91c2], (Consumers of this data will generate new runs.)
Submitted pipeline run: 1d139a7f-7697-48a3-97cd-03ea2e2bc95f


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [22]:
# Uncomment to cancel the pipeline run submitted above.
#pipeline_run1.cancel()

In [15]:
# Publish the assembled pipeline under a display name and description.
published_pipeline1 = pipeline1.publish(
    description="Transfer learned image classifier. Uses folders as labels.",
    name="Seer Pipeline (vNext)",
)