In [1]:
from azureml.core import Workspace, Experiment, Datastore
from azureml.data.data_reference import DataReference
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.widgets import RunDetails
from azureml.train.estimator import Estimator

# `from azureml.core import ...` only binds the imported names (Workspace, ...);
# it does not bind the top-level name `azureml`. Import the package explicitly so
# this cell works after Restart Kernel -> Run All instead of relying on leftover state.
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.53


# Register/Reference a Datastore

In [2]:
# One-time setup template (intentionally left commented out): registers an Azure
# blob container as the 'seerdata' datastore on the workspace.
# It references `ws`, which is only created in the following cell, so run
# `ws = Workspace.from_config()` first if you uncomment this.
# NOTE(review): fill in the placeholders locally; do not commit a real account key.
#datastore = Datastore.register_azure_blob_container(workspace=ws, 
#                                             datastore_name='seerdata', 
#                                             container_name='your azure blob container name',
#                                             account_name='your storage account name', 
#                                             account_key='your storage account key',
#                                             create_if_not_exists=True)

In [3]:
# workspace: obtain the workspace handle via Workspace.from_config(); every later
# cell (datastores, compute target, pipeline, experiment) goes through `ws`.
ws = Workspace.from_config()
# Bare last expression so the notebook displays the datastores registered on the
# workspace; 'seerdata' must appear here for the next cell to work.
ws.datastores

{'workspacefilestore': <azureml.data.azure_storage_datastore.AzureFileDatastore at 0x14518696358>,
 'workspaceblobstore': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x1451869f240>,
 'halworkspacestorage__datasets': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x145186965c0>,
 'seerdata': <azureml.data.azure_storage_datastore.AzureBlobDatastore at 0x14518696cf8>}

In [4]:
# Look up the already-registered 'seerdata' datastore and wrap it in a
# DataReference of the same name.
datastore = ws.datastores['seerdata']
datareference = DataReference(datastore=datastore, data_reference_name="seerdata")

# Compute target named 'gandalf'; all pipeline steps below are pointed at it.
compute = ws.compute_targets['gandalf']

## Data Download Step

In [5]:
# Directory-valued pipeline output; fetch.py receives it as --target_path and the
# preparation step later consumes it as its input.
tacosburritos_dataset = PipelineData("training_set", datastore=datastore, is_directory=True)

# Step 1: run fetch.py from the current directory on the shared compute target.
fetchStep = PythonScriptStep(
    name="Data Fetch",
    source_directory=".",
    script_name="fetch.py",
    compute_target=compute,
    arguments=[
        "--target_path", tacosburritos_dataset,
        "--categories", "tacos", "burrito",
    ],
    inputs=[],
    outputs=[tacosburritos_dataset],
)

## Data Process Step

In [6]:
# Directory-valued pipeline output handed to prep.py as --target_path.
tacosburritos_tfrecords = PipelineData("tfrecords_set", datastore=datastore, is_directory=True)

# Estimator describing how prep.py is run (GPU enabled, pip dependencies taken
# from requirements.txt).
prep = Estimator(
    source_directory='.',
    entry_script='prep.py',
    compute_target=compute,
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Step 2: reads the fetch step's output and writes the tfrecords output, which is
# what orders it after "Data Fetch" in the pipeline graph.
prepStep = EstimatorStep(
    name='Data Preparation',
    estimator=prep,
    compute_target=compute,
    estimator_entry_script_arguments=[
        "--source_path", tacosburritos_dataset,
        "--target_path", tacosburritos_tfrecords,
    ],
    inputs=[tacosburritos_dataset],
    outputs=[tacosburritos_tfrecords],
)

## Training Step

In [20]:
# Directory-valued pipeline output handed to train.py as --target_path.
tacosburritos_models = PipelineData("models", datastore=datastore, is_directory=True)

# Estimator describing how train.py is run (GPU enabled, pip dependencies taken
# from requirements.txt).
train = Estimator(
    source_directory='.',
    entry_script='train.py',
    compute_target=compute,
    use_gpu=True,
    pip_requirements_file='requirements.txt',
)

# Step 3: consumes the tfrecords output of the preparation step and writes the
# models output; hyperparameters are passed straight through as script arguments.
trainStep = EstimatorStep(
    name='Model Training',
    estimator=train,
    compute_target=compute,
    estimator_entry_script_arguments=[
        "--source_path", tacosburritos_tfrecords,
        "--target_path", tacosburritos_models,
        "--epochs", 5,
        "--batch", 10,
        "--lr", 0.001,
    ],
    inputs=[tacosburritos_tfrecords],
    outputs=[tacosburritos_models],
)

## Test Run

In [21]:
# Assemble the three steps (fetch -> prep -> train) into one pipeline on the workspace.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[fetchStep, prepStep, trainStep],
)

In [22]:
# Submit the pipeline under the 'seer' experiment, then show the run-details
# widget for the resulting run.
pipeline_run1 = Experiment(workspace=ws, name='seer').submit(pipeline1)
RunDetails(pipeline_run1).show()

Created step Data Fetch [28c1c694][5d4d5572-5d60-4dd6-a4ff-1cb22d8105da], (This step will run and generate new outputs)
Created step Data Preparation [4ba3cc0b][1968a44b-d27e-46d3-b0f0-4fe06408301f], (This step will run and generate new outputs)
Created step Model Training [4a688288][bac02f9b-de98-49fe-b655-3eda322fe3f6], (This step will run and generate new outputs)
Submitted pipeline run: 8762f0b9-4c74-455e-b85b-72178fa54f67


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …