In [1]:
from utils import *

In [2]:
from azureml.core import Workspace

# Configure experiment
ws = Workspace.from_config()

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [3]:
# Create or get training cluster
aml_cluster = get_aml_cluster(ws, cluster_name="cpu-cluster")
aml_cluster.wait_for_completion(show_output=True)

Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Create a run configuration
run_conf = get_run_config(['numpy', 'pandas', 'scikit-learn', 'tensorflow'])

In [5]:
from azureml.core import Dataset

dataset = Dataset.get_by_name(ws, name='titanic')
data_in = dataset.as_named_input('titanic')

In [6]:
from azureml.core import Datastore
from azureml.pipeline.core import PipelineData

datastore = Datastore.get(ws, datastore_name="mldata")
results = PipelineData('results', datastore=datastore)

In [13]:
from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.steps import ParallelRunConfig

parallel_run_config = ParallelRunConfig(
    entry_script='score.py',
    source_directory='code',
    mini_batch_size=5,
    error_threshold=10,
    output_action="append_row",
    append_row_file_name="parallel_run_step.txt",
    environment=run_conf.environment,
    compute_target=aml_cluster,
    process_count_per_node=2,
    node_count=2)


In [14]:
from azureml.pipeline.steps import ParallelRunStep

parallelrun_step = ParallelRunStep(
    name="ScoreParallel",
    parallel_run_config=parallel_run_config,
    inputs=[data_in],
    output=results,
    allow_reuse=True
)


ParallelRunStep requires azureml-dataset-runtime[fuse,pandas] for tabular dataset.
Please add relevant package in CondaDependencies.


In [15]:
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(ws, steps=[parallelrun_step])

In [16]:
pipeline.validate()

Step ScoreParallel is ready to be created [69070327]


[]

In [17]:
from azureml.core import Experiment

exp = Experiment(ws, "azureml-parallel-pipeline")
run = exp.submit(pipeline)

Created step ScoreParallel [69070327][99280fb6-39b3-4377-9778-bd0f4ee14f74], (This step will run and generate new outputs)
Submitted PipelineRun 310f8683-cebb-407c-a0b9-9711944b0a1d
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/310f8683-cebb-407c-a0b9-9711944b0a1d?wsid=/subscriptions/21dc412b-d9eb-42e7-8317-55bc8eb10cf5/resourcegroups/packt/workspaces/mldemows&tid=e702ab66-1ac1-42ba-b41b-8195d943a1e0


In [18]:
from azureml.widgets import RunDetails
RunDetails(run).show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …