In [1]:
import os
import azureml.core
from azureml.core import Workspace, Experiment, Datastore
from azureml.widgets import RunDetails

# Report the installed core SDK version so readers can compare it with their own.
print("SDK version:", azureml.core.VERSION)

from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep

print("Pipeline SDK-specific imports completed")

# Load the workspace described by the local Azure ML configuration.
ws = Workspace.from_config()
# NOTE: this echoes the subscription id into the cell output; clear outputs
# before sharing the notebook if that identifier should stay private.
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Fetch the Azure Blob Store associated with the workspace.
# "workspaceblobstore" is the name of this store; it CANNOT BE CHANGED and must
# be used as is. The datastore is looked up once by that name: a preceding
# ws.get_default_datastore() call would be overwritten by this assignment and
# only add an extra service round-trip.
def_blob_store = Datastore(ws, "workspaceblobstore")
print(f"Blobstore's name: {def_blob_store.name}")



SDK version: 1.26.0
Pipeline SDK-specific imports completed
opendatasetspmworkspace
opendatasetspmrg
eastus2
21d8f407-c4c4-452e-87a4-e609bfb86248
Blobstore's name: workspaceblobstore


In [4]:


# Imported in this cell because it runs before the notebook's later import
# cell; without these names the "create" branch below raises NameError on a
# fresh kernel whenever the cluster does not exist yet.
from azureml.core.compute import AmlCompute, ComputeTarget

# Choose a name for your cluster.
amlcompute_cluster_name = "cpu-cluster"

# Check if this compute target already exists in the workspace (same name AND
# of type AmlCompute).
cts = ws.compute_targets
found = (
    amlcompute_cluster_name in cts
    and cts[amlcompute_cluster_name].type == 'AmlCompute'
)

if found:
    print('Found existing compute target.')
    compute_target = cts[amlcompute_cluster_name]
else:
    # NOTE(review): a target with this name but a different type also lands
    # here; creation would then presumably fail on the name clash - confirm.
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",  # for GPU, use "STANDARD_NC6"
        # vm_priority='lowpriority',  # optional
        max_nodes=4,
    )

    # Create the cluster.
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min_node_count is provided, it will use the scale settings for the cluster.
    compute_target.wait_for_completion(show_output=True, timeout_in_minutes=10)

    # For a more detailed view of current AmlCompute status, use get_status()

Found existing compute target.


In [9]:
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.data.datapath import DataPath, DataPathComputeBinding
from azureml.widgets import RunDetails

from azureml.pipeline.core import Pipeline, PipelineRun, PipelineData, PipelineParameter
from azureml.pipeline.steps import PythonScriptStep


In [58]:
%%writefile string_from_pipeline_param.py
import argparse
import os
import os.path


def main(argv=None):
    """Write the ``--string`` argument to ``<output_path>/string.txt``.

    Args:
        argv: Optional list of command-line tokens. ``None`` (the default)
            makes argparse read ``sys.argv``, which is how the pipeline step
            invokes this script.

    The output directory is created when it does not exist yet, so the write
    does not depend on the directory having been prepared beforehand.
    """
    parser = argparse.ArgumentParser("string_input")
    parser.add_argument("--string", type=str, help="sample string argument")
    # Required: every later statement needs a real path, so fail with a clear
    # usage error instead of a TypeError on None.
    parser.add_argument("--output_path", type=str, required=True, help="path to write output")
    args = parser.parse_args(argv)

    print("Sample string pipeline paramater input: %s" % args.string)

    # Diagnostic only: report the directory state *before* it is created below.
    print (f"Checking if output_path: {args.output_path} is mounted: {os.path.isdir(args.output_path)}")

    os.makedirs(args.output_path, exist_ok=True)

    target_file = os.path.join(args.output_path, 'string.txt')
    print (f"Writing '{args.string}' to file {target_file}")

    # print() terminates the value with a single newline.
    with open(target_file, 'w') as text_file:
        print(f"{args.string}", file=text_file)


if __name__ == "__main__":
    main()


Overwriting string_from_pipeline_param.py


In [59]:
%%writefile string_from_pipeline_data.py
import argparse
import os


def main(argv=None):
    """Copy the string stored in ``<input_path>/string.txt`` to ``<output_path>/string.txt``.

    Args:
        argv: Optional list of command-line tokens. ``None`` (the default)
            makes argparse read ``sys.argv``, which is how the pipeline step
            invokes this script.

    The output directory is created when it does not exist yet.
    """
    parser = argparse.ArgumentParser("string_input")
    # Both paths are required: every later statement joins onto them.
    parser.add_argument("--input_path", type=str, required=True, help="path to read input")
    parser.add_argument("--output_path", type=str, required=True, help="path to write output")
    args = parser.parse_args(argv)

    filename = "string.txt"
    source_file = os.path.join(args.input_path, filename)
    print (f"reading file: {source_file}")
    with open(source_file, 'r') as handle:
        # The file is written with print(), which appends a newline. Strip
        # trailing newlines here so chained steps pass the value through
        # unchanged instead of growing it by one "\n" per step.
        string = handle.read().rstrip("\n")

    print (f"Found in string.txt: {string}")

    os.makedirs(args.output_path, exist_ok=True)

    target_file = os.path.join(args.output_path, 'string.txt')
    print (f"Writing '{string}' to file {target_file}")
    with open(target_file, "w") as text_file:
        print(f"{string}", file=text_file)


if __name__ == "__main__":
    main()


Overwriting string_from_pipeline_data.py


In [60]:
# Pipeline-level string parameter named "input_string"; 'hello world' is its
# default value.
string_pipeline_param = PipelineParameter(default_value='hello world', name="input_string")

# Intermediate output produced by the first step, declared with mount mode.
processed_data0 = PipelineData(name="processed_data0", output_mode='mount')

# First step: runs string_from_pipeline_param.py with the pipeline parameter as
# --string and processed_data0 as --output_path.
# NOTE(review): the step name and the message printed below say "..._data"
# while the script is "..._param.py" - confirm the label is intentional.
string_from_pipeline_param = PythonScriptStep(
    script_name="string_from_pipeline_param.py",
    name='string_from_pipeline_data',
    source_directory="./",
    compute_target=compute_target,
    arguments=[
        "--string", string_pipeline_param,
        "--output_path", processed_data0,
    ],
    outputs=[processed_data0],
)
print("string_from_pipeline_data created")


string_from_pipeline_data created


In [61]:
# Output of the second step, declared with mount mode.
processed_data1 = PipelineData(name="processed_data1", output_mode='mount')

# Second step: runs string_from_pipeline_data.py, taking processed_data0 as
# --input_path and writing to processed_data1 via --output_path.
string_from_pipeline_param_1 = PythonScriptStep(
    script_name="string_from_pipeline_data.py",
    name='string_from_pipeline_data_1',
    source_directory="./",
    compute_target=compute_target,
    arguments=[
        "--input_path", processed_data0,
        "--output_path", processed_data1,
    ],
    inputs=[processed_data0],
    outputs=[processed_data1],
)
print("string_from_pipeline_data_1 created")


string_from_pipeline_data_1 created


In [62]:
# Output of the third step, declared with mount mode.
processed_data2 = PipelineData(name="processed_data2", output_mode='mount')

# Third step: runs string_from_pipeline_data.py again, this time taking
# processed_data1 as --input_path and writing to processed_data2.
string_from_pipeline_param_2 = PythonScriptStep(
    script_name="string_from_pipeline_data.py",
    name='string_from_pipeline_data_2',
    source_directory="./",
    compute_target=compute_target,
    arguments=[
        "--input_path", processed_data1,
        "--output_path", processed_data2,
    ],
    inputs=[processed_data1],
    outputs=[processed_data2],
)
print("string_from_pipeline_data_2 created")


string_from_pipeline_data_2 created


In [63]:
# Assemble the three script steps into one pipeline in this workspace.
pipeline_steps = [
    string_from_pipeline_param,
    string_from_pipeline_param_1,
    string_from_pipeline_param_2,
]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

# Runs are recorded under the 'string-exp' experiment.
experiment = Experiment(workspace=ws, name='string-exp')

# Submit with an explicit value for the "input_string" pipeline parameter.
# (Alternative: experiment.submit(pipeline) without pipeline_parameters.)
run_parameters = {'input_string': 'hello python world'}
pipeline_run_with_params = experiment.submit(pipeline, pipeline_parameters=run_parameters)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


Created step string_from_pipeline_data [7626c0bf][177cd43b-81b8-4012-9b0f-e188a89c4874], (This step will run and generate new outputs)Created step string_from_pipeline_data_1 [f2120e66][9d969e55-b594-4e47-8b37-c4e2aa15046c], (This step will run and generate new outputs)
Created step string_from_pipeline_data_2 [832618dd][dcec442a-ba89-4333-b90a-3a7f51ca06b7], (This step will run and generate new outputs)

Submitted PipelineRun 704d4eed-646d-464d-9426-94754bda7bde
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/704d4eed-646d-464d-9426-94754bda7bde?wsid=/subscriptions/21d8f407-c4c4-452e-87a4-e609bfb86248/resourcegroups/opendatasetspmrg/workspaces/opendatasetspmworkspace&tid=72f988bf-86f1-41af-91ab-2d7cd011db47
