In [1]:
from utils import *

In [2]:
from azureml.core import Workspace

# Connect to the Azure ML workspace described by the local workspace
# configuration (Workspace.from_config); `ws` is passed to every SDK call below.
ws = Workspace.from_config()

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [3]:
# Obtain the "cpu-cluster" compute target through the project helper, then
# block (streaming status output) until provisioning has completed.
aml_cluster = get_aml_cluster(
    ws,
    cluster_name="cpu-cluster",
)
aml_cluster.wait_for_completion(
    show_output=True,
)

Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Build the run configuration shared by every pipeline step; the list names
# the packages handed to the project helper.
run_conf = get_run_config(
    [
        'numpy',
        'pandas',
        'scikit-learn',
        'tensorflow',
    ]
)

In [5]:
from azureml.core import Dataset

# Look up the registered 'titanic' dataset in the workspace ...
dataset = Dataset.get_by_name(
    workspace=ws,
    name='titanic',
)
# ... and expose it to pipeline steps under the input name 'titanic'.
data_in = dataset.as_named_input('titanic')

In [6]:
from azureml.core import Datastore
from azureml.pipeline.core import PipelineData

# Datastore that backs the intermediate pipeline data.
datastore = Datastore.get(
    workspace=ws,
    datastore_name="mldata",
)

# Intermediate outputs handed from the preprocessing step to the training step.
data_train = PipelineData(name='train', datastore=datastore)
data_test = PipelineData(name='test', datastore=datastore)

In [7]:
from azureml.data import OutputFileDatasetConfig

# Final pipeline output: written to 'titanic/predictions' on the datastore,
# read back as delimited files and registered under the name 'titanic.pred'
# once the producing step completes.
data_out = (
    OutputFileDatasetConfig(
        name="predictions",
        destination=(datastore, 'titanic/predictions'),
    )
    .read_delimited_files()
    .register_on_complete('titanic.pred')
)

In [8]:
from azureml.pipeline.steps import PythonScriptStep

# Step 1 - preprocessing: runs code/preprocess_output.py on the cluster,
# consuming the named input dataset and producing the train/test outputs.
step_1 = PythonScriptStep(
    name='Preprocessing',
    source_directory="code",
    script_name="preprocess_output.py",
    arguments=[
        "--input", data_in,
        "--out-train", data_train,
        "--out-test", data_test,
    ],
    inputs=[data_in],
    outputs=[data_train, data_test],
    compute_target=aml_cluster,
    runconfig=run_conf,
)


In [9]:
from azureml.pipeline.steps import PythonScriptStep

# Step 2 - training: runs code/train_output.py on the cluster, consuming the
# train/test outputs of step 1 and writing to the predictions output.
step_2 = PythonScriptStep(
    name='Training',
    source_directory="code",
    script_name="train_output.py",
    arguments=[
        "--in-train", data_train,
        "--in-test", data_test,
        "--output", data_out,
    ],
    inputs=[data_train, data_test],
    outputs=[data_out],
    compute_target=aml_cluster,
    runconfig=run_conf,
)

In [10]:
from azureml.pipeline.core import Pipeline

# Assemble both steps into one pipeline bound to the workspace.
pipeline = Pipeline(
    workspace=ws,
    steps=[step_1, step_2],
)

In [11]:
# Validate the pipeline graph before using it. Left as the bare last
# expression so the cell displays the result; the recorded output is an
# empty list (presumably meaning no validation errors - confirm against the
# SDK documentation).
pipeline.validate()

[]

In [13]:
from azureml.pipeline.core import PublishedPipeline

# Query the service once and reuse the result for both the listing and the
# selection, so the two cannot disagree and no second round-trip is made.
published_pipelines = PublishedPipeline.list(ws)

# A dedicated loop variable avoids clobbering `pipeline` while iterating.
for published in published_pipelines:
    print("name: %s, id: %s" % (published.name, published.id))

# Fail with an explicit message instead of a bare "list index out of range"
# when nothing has been published yet.
# NOTE(review): no publishing cell is visible in this notebook - presumably a
# pipeline was published in a removed cell or elsewhere; confirm.
if not published_pipelines:
    raise IndexError(
        "No published pipelines found in this workspace; "
        "publish a pipeline before creating a schedule."
    )

# From here on `pipeline` refers to the first published pipeline returned by
# the service (it intentionally replaces the Pipeline object built earlier,
# because the scheduling cell reads `pipeline.id`).
pipeline = published_pipelines[0]

name: AzureML Published Pipeline, id: 8ce894d6-5aea-406e-9669-d0edbe6eb19e
name: AzureML Published Pipeline, id: b67b39ba-04fd-43ed-8351-bc4046608d6b


In [14]:
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule

# Trigger the selected published pipeline every 15 minutes.
recurrence = ScheduleRecurrence(
    frequency="Minute",
    interval=15,
)

# Register the time-based schedule; runs are submitted under the
# "azureml-pipeline-schedule" experiment with no pipeline parameters.
schedule = Schedule.create(
    workspace=ws,
    name="AzureML Scheduled Pipeline",
    pipeline_id=pipeline.id,
    experiment_name="azureml-pipeline-schedule",
    recurrence=recurrence,
    pipeline_parameters={},
)

In [16]:
# Uncomment and run to stop the recurring runs once the schedule is no longer needed:
# schedule.disable()