In [None]:
from azureml.core import Workspace
from azureml.core import Environment
from azureml.core.environment import CondaDependencies
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core  import PipelineData
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

In [None]:
# Connect to the Azure ML workspace described by the local "./config" path.
ws = Workspace.from_config(path="./config")

In [None]:
# Start a new, named environment definition; its dependencies are attached
# in a later cell.
myenv = Environment(
    name="MyEnvironment",
)

In [None]:
# Describe the conda packages the pipeline scripts need ...
myenv_dep = CondaDependencies.create(
    conda_packages=[
        'scikit-learn',
    ],
)

# ... and attach that dependency set to the environment's Python section.
myenv.python.conda_dependencies = myenv_dep

In [None]:
# Register the environment definition with the workspace.
# Left as a bare expression so the notebook displays the call's return value.
myenv.register(workspace=ws)

In [None]:
# Create (or reuse) the compute cluster for the pipeline.
#
# The cluster name is looked up in the workspace first so this cell can be
# re-run (e.g. after "Restart Kernel -> Run All") without trying to provision
# a cluster that already exists.
cluster_name = "pipeline-cluster"

# Provisioning settings; only used when the cluster has to be created.
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D11_V2', max_nodes=2)

# ws.compute_targets is expected to map compute-target names to their objects;
# .get() yields None when no target with this name is present.
compute_cluster = ws.compute_targets.get(cluster_name)
if compute_cluster is None:
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until the cluster reports that provisioning has finished.
compute_cluster.wait_for_completion()

In [None]:
# Shared run configuration used by every pipeline step:
# it pairs the registered environment with the compute cluster.
run_config = RunConfiguration()
run_config.environment = myenv       # software the step scripts run in
run_config.target = compute_cluster  # where the step scripts run

In [None]:
# Define the data objects the pipeline steps exchange.

# Input: the dataset named 'Defaults' in this workspace.
# ws.datasets.get() returns None for an unknown name, which would otherwise
# only surface later as an AttributeError when the first step is built,
# so fail here with an explicit message instead.
input_ds = ws.datasets.get('Defaults')
if input_ds is None:
    raise ValueError(
        "Dataset 'Defaults' was not found in the workspace; "
        "register it before building the pipeline."
    )

# Intermediate location on the workspace's default datastore, written by the
# data preparation step and read by the training step.
dataFolder = PipelineData('datafolder', datastore=ws.get_default_datastore())

In [None]:
# Step 01 - Data Preparation
# Runs the data-prep script with the dataset as the named input 'raw_data'
# and dataFolder as its output; the output location is also passed to the
# script through the --datafolder argument.
dataPrep_step = PythonScriptStep(
    name='01 Data Preparation',
    script_name='220 - Dataprep Pipeline.py',
    source_directory='.',
    arguments=['--datafolder', dataFolder],
    inputs=[input_ds.as_named_input('raw_data')],
    outputs=[dataFolder],
    runconfig=run_config,
)

In [None]:
# Step 02 - Train the model
# Runs the training script with dataFolder as its input; the same location is
# passed to the script through the --datafolder argument.
train_step = PythonScriptStep(
    name='02 Train the Model',
    script_name='220 - Training Pipeline.py',
    source_directory='.',
    arguments=['--datafolder', dataFolder],
    inputs=[dataFolder],
    runconfig=run_config,
)

In [None]:
# Assemble the pipeline from the two steps defined above.
steps = [
    dataPrep_step,
    train_step,
]
new_pipeline = Pipeline(
    steps=steps,
    workspace=ws,
)

In [None]:
# Submit the pipeline under the 'PipelineExp01' experiment and stream its
# output until the run finishes.
new_experiment = Experiment(
    name='PipelineExp01',
    workspace=ws,
)
new_pipeline_run = new_experiment.submit(config=new_pipeline)

# Left as a bare expression so the notebook displays the call's return value.
new_pipeline_run.wait_for_completion(show_output=True)