# Putting the Full Pipeline Together

## Import the Packages

In [1]:
from azureml.core import Workspace, Datastore, Dataset, Experiment, Environment

## Connect to the AML Workspace

In [3]:
# Load the workspace handle from the configuration found under ".azure".
ws = Workspace.from_config(path=".azure")

# Default datastore registered with the workspace.
datastore = ws.get_default_datastore()

# Compute target the pipeline steps run on, looked up by name.
compute_target = ws.compute_targets["cpu-cluster"]

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


## Define the RunConfig

In [12]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.core.environment import CondaDependencies

from azureml.core import ScriptRunConfig

# Build the run configuration that is handed to the training step.
run_config = RunConfiguration()

# Attach the environment FIRST. Previously this assignment came last, which
# replaced run_config.environment wholesale and silently discarded every
# Docker / Python setting that had been applied to the old environment object.
run_config.environment = Environment.from_conda_specification(name = "train-env", file_path = "environment.yml")

# enable Docker
# NOTE(review): newer azureml-core releases deprecate this attribute in favour
# of DockerConfiguration(use_docker=True) - confirm against the installed SDK.
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Let Azure ML build the conda environment from environment.yml inside the
# Docker image. Setting this to True would mean "the user manages the Python
# environment" and the conda specification above would not be materialised.
run_config.environment.python.user_managed_dependencies = False

## Define the Pipeline Steps

In [13]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.core import Pipeline, StepSequence

# Every step runs a script from the "src" directory on the compute target
# resolved from the workspace earlier in the notebook.
# NOTE(review): only the training step receives `run_config`; the other steps
# pass no runconfig - confirm that this is intended.

# define the ingest pipeline step
data_ingest_step = PythonScriptStep(
    script_name="data_ingest.py",
    source_directory="src",
    compute_target=compute_target)

# define the preprocessing pipeline step
data_prep_step = PythonScriptStep(
    script_name="preprocessing.py",
    source_directory="src",
    compute_target=compute_target)

# define the train pipeline step
# Uses the same compute target object as the other steps instead of repeating
# the cluster name as a string literal, so the name lives in one place only.
train_DTC_step = PythonScriptStep(
    script_name="train_DTC.py",
    source_directory="src",
    compute_target=compute_target,
    runconfig=run_config)

# define the deploy pipeline step
deploy_step = PythonScriptStep(
    script_name="deploy.py",
    source_directory="src",
    compute_target=compute_target)

# run the steps strictly one after another: ingest -> prep -> train -> deploy
step_sequence = StepSequence(steps=[data_ingest_step, data_prep_step, train_DTC_step, deploy_step])

# create the pipeline
pipeline = Pipeline(workspace=ws, steps=step_sequence)

## Run the Pipeline

In [14]:
# Submit the pipeline under the 'full_pipeline' experiment and block until the
# run finishes, streaming its output into the notebook.
experiment = Experiment(workspace=ws, name='full_pipeline')
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Created step data_ingest.py [4a211669][51da7a5b-0320-4797-82ef-dd5a07ef40f5], (This step is eligible to reuse a previous run's output)
Created step preprocessing.py [39ebe20d][c2416ce2-534f-49f6-953b-659f15374c43], (This step will run and generate new outputs)
Created step train_DTC.py [6f0ff3ed][141c6cae-f40b-4c8f-b6f1-65d4f6e3943f], (This step will run and generate new outputs)
Created step deploy.py [6103da46][30f0cc64-d6a0-4a7e-b52a-6e6f85099042], (This step will run and generate new outputs)
Submitted PipelineRun 57d73fbf-a532-4cb0-b5bb-6fc6228b4361
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/full_pipeline/runs/57d73fbf-a532-4cb0-b5bb-6fc6228b4361?wsid=/subscriptions/3a0172d3-ec0d-46bb-a88a-ff41a302711a/resourcegroups/Evonik/workspaces/AMLWorkspace
PipelineRunId: 57d73fbf-a532-4cb0-b5bb-6fc6228b4361
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/full_pipeline/runs/57d73fbf-a532-4cb0-b5bb-6fc6228b4361?wsid=/subscriptions/3a0172d3