# How to Publish a Pipeline and Invoke the REST endpoint
In this notebook, we will see how we can publish a pipeline and then invoke the REST endpoint.

### Initialization Steps

In [None]:
# Core Azure ML SDK objects: workspace, storage, experiments, datasets and compute.
import azureml.core
from azureml.core import Workspace, Datastore, Experiment, Dataset
from azureml.data import OutputFileDatasetConfig
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

# Pipeline-specific classes: the pipeline itself, the script step type,
# and the parameter type used when invoking the published pipeline.
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core.graph import PipelineParameter

print("Pipeline SDK-specific imports completed")

# Load the workspace from the local configuration and print its identifying details,
# one per line.
ws = Workspace.from_config()
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep="\n",
)

# Look up the blob datastore by its name.
# Alternative: def_blob_store = ws.get_default_datastore()
def_blob_store = Datastore(ws, "workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))

### Compute Targets
#### Retrieve an already attached Azure Machine Learning Compute, or create a new one

In [None]:
from azureml.core.compute_target import ComputeTargetException

# Choose a name for your cluster.
amlcompute_cluster_name = "cpu-cluster"

# Reuse the cluster when the workspace already has an AmlCompute target with
# this name; otherwise provision a new one.
cts = ws.compute_targets
found = amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute'

if found:
    print('Found existing compute target.')
    # Bind the existing cluster to `aml_compute`: the pipeline steps defined later
    # in this notebook pass `aml_compute` as their compute target, so leaving it
    # undefined in this branch would raise a NameError.
    aml_compute = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",  # for GPU, use "STANDARD_NC6"
        # vm_priority='lowpriority',  # optional
        max_nodes=4,
    )

    # Create the cluster.
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

    # Can poll for a minimum number of nodes and for a specific timeout.
    # If no min_node_count is provided, it will use the scale settings for the cluster.
    aml_compute.wait_for_completion(show_output=True, timeout_in_minutes=10)

# Keep `compute_target` available as an alias, since the original cell defined it
# when an existing cluster was found.
compute_target = aml_compute

# For a more detailed view of current AmlCompute status, use aml_compute.get_status().

In [None]:
# For a more detailed view of the current Azure Machine Learning Compute status, call get_status().
# Example: uncomment the following line.
# print(aml_compute.get_status().serialize())

## Building Pipeline Steps with Inputs and Outputs
A step in the pipeline can take a dataset as input. This dataset can be a data source that lives in one of the accessible data locations, or intermediate data produced by a previous step in the pipeline.

In [None]:
# Upload the local pickle file into the "20newsgroups" folder of the datastore,
# overwriting any existing copy.
data_path = def_blob_store.upload_files(
    ["./20news.pkl"],
    target_path="20newsgroups",
    overwrite=True,
)

In [None]:
# Wrap the uploaded data in a file dataset and give it the input name "test_data";
# the result is kept in blob_input_data for use as a step input.
blob_input_data = (
    Dataset.File
    .from_files(data_path)
    .as_named_input("test_data")
)
print("Dataset created")

In [None]:
# Intermediate output shared between steps, declared with OutputFileDatasetConfig.
processed_data1 = OutputFileDatasetConfig(
    name="processed_data1",
)
print("Output dataset object created")

#### Define a Step that consumes a dataset and produces intermediate data.
In this step, we define a step that consumes a dataset and produces intermediate data.

In [None]:
# trainStep takes the file dataset created earlier (passed with as_mount())
# as its input and writes its output to processed_data1.

source_directory = "publish_run_train"

trainStep = PythonScriptStep(
    script_name="train.py",
    arguments=[
        "--input_data", blob_input_data.as_mount(),
        "--output_train", processed_data1,
    ],
    compute_target=aml_compute,
    source_directory=source_directory,
)
print("trainStep created")

#### Define a Step that consumes intermediate data and produces intermediate data
Here we define a step that consumes intermediate data from a previous step and produces new intermediate data.

In [None]:
# extractStep reads the intermediate data written by trainStep (processed_data1)
# and writes a second intermediate output, processed_data2.
processed_data2 = OutputFileDatasetConfig(name="processed_data2")
source_directory = "publish_run_extract"

extractStep = PythonScriptStep(
    script_name="extract.py",
    arguments=[
        "--input_extract", processed_data1.as_input(),
        "--output_extract", processed_data2,
    ],
    compute_target=aml_compute,
    source_directory=source_directory,
)
print("extractStep created")

#### Define a Step that consumes multiple intermediate data and produces intermediate data
In this step, we define a step that consumes multiple intermediate data and produces intermediate data.

### PipelineParameter
This step also takes a PipelineParameter argument, which lets you supply a value when calling the REST endpoint of the published pipeline.

In [None]:
# Pipeline-level parameter named "pipeline_arg" with a default of 10.
# It is used again later, when the pipeline is published and invoked.
pipeline_param = PipelineParameter(
    name="pipeline_arg",
    default_value=10,
)
print("pipeline parameter created")

**Open `compare.py` on your local machine and examine the script's arguments, inputs, and outputs. That will give you a good sense of why the script argument names used below are important.**

In [None]:
# compareStep takes both intermediate outputs as inputs and produces processed_data3.
# register_on_complete registers that output as the dataset "compare_result"
# after the job completes.
processed_data3 = OutputFileDatasetConfig(name="processed_data3").register_on_complete("compare_result")

source_directory = "publish_run_compare"

compareStep = PythonScriptStep(
    script_name="compare.py",
    arguments=[
        "--compare_data1", processed_data1.as_input(),
        "--compare_data2", processed_data2.as_input(),
        "--output_compare", processed_data3,
        "--pipeline_param", pipeline_param,
    ],
    compute_target=aml_compute,
    source_directory=source_directory,
)
print("compareStep created")

#### Build the pipeline

In [None]:
# Only compareStep is listed explicitly. NOTE(review): trainStep and extractStep are
# presumably included through the intermediate data compareStep consumes — confirm
# against the Pipeline documentation.
pipeline1 = Pipeline(
    workspace=ws,
    steps=[compareStep],
)
print("Pipeline is built")

## Run published pipeline
### Publish the pipeline

In [None]:
# Publish the pipeline object directly; the last expression displays the result.
published_pipeline1 = pipeline1.publish(
    name="My_New_Pipeline",
    description="My Published Pipeline Description",
    continue_on_step_failure=True,
)
published_pipeline1

### Publish the pipeline from a submitted PipelineRun
It is also possible to publish a pipeline from a submitted PipelineRun.

In [None]:
# Submit the pipeline as a run of the "Pipeline_experiment" experiment.
pipeline_run1 = Experiment(ws, "Pipeline_experiment").submit(pipeline1)

# Publish a second pipeline, this time from the submitted run.
published_pipeline2 = pipeline_run1.publish_pipeline(
    name="My_New_Pipeline2",
    description="My Published Pipeline Description",
    version="0.1",
    continue_on_step_failure=True,
)
published_pipeline2

### Get published pipeline

You can get the published pipeline using **pipeline id**.

To get all the published pipelines for a given workspace(ws): 
```python
all_pub_pipelines = PublishedPipeline.get_all(ws)
```

In [None]:
from azureml.pipeline.core import PublishedPipeline

# Look the published pipeline up again by its id.
# Replace this with your own published pipeline id if needed.
pipeline_id = published_pipeline1.id
published_pipeline = PublishedPipeline.get(ws, pipeline_id)
published_pipeline

### Run published pipeline using its REST endpoint
[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to an AML workspace.

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Authenticate interactively. Despite its name, `aad_token` holds the value returned
# by get_authentication_header(), which is passed as the request headers below.
auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()

rest_endpoint = published_pipeline.endpoint

print("You can perform HTTP POST on URL {} to trigger this pipeline".format(rest_endpoint))

# Trigger the pipeline, overriding the "pipeline_arg" parameter for this run.
# An explicit timeout (in seconds) is set because requests waits indefinitely by
# default, which would leave this cell hanging if the endpoint never responds.
response = requests.post(
    rest_endpoint,
    headers=aad_token,
    json={
        "ExperimentName": "My_Pipeline1",
        "RunSource": "SDK",
        "ParameterAssignments": {"pipeline_arg": 45},
    },
    timeout=60,
)

In [None]:
# Fail with a message that includes the endpoint, status code, headers and body
# when the endpoint returned an error status.
try:
    response.raise_for_status()
except Exception:
    raise Exception(
        "Received bad response from the endpoint: {}\n"
        "Response Code: {}\n"
        "Headers: {}\n"
        "Content: {}".format(
            rest_endpoint,
            response.status_code,
            response.headers,
            response.content,
        )
    )

# The response body carries the id of the submitted run under the "Id" key.
run_id = response.json().get("Id")
print("Submitted pipeline run: ", run_id)