# Chapter 11 code snippets
This notebook contains all code snippets from chapter 11.

## Authoring a pipeline

In [None]:
from azureml.core import Workspace

# Connect to the workspace via Workspace.from_config(), then look up the
# 'loans' dataset and the 'cpu-sm-cluster' compute target by name.
ws = Workspace.from_config()
loans_ds = ws.datasets["loans"]
compute_target = ws.compute_targets["cpu-sm-cluster"]

In [None]:
from azureml.core import RunConfiguration, Environment

# Run configuration shared by the pipeline steps; its environment is fetched
# from the workspace by name.
runconfig = RunConfiguration()
runconfig.environment = Environment.get(
    workspace=ws,
    name="AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu",
)

In [None]:
from azureml.pipeline.core import PipelineData

# Directory-type intermediate data named "training_data", placed on the
# workspace's default datastore. It is listed as an output of step_01 and as an
# input of step_02.
step01_output = PipelineData(
    name="training_data",
    is_directory=True,
    datastore=ws.get_default_datastore(),
)

In [None]:
from azureml.pipeline.steps import PythonScriptStep

# Step 1 ("Prepare data"): runs prepare_data.py from the step01 directory on the
# compute target. The script receives the loans dataset as a named input and
# the location it should write its output to.
step_01 = PythonScriptStep(
    script_name="prepare_data.py",
    name="Prepare data",
    source_directory="step01",
    arguments=[
        "--dataset", loans_ds.as_named_input("loans"),
        "--output-path", step01_output,
    ],
    outputs=[step01_output],
    compute_target=compute_target,
    runconfig=runconfig,
    allow_reuse=True,
)

In [None]:
from azureml.pipeline.core import PipelineParameter

# Pipeline parameter named "learning_rate" with a default of 0.05; it is passed
# to the training script through the --learning-rate argument.
learning_rate_param = PipelineParameter(
    name="learning_rate",
    default_value=0.05,
)

In [None]:
from azureml.data import OutputFileDatasetConfig

# Output of the training step, targeting the /models/loans/ path on the
# workspace's default datastore.
datastore = ws.get_default_datastore()
step02_output = OutputFileDatasetConfig(
    name="model_store",
    destination=(datastore, "/models/loans/"),
)

In [None]:
# Step 2 ("Train model"): runs train_model.py from the step02 directory. It
# takes the learning-rate pipeline parameter, reads the data produced by step 1
# and writes to the step02_output location.
step_02 = PythonScriptStep(
    script_name="train_model.py",
    name="Train model",
    source_directory="step02",
    arguments=[
        "--learning-rate", learning_rate_param,
        "--input-path", step01_output,
        "--output-path", step02_output,
    ],
    inputs=[step01_output],
    outputs=[step02_output],
    compute_target=compute_target,
    runconfig=runconfig,
)

In [None]:
from azureml.pipeline.core import Pipeline

# Assemble the two steps into a single pipeline in this workspace.
pipeline = Pipeline(
    workspace=ws,
    steps=[step_01, step_02],
)

In [None]:
from azureml.core import Experiment

# Submit the pipeline under the "chapter-11-runs" experiment, overriding the
# learning_rate pipeline parameter with 0.5, and block until the run finishes.
experiment = Experiment(workspace=ws, name="chapter-11-runs")
pipeline_run = experiment.submit(
    pipeline,
    pipeline_parameters={"learning_rate": 0.5},
)
pipeline_run.wait_for_completion()

In [None]:
# To pass the dataset as a pipeline parameter instead, replace the step_01 definition with the following code:
# from azureml.data.dataset_consumption_config import DatasetConsumptionConfig
# dataset_pipeline_param = PipelineParameter(name="dataset_param", default_value=loans_ds)
# dataset_consumption = DatasetConsumptionConfig("loans", dataset_pipeline_param)
# step_01 = PythonScriptStep(
#    'prepare_data.py', 
#     source_directory='step01',
#     arguments = [
#         "--dataset", dataset_consumption, 
#         "--output-path", step01_output
#         ],
#     name='Prepare data',
#     runconfig=runconfig,
#     compute_target=compute_target,
#     outputs=[step01_output],
#     allow_reuse=True
# )

### Troubleshooting code issues

In [None]:
# Take the first step run found under the name "Train model" and show its
# details together with its logs (the last expression is the cell's output).
train_step_run = pipeline_run.find_step_run(name="Train model")[0]
train_step_run.get_details_with_logs()

## Publishing a pipeline to expose it as an endpoint

In [None]:
# Publish the pipeline under a name and description; the returned object's
# `endpoint` and `id` attributes are used by the cells that follow.
published_pipeline = pipeline.publish(
    name="Loans training pipeline",
    description="A pipeline to train a LightGBM model",
)

In [None]:
from azureml.core.authentication import InteractiveLoginAuthentication

# Authenticate interactively and obtain the authentication header; it is
# passed as the `headers` argument of the REST call that triggers the
# published pipeline.
auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()

In [None]:
import requests
# Trigger the published pipeline through its REST endpoint, overriding the
# learning_rate pipeline parameter for this run.
response = requests.post(
    published_pipeline.endpoint,
    headers=aad_token,
    json={
        "ExperimentName": "chapter-11-runs",
        "ParameterAssignments": {"learning_rate": 0.02},
    },
    timeout=60,  # fail with a Timeout error instead of hanging the cell indefinitely
)

print(f"Made a POST request to {published_pipeline.endpoint} and got {response.status_code}.")
# Stop with a clear HTTPError on a 4xx/5xx reply; otherwise the lookup below
# would fail with a confusing KeyError or JSON decoding error.
response.raise_for_status()
print(f"The portal url for the run is {response.json()['RunUrl']}")

## Scheduling a recurring pipeline

In [None]:
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule
from datetime import datetime, timezone

# Monthly recurrence anchored at the current moment.
# ScheduleRecurrence expects a naive datetime and treats it as UTC when no
# `time_zone` is given, so pass the current time in UTC (tzinfo stripped)
# rather than the machine's local wall-clock time.
recurrence = ScheduleRecurrence(
    frequency="Month",
    interval=1,
    start_time=datetime.now(timezone.utc).replace(tzinfo=None),
)

# Create the schedule for the published pipeline; its runs are submitted under
# the "chapter-11-scheduled-run" experiment.
schedule = Schedule.create(
    workspace=ws,
    name="chapter-11-schedule",
    pipeline_id=published_pipeline.id,
    experiment_name="chapter-11-scheduled-run",
    recurrence=recurrence,
    wait_for_provisioning=True,
    description="Schedule to retrain model",
)

print("Created schedule with id: {}".format(schedule.id))

In [None]:
from azureml.pipeline.core.schedule import Schedule

# List the active schedules in the workspace and disable each one, waiting for
# every disable operation to finish provisioning.
# Note: the loop variable rebinds the notebook-level name `schedule`.
schedules = Schedule.list(ws, active_only=True)
print("Your workspace has the following schedules set up:")
for schedule in schedules:
    # The closing parenthesis was missing from the printed message.
    print(f"Disabling {schedule.id} (Published pipeline: {schedule.pipeline_id})")
    schedule.disable(wait_for_provisioning=True)