## Initializing Azure Machine Learning Workspace

In [None]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Run, Experiment, Datastore
from azureml.widgets import RunDetails

# Report which azureml-core release this kernel is using.
print("SDK version:", azureml.core.VERSION)

# Load the workspace through Workspace.from_config() and echo its identifying
# details, one value per line.
ws = Workspace.from_config()
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

# Fetch the datastore registered under the name "workspaceblobstore"; later
# cells use it to back the pipeline's intermediate data.
def_blob_store = Datastore(workspace=ws, name="workspaceblobstore")
print("Blobstore's name: {}".format(def_blob_store.name))

## Using Azure Machine Learning Compute

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse, or to create when the lookup fails.
aml_compute_target = "cpu-cluster"
try:
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("Found existing compute target: {}".format(aml_compute_target))
except ComputeTargetException:
    # Catch only the SDK's compute-target error. A bare `except:` would also
    # swallow KeyboardInterrupt and unrelated failures (auth, network, typos)
    # and then try to provision a cluster in response to them.
    print("Creating new compute target: {}".format(aml_compute_target))

    provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2",
                                                                min_nodes=1,
                                                                max_nodes=4)
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning finishes (or the 20-minute timeout elapses),
    # streaming progress to the cell output.
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

## Run Configuration for Training Step

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Start from a default run configuration; everything below edits its environment.
run_config = RunConfiguration()

# Run inside Docker, using the SDK's default CPU base image.
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
run_config.environment.docker.enabled = True

# Dependencies are not user-managed: a conda environment is created in the
# Docker image from the conda dependencies specified below.
run_config.environment.python.user_managed_dependencies = False

# scikit-learn comes from conda; the Custom Vision and Image Search client
# libraries come from pip. The SDK version is left unpinned.
step_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'],
    pip_packages=[
        'azure-cognitiveservices-vision-customvision',
        'azure-cognitiveservices-search-imagesearch',
    ],
    pin_sdk_version=False,
)
run_config.environment.python.conda_dependencies = step_dependencies

## Build and Publish Pipeline For Training

Build a pipeline to train on comics images, publish it and add a schedule to run it.

### Preparing pipeline parameters, inputs and outputs

These are pipeline parameters that can be overridden when submitting a new run of the pipeline. To keep it simple, we provide one `image_tag` and an Azure Blob path `image_folder`, where the scraper downloads images based on the `search_term` and saves them prefixed with the `image_tag`. The training step then reads the files in `image_folder` that are prefixed with the `image_tag`.

In [None]:
from azureml.pipeline.core import PipelineParameter, PipelineData

# Parameters that can be overridden per pipeline run; these are the defaults.
search_term = PipelineParameter(name="search_term", default_value="Adult dog cartoon")
image_tag = PipelineParameter(name="image_tag", default_value="Dog")

# Intermediate data named "comics_images", stored on the default blob datastore.
# It is the hand-off between the scrapper step and the training step.
image_folder = PipelineData(name="comics_images", datastore=def_blob_store)

### Define the image scraper step

In [None]:
from azureml.pipeline.steps import PythonScriptStep

# Command-line arguments handed to image_scrapper.py: the output location,
# the image tag, and the search term (the latter two are pipeline parameters).
scrapper_arguments = [
    "--scrapper_output_folder", image_folder,
    "--scrapper_image_tag", image_tag,
    "--scrapper_search_term", search_term,
]

# Step that runs ./image_scrapper_step/image_scrapper.py on the cluster and
# declares image_folder as its output.
scrapperStep = PythonScriptStep(
    name="Image_Scrapper_Step",
    source_directory='./image_scrapper_step',
    script_name="image_scrapper.py",
    arguments=scrapper_arguments,
    outputs=[image_folder],
    compute_target=aml_compute_target,
    runconfig=run_config,
    allow_reuse=True,
)

print("ScrapperStep created")

### Define the training step

In [None]:
# Command-line arguments handed to train.py: the image tag parameter and the
# folder the images are read from.
train_arguments = [
    "--train_image_tag", image_tag,
    "--train_image_folder", image_folder,
]

# Step that runs ./train_step/train.py on the cluster and consumes
# image_folder as its input.
trainStep = PythonScriptStep(
    name="Training_Step",
    source_directory='./train_step',
    script_name="train.py",
    arguments=train_arguments,
    inputs=[image_folder],
    compute_target=aml_compute_target,
    runconfig=run_config,
    allow_reuse=True,
)

print("TrainStep created")

### Build the pipeline

In [None]:
from azureml.pipeline.core import Pipeline

# Only trainStep is listed explicitly.
# NOTE(review): scrapperStep is presumably pulled in automatically because
# trainStep consumes the image_folder data that scrapperStep produces — confirm.
pipeline1 = Pipeline(steps=[trainStep], workspace=ws)
print("Pipeline is built")

In [None]:
# Pipeline.validate() reports problems through the list of errors it returns,
# so that list must be inspected before announcing success. Raising here stops
# a "Run All" before a broken pipeline is submitted or published.
validation_errors = pipeline1.validate()
if validation_errors:
    raise ValueError("Pipeline validation failed: {}".format(validation_errors))
print("Pipeline is validated")

### Run the pipeline 

In [None]:
# Submit the pipeline under the 'comic_reader_training' experiment, with
# regenerate_outputs turned off.
training_experiment = Experiment(workspace=ws, name='comic_reader_training')
pipeline_run1 = training_experiment.submit(pipeline1, regenerate_outputs=False)
print("Pipeline is submitted for execution")

In [None]:
# Show the run-details widget for the pipeline run submitted above.
run_details_widget = RunDetails(pipeline_run1)
run_details_widget.show()

### Publish the pipeline

In [None]:
from datetime import datetime

# The same string is used as both the published pipeline's name and its description.
pipeline_name = "comics_training_pipeline"
print(pipeline_name)

published_pipeline1 = pipeline1.publish(description=pipeline_name, name=pipeline_name)
print("Newly published pipeline id: {}".format(published_pipeline1.id))

### Get published pipeline

In [None]:
from azureml.pipeline.core import PublishedPipeline

# Retrieve the published pipeline from the workspace by id.
# Replace this with your own published pipeline id if you did not just publish one.
pipeline_id = published_pipeline1.id
published_pipeline = PublishedPipeline.get(workspace=ws, id=pipeline_id)

# Leave the object as the cell's last expression so the notebook displays it.
published_pipeline

### Run published pipeline using REST Endpoint

In [131]:
from azureml.core.authentication import InteractiveLoginAuthentication
import requests

# Sign in interactively and obtain the authentication header for the REST call.
auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()  # passed as-is as the request headers

rest_endpoint1 = published_pipeline.endpoint

print("You can perform HTTP POST on URL {} to trigger this pipeline".format(rest_endpoint1))

# specify the param when running the pipeline
response = requests.post(rest_endpoint1,
                         headers=aad_token,
                         json={"ExperimentName": "Comics_Reader_Training",
                               "RunSource": "SDK",
                               "ParameterAssignments": {"image_tag": "sad_monkey_in_cage", "search_term": "sad monkey in a cage"}})

# Surface HTTP failures (expired token, bad endpoint, rejected parameters) as a
# clear HTTPError instead of an opaque KeyError / JSON decode error below.
response.raise_for_status()
run_id = response.json()["Id"]

print(run_id)

You can perform HTTP POST on URL https://westus2.aether.ms/api/v1.0/subscriptions/b8c23406-f9b5-4ccb-8a65-a8cb5dcd6a5a/resourceGroups/pudixit-rg/providers/Microsoft.MachineLearningServices/workspaces/pudixit-ws/PipelineRuns/PipelineSubmit/4f0a5618-32c1-45b0-8b07-aa5fb4d2760e to trigger this pipeline
4b9bb16b-2656-44f2-a931-858ef35bded4


In [132]:
from azureml.pipeline.core import PipelineRun

# Look up the experiment the REST-triggered run was submitted under, then
# attach to that run through the run_id returned by the POST.
exp = ws.experiments['Comics_Reader_Training']
published_pipeline_run = PipelineRun(experiment=exp, run_id=run_id)

# Show the run-details widget for the REST-triggered run.
published_run_widget = RunDetails(published_pipeline_run)
published_run_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

## Schedule your published pipeline

Schedule operations require the ID of a published pipeline. You can list all published pipelines and perform schedule operations on them, or, if you already know the ID of the published pipeline, you can use it directly.

In [None]:
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule

# Recurrence: every other day at 10:30pm (interval of 2 days, hour 22, minute 30).
recurrence = ScheduleRecurrence(frequency="Day", interval=2, hours=[22], minutes=[30])

# Parameter values the scheduled runs use in place of the pipeline defaults.
scheduled_parameters = {"image_tag": "Man_with_a_hat", "search_term": "Man with a hat cartoon"}

# wait_for_provisioning=True is passed, so the schedule should be provisioned
# properly before any further changes are made to it.
schedule = Schedule.create(
    workspace=ws,
    name="Comic_Reader_Schedule",
    description="Comics_Reader_Training",
    pipeline_id=pipeline_id,
    experiment_name='Comics_Reader_Training',
    pipeline_parameters=scheduled_parameters,
    recurrence=recurrence,
    wait_for_provisioning=True,
)

print("Created schedule with id: {}".format(schedule.id))