In [1]:
import os
import random

import numpy as np
import pandas as pd
from azureml.core import Workspace, Dataset, Datastore, Experiment, Environment, Model
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import RunConfiguration, CondaDependencies, DEFAULT_CPU_IMAGE, DockerConfiguration
from azureml.pipeline.core import Pipeline, PublishedPipeline, StepSequence
from azureml.pipeline.steps import PythonScriptStep
from azureml.widgets import RunDetails

# Initializing The Workspace & Compute Target

In [2]:
# Connect to the workspace described by the local config file, then look up
# the named compute cluster and the workspace's default datastore.
ws = Workspace.from_config(path = 'config.json')
compute_target = ComputeTarget(workspace = ws, name = 'automl-cluster')
datastore = ws.get_default_datastore()

# Creating A Machine Learning (ML) Pipeline

## Creating & Containerizing An Environment

In [3]:
# Build a named environment whose pip dependencies pin the AzureML packages
# to a single version, then register it with the workspace.
environment = Environment(name = 'AutoML Environment For Scoring')

conda_dependencies = CondaDependencies()
for pinned_package in (
        'azureml-defaults==1.51.0',
        'azureml-automl-core==1.51.0',
        'azureml-automl-runtime==1.51.0'):
    conda_dependencies.add_pip_package(pinned_package)

environment.python.conda_dependencies = conda_dependencies
registered_environment = environment.register(workspace = ws)

In [4]:
# Run configuration shared by the PythonScriptStep definitions in this notebook.
iris_run_config = RunConfiguration()
iris_run_config.environment = environment
iris_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# `environment.docker.enabled` is deprecated (the SDK warns about it); Docker is
# switched on through a DockerConfiguration attached to the run configuration.
iris_run_config.docker = DockerConfiguration(use_docker = True)

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


## Configuring & Running The ML Local Scoring Pipeline

In [5]:
# Experiment that the local scoring pipeline is submitted under.
iris_scoring_pipeline_experiment = Experiment(workspace = ws, name = 'Iris-Scoring-Pipeline')

In [6]:
# Single-step pipeline: run the local scoring script on the compute cluster.
iris_scoring_step = PythonScriptStep(
    name = 'Iris-Scoring-Step',
    script_name = 'Iris Local Scoring.py',
    source_directory = 'Scoring Scripts',
    arguments = [],
    inputs = [],
    compute_target = compute_target,
    runconfig = iris_run_config,
    allow_reuse = False,
)

scoring_step_sequence = StepSequence(steps = [iris_scoring_step])
iris_scoring_pipeline = Pipeline(workspace = ws, steps = scoring_step_sequence)
iris_scoring_pipeline_run = iris_scoring_pipeline_experiment.submit(
    iris_scoring_pipeline, show_output = True)

# Show the run widget, then block until the pipeline run has ended.
RunDetails(iris_scoring_pipeline_run).show()
iris_scoring_pipeline_run.wait_for_completion(show_output = True)

Created step Iris-Scoring-Step [2e94474b][8b95d05a-ba16-4b54-8f54-6f3a984aa313], (This step will run and generate new outputs)
Submitted PipelineRun d3dec654-be29-423b-9d5b-318a1bcc428f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d3dec654-be29-423b-9d5b-318a1bcc428f?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: d3dec654-be29-423b-9d5b-318a1bcc428f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d3dec654-be29-423b-9d5b-318a1bcc428f?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6
PipelineRun Status: Running


StepRunId: 9fb2cb04-f8e3-4273-b6a9-a41a5800907e
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9fb2cb04-f8e3-4273-b6a9-a41a5800907e?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6
StepRun( Iris-Scoring-Step ) Status: Queued
StepRun( Iris-Scoring-Step ) Status: Running

StepRun(Iris-Scoring-Step) Execution Summary
StepRun( Iris-Scoring-Step ) Status: Finished
{'runId': '9fb2cb04-f8e3-4273-b6a9-a41a5800907e', 'target': 'automl-cluster', 'status': 'Completed', 'startTimeUtc': '2023-11-21T12:43:07.911972Z',



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'd3dec654-be29-423b-9d5b-318a1bcc428f', 'status': 'Completed', 'startTimeUtc': '2023-11-21T12:40:33.301165Z', 'endTimeUtc': '2023-11-21T12:44:03.237951Z', 'services': {}, 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}', 'azureml.continue_on_step_failure': 'False', 'azureml.continue_on_failed_optional_input': 'True', 'azureml.pipelineComponent': 'pipelinerun', 'azureml.pipelines.stages': '{"Initialization":null,"Execution":{"StartTime":"2023-11-21T12:40:33.7398592+00:00","EndTime":"2023-11-21T12:44:03.1178618+00:00","Status":"Finished"}}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://auotmlexamplew5880114168.blob.core.windows.net/azureml/ExperimentRun/dcid.d3dec654-be29-423b-9d5b-318a1bcc428f/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=oi6pXDwaWrQ3Bp007VJArNvGJn3hWbuI4TaLxh%2Fu3

'Finished'

In [7]:
# Publish the pipeline behind the scoring run; the cell displays the published entry.
iris_scoring_pipeline_published = iris_scoring_pipeline_run.publish_pipeline(
    name = 'Iris-Local-Scoring-Pipeline',
    description = 'Pipeline Used For Scoring Iris Data Imported Through A Linked Service To A Virtual Machine',
    version = '3.0',
)
iris_scoring_pipeline_published

Name,Id,Status,Endpoint
Iris-Local-Scoring-Pipeline,c5b6c0bc-dd43-4d23-938e-ee00b43d7805,Active,REST Endpoint


# Creating A Parallel Scoring Pipeline

In [6]:
from azureml.pipeline.steps import ParallelRunStep, ParallelRunConfig
from azureml.pipeline.core import PipelineData

## Configuring & Running The Parallel Scoring Pipeline

In [None]:
# Experiment that the parallel scoring pipeline is submitted under.
iris_parallel_scoring_pipeline_experiment = Experiment(
    workspace = ws, name = 'Iris-Parallel-Scoring-Pipeline')

In [None]:
# Intermediate pipeline data on the default datastore; it is the output of the
# parallel scoring step and the input of the follow-up output step.
iris_parallel_run_output = PipelineData(
    name = 'predictions_from_parallel_scoring',
    datastore = datastore,
)

# NOTE: this binds a second name to the same Environment object (no copy is
# made), so the Docker settings below are applied to `environment` itself.
parallel_run_environment = environment
parallel_run_environment.docker.enabled = True
parallel_run_environment.docker.base_image = DEFAULT_CPU_IMAGE

In [None]:
# Configuration of the parallel scoring run: entry script, batching,
# error tolerance, output handling and the compute it executes on.
iris_parallel_run_config = ParallelRunConfig(
    entry_script = 'Iris Parallel Scoring.py',
    source_directory = 'Scoring Scripts',
    mini_batch_size = '1MB',
    error_threshold = 5,
    output_action = 'append_row',
    environment = parallel_run_environment,
    compute_target = compute_target,
    run_invocation_timeout = 60,
    node_count = 1,
    logging_level = 'DEBUG',
)

In [None]:
# Fetch the registered dataset to score and wire it into the parallel step.
iris_parallel_scoring_dataset = Dataset.get_by_name(
    ws, 'Iris Dataset (Simulated With 10 Million Sythetic Data Points) For Parallel Scoring')

parallel_scoring_input = iris_parallel_scoring_dataset.as_named_input('Iris_Parallel_Scoring')

iris_parallel_scoring_step = ParallelRunStep(
    name = 'Iris-Parallel-Scoring-Step',
    parallel_run_config = iris_parallel_run_config,
    inputs = [parallel_scoring_input],
    output = iris_parallel_run_output,
    arguments = ['--model_name', 'Iris-MultiClass-Classification-AutoML'],
    allow_reuse = False,
)

In [None]:
# Follow-up step: hands the parallel step's output folder to the output script.
iris_parallel_output_step = PythonScriptStep(
    name = 'Iris-Parallel-Output-Step',
    script_name = 'Iris Parallel Output.py',
    source_directory = 'Scoring Scripts',
    arguments = ['--input_data_folder', iris_parallel_run_output],
    inputs = [iris_parallel_run_output],
    compute_target = compute_target,
    runconfig = iris_run_config,
    allow_reuse = False,
)

In [None]:
# Chain the scoring step and the output step in sequence, then submit the pipeline.
parallel_step_sequence = StepSequence(
    steps = [iris_parallel_scoring_step, iris_parallel_output_step])
iris_parallel_scoring_pipeline = Pipeline(workspace = ws, steps = parallel_step_sequence)
iris_parallel_scoring_pipeline_run = iris_parallel_scoring_pipeline_experiment.submit(
    iris_parallel_scoring_pipeline, show_output = True)

# Show the run widget, then block until the pipeline run has ended.
RunDetails(iris_parallel_scoring_pipeline_run).show()
iris_parallel_scoring_pipeline_run.wait_for_completion(show_output = True)

In [None]:
# Publish the pipeline behind the parallel scoring run and display the published entry.
iris_parallel_scoring_pipeline_published = iris_parallel_scoring_pipeline_run.publish_pipeline(
    name = 'Iris-Parallel-Scoring-Pipeline',
    description = 'Pipeline Used For Scoring Iris Data In Parallel',
    version = '1.0',
)
iris_parallel_scoring_pipeline_published

# Creating A Local Retraining Pipeline

In [7]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep
from azureml.pipeline.core import TrainingOutput

## Importing The Iris Training Dataset

In [8]:
# Registered dataset used as training data for the AutoML retraining step.
iris_dataset = Dataset.get_by_name(workspace = ws, name = 'Iris-Local-Training-Dataset')

In [9]:
# AutoML settings for the retraining run: a 3-class classification task on the
# 'species' label, optimised for accuracy with 10-fold cross-validation.
automl_retraining_config = AutoMLConfig(
    task = 'classification',
    primary_metric = 'accuracy',
    num_classes = 3,
    featurization = 'auto',
    compute_target = compute_target,
    training_data = iris_dataset,
    label_column_name = 'species',
    experiment_timeout_minutes = 20,
    enable_early_stopping = True,
    max_concurrent_iterations = 2,
    n_cross_validations = 10,
    model_explainability = True,
    enable_stack_ensemble = True,
    enable_voting_ensemble = True,
)

In [10]:
# Pipeline outputs of the AutoML step: one for the metrics, one for the model.
metrics_output = PipelineData(
    name = 'metrics_data',
    datastore = datastore,
    pipeline_output_name = 'Metrics_Output',
    training_output = TrainingOutput(type = 'Metrics'),
)

model_output = PipelineData(
    name = 'model_data',
    datastore = datastore,
    pipeline_output_name = 'Best_Model_Output',
    training_output = TrainingOutput(type = 'Model'),
)

# The AutoML training step writes to both outputs declared above.
automl_training_step = AutoMLStep(
    name = 'MultiClass_Classification_AutoML_Step',
    automl_config = automl_retraining_config,
    outputs = [metrics_output, model_output],
    allow_reuse = False,
)

In [11]:
# Make sure the local folder for the training scripts exists (no error if it already does).
os.makedirs(name = 'Training Scripts', exist_ok = True)

## Configuring & Running The Retraining Pipeline

In [12]:
# Experiment that the retraining pipeline is submitted under.
iris_retraining_experiment = Experiment(
    workspace = ws,
    name = 'Iris-Retraining-Pipeline',
)

In [13]:
# Step that runs the registration script, passing it the model name, the
# AutoML step's model output and a dataset name as command-line arguments.
registration_arguments = [
    '--model_name', 'Iris-MultiClass-Classification-AutoML',
    '--model_path', model_output,
    '--dataset_name', 'Iris_Dataset',
]

model_registration_step = PythonScriptStep(
    name = 'Model-Registration-Step',
    script_name = 'Iris Model Registration Scoring.py',
    source_directory = 'Training Scripts',
    arguments = registration_arguments,
    inputs = [model_output],
    compute_target = compute_target,
    runconfig = iris_run_config,
    allow_reuse = False,
)

In [14]:
# Chain AutoML training and model registration in sequence, then submit the pipeline.
retraining_step_sequence = StepSequence(
    steps = [automl_training_step, model_registration_step])
iris_retraining_pipeline = Pipeline(workspace = ws, steps = retraining_step_sequence)
iris_retraining_pipeline_run = iris_retraining_experiment.submit(
    iris_retraining_pipeline, show_output = True)

# Show the run widget, then block until the pipeline run has ended.
RunDetails(iris_retraining_pipeline_run).show()
iris_retraining_pipeline_run.wait_for_completion(show_output = True)

Created step MultiClass_Classification_AutoML_Step [64ee8078][038b7421-e9a2-4065-9e7a-f68c7a8c921e], (This step will run and generate new outputs)
Created step Model-Registration-Step [c55bdace][9f347489-419c-4aa8-8c36-400a40c89180], (This step will run and generate new outputs)
Submitted PipelineRun 218ef104-dbdc-411f-8dd2-35543b33e2ed
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/218ef104-dbdc-411f-8dd2-35543b33e2ed?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 218ef104-dbdc-411f-8dd2-35543b33e2ed
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/218ef104-dbdc-411f-8dd2-35543b33e2ed?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6
PipelineRun Status: Running


StepRunId: a6cbbb13-db23-491a-80b6-0bd52f5faf05
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/a6cbbb13-db23-491a-80b6-0bd52f5faf05?wsid=/subscriptions/0c19fc19-85fd-4aa4-b133-61dd20fa93df/resourcegroups/edwin.spartan117-rg/workspaces/auotml-example-workspace&tid=c5f4b1c2-b533-4788-b1c5-99d0f10fb9b6
StepRun( MultiClass_Classification_AutoML_Step ) Status: NotStarted
StepRun( MultiClass_Classification_AutoML_Step ) Status: Running

StepRun(MultiClass_Classification_AutoML_Step) Execution Summary
StepRun( MultiClass_Classification_AutoML_Step ) Status: Finished

Experiment timeout reached, hence experiment stopped. Current 

StepRun( Model-Registration-Step ) Status: Running

StepRun(Model-Registration-Step) Execution Summary
StepRun( Model-Registration-Step ) Status: Finished
{'runId': 'b99ec2c1-69f1-402b-b2aa-abc635231fa5', 'target': 'automl-cluster', 'status': 'Completed', 'startTimeUtc': '2023-11-21T13:30:33.176657Z', 'endTimeUtc': '2023-11-21T13:31:56.338856Z', 'services': {}, 'properties': {'ContentSnapshotId': '03f60995-5eb2-4555-9829-95e3330104ee', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.moduleid': '9f347489-419c-4aa8-8c36-400a40c89180', 'azureml.moduleName': 'Model-Registration-Step', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': 'c55bdace', 'azureml.pipelinerunid': '218ef104-dbdc-411f-8dd2-35543b33e2ed', 'azureml.pipeline': '218ef104-dbdc-411f-8dd2-35543b33e2ed', 'azureml.rootpipelinerunid': '218ef104-dbdc-411f-8dd2-35543b33e2ed', 'azureml.pipelineComponent': 'masterescloud', '_azureml.ComputeTargetType': 'amlctrain', 'ProcessInfoFile': 'azureml-log



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '218ef104-dbdc-411f-8dd2-35543b33e2ed', 'status': 'Completed', 'startTimeUtc': '2023-11-21T13:02:15.621921Z', 'endTimeUtc': '2023-11-21T13:31:57.362899Z', 'services': {}, 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}', 'azureml.continue_on_step_failure': 'False', 'azureml.continue_on_failed_optional_input': 'True', 'azureml.pipelineComponent': 'pipelinerun', 'azureml.pipelines.stages': '{"Initialization":null,"Execution":{"StartTime":"2023-11-21T13:02:16.0005784+00:00","EndTime":"2023-11-21T13:31:57.2354697+00:00","Status":"Finished"}}'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://auotmlexamplew5880114168.blob.core.windows.net/azureml/ExperimentRun/dcid.218ef104-dbdc-411f-8dd2-35543b33e2ed/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=TU3kDa84qCvCIVufL3WvjeZ0dGsqAsxYm7HZAMRwkTc

'Finished'

In [15]:
# Publish the pipeline behind the retraining run and display the published entry.
iris_retraining_pipeline_published = iris_retraining_pipeline_run.publish_pipeline(
    name = 'Iris-AutoML-Local-Retraining-Pipeline',
    description = 'Pipeline Used For Retraining Iris Data Imported From A Virtual Machine with AutoML',
    version = '1.0',
)
iris_retraining_pipeline_published

Name,Id,Status,Endpoint
Iris-AutoML-Local-Retraining-Pipeline,6a7862e0-6ae7-4879-bd6f-524baf104bdb,Active,REST Endpoint


# Scheduling & Triggering A Published Pipeline

In [None]:
from azureml.pipeline.core.schedule import ScheduleRecurrence, Schedule, TimeZone

## Creating A Manual Trigger

In [None]:
# Manually trigger the published retraining pipeline under its own experiment.
trigger_retraining_experiment = Experiment(workspace = ws, name = 'AutoML-Retraining-Trigger')

# Use the id of the pipeline published earlier in this notebook instead of a
# hard-coded id, so the trigger always targets the pipeline that actually exists.
trigger_retraining_pipeline_run = trigger_retraining_experiment.submit(
    PublishedPipeline.get(workspace = ws, id = iris_retraining_pipeline_published.id))

## Creating A Schedule For Running The Pipeline

In [None]:
# Weekly schedule for the retraining pipeline. The pipeline id is read from the
# object published earlier in this notebook rather than hard-coded, so the
# schedule always targets the pipeline this notebook published.
retraining_schedule = Schedule.create(
    workspace = ws,
    name = 'Iris-Retraining-Schedule',
    pipeline_id = iris_retraining_pipeline_published.id,
    experiment_name = 'Iris-Retraining',
    description = 'Retraining Of Iris Dataset With AutoML On A Regular Basis',
    recurrence = ScheduleRecurrence(
        frequency = 'Week',
        interval = 1,
        start_time = '2023-11-21T10:00:00',
        time_zone = TimeZone.SingaporeStandardTime),
)