In [1]:
# AzureML and Pipeline SDK specific imports

import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as py
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

print("SDK version:", azureml.core.VERSION)

##
# Initialize worksace
##
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

SDK version: 1.18.0
Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code RQMVFA9L4 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
quick-starts-ws-127700
aml-quickstarts-127700
southcentralus
ac15aef5-0abe-4be6-a0bd-40abc1594138


In [2]:


##
# Create Azure ML Experiment
#
experiment_name = 'ml-experiment-bike-1'
project_folder = './pipeline-project-bike'
experiment = Experiment(ws, experiment_name)
experiment


##
# Create or Attach AmlCompute cluster (for training)
#
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = 'cluster-bike-1'

try:
	compute_target = ComputeTarget (workspace=ws, name=amlcompute_cluster_name)
	print('Found existing cluster. Using it.')
except ComputeTargetException:
	compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
	compute_target=ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

#compute_target.wait_for_completion(show_output=True, min_node_count=1, timeout_in_minutes=10)	



In [3]:

##
# Prepare Dataset
#
found = False
key = "Bikesharing Dataset"
description_text = "Bike Sharing Dataset for Udacity ND"

if key in ws.datasets.keys():
	found = True
	dataset = ws.datasets[key]
	
if not found:
	experiment_data = 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'
	dataset = Dataset.Tabular.from_delimited_files(experiment_data)
	dateset = dataset.register(workspace = ws,
								name = key,
								description = description_text)
	
dframe = dataset.to_pandas_dataframe()
dframe.describe()
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,2,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,3,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


In [4]:


##
# Train model on AutoML
#
automl_settings = {
		"experiment_timeout_minutes": 20,
		"max_concurrent_iterations": 4,
		"primary_metric": 'normalized_root_mean_squared_error',
		"n_cross_validations": 5
}
automl_config = AutoMLConfig(compute_target = compute_target,
							task = "forecasting",
							training_data = dataset,
							time_column_name = "date",
							label_column_name = "cnt",
							enable_early_stopping = True,
							path = project_folder,
							# featurization = 'auto',
							debug_log = "automl_errors.log",
							**automl_settings
							)

##
# Train model on AutoML
# Create pipeline and AutoML step
#
from azureml.pipeline.core import PipelineData, TrainingOutput
datastore = ws.get_default_datastore()
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'
metrics_data = PipelineData(name = 'metrics_data',
							datastore = datastore,
							pipeline_output_name = metrics_output_name,
							training_output = TrainingOutput(type = 'Metrics')
							)
model_data = PipelineData(name = 'model_data',
							datastore = datastore,
							pipeline_output_name = best_model_output_name,
							training_output = TrainingOutput(type = 'Model')
							)
# AutoML step
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails
automl_step = AutoMLStep(name = 'automl_module',
						automl_config = automl_config,
						outputs = [metrics_data, model_data],
						alow_reuse = True
						)
pipeline = Pipeline (description = "pipeline_w_automl_step",
					workspace = ws,
					steps = automl_step
					)
pipeline_run = experiment.submit(pipeline)
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion()

Created step automl_module [e36445fd][33ead9b8-a78a-4e2b-a360-ce6d89752b14], (This step will run and generate new outputs)
Submitted PipelineRun d85a0d58-95fb-43c0-ac81-0aa2ad524b5e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/d85a0d58-95fb-43c0-ac81-0aa2ad524b5e?wsid=/subscriptions/ac15aef5-0abe-4be6-a0bd-40abc1594138/resourcegroups/aml-quickstarts-127700/workspaces/quick-starts-ws-127700
PipelineRunId: d85a0d58-95fb-43c0-ac81-0aa2ad524b5e
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/d85a0d58-95fb-43c0-ac81-0aa2ad524b5e?wsid=/subscriptions/ac15aef5-0abe-4be6-a0bd-40abc1594138/resourcegroups/aml-quickstarts-127700/workspaces/quick-starts-ws-127700
PipelineRun Status: Running


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …



StepRunId: acd2d00f-fd19-4732-b5ff-5522f95a3188
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/acd2d00f-fd19-4732-b5ff-5522f95a3188?wsid=/subscriptions/ac15aef5-0abe-4be6-a0bd-40abc1594138/resourcegroups/aml-quickstarts-127700/workspaces/quick-starts-ws-127700
StepRun( automl_module ) Status: NotStarted
StepRun( automl_module ) Status: Running

StepRun(automl_module) Execution Summary
StepRun( automl_module ) Status: Finished



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'd85a0d58-95fb-43c0-ac81-0aa2ad524b5e', 'status': 'Completed', 'startTimeUtc': '2020-11-24T12:05:36.17128Z', 'endTimeUtc': '2020-11-24T12:47:48.291552Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://mlstrg127700.blob.core.windows.net/azureml/ExperimentRun/dcid.d85a0d58-95fb-43c0-ac81-0a

'Finished'

In [6]:


##
# Examine results - retreive metrics of child runs
#
import json
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress = True)
with open(metrics_output._path_on_datastore) as f:
	metrics_output_result = f.read()
deserialized_metrisc_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrisc_output)
df

##
# Examine results - retreive best model
#
import pickle
best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress = True)
with open(best_model_output._path_on_datastore, "rb") as f:
	best_model = pickle.load(f)
best_model
best_model.steps





Downloading azureml/acd2d00f-fd19-4732-b5ff-5522f95a3188/model_data
Downloaded azureml/acd2d00f-fd19-4732-b5ff-5522f95a3188/model_data, 1 files out of an estimated total of 1


[('timeseriestransformer',
  TimeSeriesTransformer(featurization_config=None,
                        pipeline_type=<TimeSeriesPipelineType.FULL: 1>)),
 ('RobustScaler',
  RobustScaler(copy=True, quantile_range=[25, 75], with_centering=True,
               with_scaling=False)),
 ('LassoLars',
  LassoLars(alpha=0.001, copy_X=True, eps=2.220446049250313e-16,
            fit_intercept=True, fit_path=True, max_iter=500, normalize=False,
            positive=False, precompute='auto', verbose=False))]

In [8]:
##
# Publish and run from REST endpoint
#
ws = Workspace.from_config()
print (ws.name, ws.location, ws.resource_group, ws.subscription_id, sep = '\n')

experiment_name = 'ml-experiment-bike-1'
project_folder = './pipeline-project-bike'
experiment = Experiment(ws, experiment_name)
experiment

from azureml.pipeline.core import PipelineRun
run_id = "d85a0d58-95fb-43c0-ac81-0aa2ad524b5e" #update
pipeline_run = PipelineRun(experiment, run_id)

published_pipeline = pipeline_run.publish_pipeline(name = "Bike sharing training",
													description = "Training bike sharing pipeline",
													version = "1.0"
													)
published_pipeline

from azureml.core.authentication import InteractiveLoginAuthentication
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

import requests
rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint,
						headers = auth_header,
						json={"ExperimentName": "bike-pipeline-rest-endpoint"}
						)

try:
	response.raise_for_status()
except Exception:
	raise Exception("Received bad response from endpoint: {}\n"
					"Response Code: {}\n"
					"Headers: {}\n"
					"Content: {}".format(rest_endpoint, response.status_code, response.headers, response.content)
					)
run_id = response.json().get('Id')
print('Submitted pipeline run: ', run_id)

from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails
published_pipeline_run = PipelineRun(ws.experiments["bike-pipeline-rest-endpoint"], run_id)
RunDetails(published_pipeline_run).show()

quick-starts-ws-127700
southcentralus
aml-quickstarts-127700
ac15aef5-0abe-4be6-a0bd-40abc1594138
Submitted pipeline run:  c7b04816-d883-4a48-a92a-8de4d9586852


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …