In [1]:
# AzureML and Pipeline SDK specific imports

import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as py
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

print("SDK version:", azureml.core.VERSION)

##
# Initialize worksace
##
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

SDK version: 1.18.0
quick-starts-ws-127608
aml-quickstarts-127608
southcentralus
0c5a644d-c5ce-4e3b-bf42-4cb265317817


In [2]:


##
# Create Azure ML Experiment
#
experiment_name = 'ml-experiment-bike-1'
project_folder = './pipeline-project-bike'
experiment = Experiment(ws, experiment_name)
experiment


##
# Create or Attach AmlCompute cluster (for training)
#
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = 'cluster-bike-1'

try:
	compute_target = ComputeTarget (workspace=ws, name=amlcompute_cluster_name)
	print('Found existing cluster. Using it.')
except ComputeTargetException:
	compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
	compute_target=ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count=1, timeout_in_minutes=10)	



Found existing cluster. Using it.
Succeeded.....

KeyboardInterrupt: 

In [6]:

##
# Prepare Dataset
#
found = False
key = "Bikesharing Dataset"
description_text = "Bike Sharing Dataset for Udacity ND"

if key in ws.datasets.keys():
	found = True
	dataset = ws.datasets[key]
	
if not found:
	experiment_data = 'https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv'
	dataset = Dataset.Tabular.from_delimited_files(experiment_data)
	dateset = dataset.register(workspace = ws,
								name = key,
								description = description_text)
	
dframe = dataset.to_pandas_dataframe()
dframe.describe()
dataset.take(5).to_pandas_dataframe()

<bound method TabularDataset.to_pandas_dataframe of {
  "source": [
    "https://raw.githubusercontent.com/Azure/MachineLearningNotebooks/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/bike-no.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes",
    "Take"
  ]
}>

In [10]:


##
# Train model on AutoML
#
automl_settings = {
		"experiment_timeout_minutes": 20,
		"max_concurrent_iterations": 4,
		"primary_metric": 'normalized_root_mean_squared_error',
		"n_cross_validations": 5
}
automl_config = AutoMLConfig(compute_target = compute_target,
							task = "forecasting",
							training_data = dataset,
							time_column_name = "date",
							label_column_name = "cnt",
							enable_early_stopping = True,
							path = project_folder,
							# featurization = 'auto',
							debug_log = "automl_errors.log",
							**automl_settings
							)

##
# Train model on AutoML
# Create pipeline and AutoML step
#
from azureml.pipeline.core import PipelineData, TrainingOutput
datastore = ws.get_default_datastore()
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'
metrics_data = PipelineData(name = 'metrics_data',
							datastore = datastore,
							pipeline_output_name = metrics_output_name,
							training_output = TrainingOutput(type = 'Metrics')
							)
model_data = PipelineData(name = 'model_data',
							datastore = datastore,
							pipeline_output_name = best_model_output_name,
							training_output = TrainingOutput(type = 'Model')
							)
# AutoML step
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails
automl_step = AutoMLStep(name = 'automl_module',
						automl_config = automl_config,
						outputs = [metrics_data, model_data],
						alow_reuse = True
						)
pipeline = Pipeline (description = "pipeline_w_automl_step",
					workspace = ws,
					steps = automl_step
					)
pipeline_run = experiment.submit(pipeline)
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion()

Created step automl_module [b31237f8][00174e88-45ac-4b84-b426-e294ae4c703c], (This step will run and generate new outputs)
Submitted PipelineRun 1a862d48-eaf4-4a7a-a6de-427a25f57c06
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/1a862d48-eaf4-4a7a-a6de-427a25f57c06?wsid=/subscriptions/0c5a644d-c5ce-4e3b-bf42-4cb265317817/resourcegroups/aml-quickstarts-127608/workspaces/quick-starts-ws-127608
PipelineRunId: 1a862d48-eaf4-4a7a-a6de-427a25f57c06
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/1a862d48-eaf4-4a7a-a6de-427a25f57c06?wsid=/subscriptions/0c5a644d-c5ce-4e3b-bf42-4cb265317817/resourcegroups/aml-quickstarts-127608/workspaces/quick-starts-ws-127608
PipelineRun Status: NotStarted


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRun Status: Running


StepRunId: d8882887-4998-4eda-8b2b-eaf89a2dab61
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/ml-experiment-bike-1/runs/d8882887-4998-4eda-8b2b-eaf89a2dab61?wsid=/subscriptions/0c5a644d-c5ce-4e3b-bf42-4cb265317817/resourcegroups/aml-quickstarts-127608/workspaces/quick-starts-ws-127608
StepRun( automl_module ) Status: NotStarted
StepRun( automl_module ) Status: Running


In [None]:


##
# Examine results - retreive metrics of child runs
#
import json
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress = True)
with open(metrics_output._path_on_datastore) as f:
	metrics_output_result = f.read()
deserialized_metrisc_output = json.loads(metrics_output_result)
df = pd.DataFrame(deserialized_metrisc_output)
df

##
# Examine results - retreive best model
#
import pickle
best_model_output = pipeline_run._path_on_datastore(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress = True)
with open(best_model_output._path_on_datastore, "rb") as f:
	best_model = pickle.load(f)
best_model
best_model.steps



In [None]:
##
# Publish and run from REST endpoint
#
ws = Workspace.from_config()
print (ws.name, ws.location, ws.resource_group, ws.subscription_id, sep = '\n')

experiment_name = 'ml-experiment-bike-1'
project_folder = './pipeline-project-bike'
experiment = Experiment(ws, experiment_name)
experiment

from azureml.pipeline.core import PipelineRun
run_id = "1a862d48-eaf4-4a7a-a6de-427a25f57c06" #update
pipeline_run = PipelineRun(ws, run_id)

published_pipeline = pipeline_run.publish_pipeline(name = "Bike sharing training",
													description = "Training bike sharing pipeline",
													version = "1.0"
													)
published_pipeline

from azureml.core.authentication import InteractiveLoginAuthentication
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

import requests
rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint,
						headers = auth_header,
						json={"ExperimentName": "bike-pipeline-rest-endpoint"}
						)

try:
	response.raise_for_status()
except Exception:
	raise Exception("Received bad response from endpoint: {}\n"
					"Response Code: {}\n"
					"Headers: {}\n"
					"Content: {}".format(rest_endpoint, response.status_code, response.headers, response.content)
					)
run_id = response.json().get('Id')
print('Submitted pipeline run: ', run_id)

from azureml.pipeline.core.run import PipelineRun
from azureml.widgets import RunDetails
published_pipeline_run = PipelineRun(ws.experiments["bike-pipeline-rest-endpoint"], run_id)
RunDetails(published_pipeline_run).show()