In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
ws.get_details()

exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-144839
Azure region: southcentralus
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-144839


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "optim-mlpipe"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)



Jobrunning..........................................................

In [18]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
import os

# Specifing parameter sampler
ps = RandomParameterSampling({
    '--C': choice(0.001, 0.01, .1, .3, .5, 1.0),
    '--max_iter': choice(50, 100, 150, 200)
})

# Specifing a Policy
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=5)

if "training" not in os.listdir():
    os.mkdir("./training")

# Creating a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./',
              entry_script='train.py',
              compute_target=compute_target,
              vm_size='STANDARD_D2_V2')

# Creating a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=50,
                                     max_concurrent_runs=4)



In [19]:
# Submitting hyperdrive run to the experiment and show run details with the widget.

hd_run = exp.submit(hyperdrive_config, show_output=True)
RunDetails(hd_run).show()



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [13]:
import joblib
# Getting best run and saving the model from that run.

best_hd_run = hd_run.get_best_run_by_primary_metric()
best_hd_metrics = best_hd_run.get_metrics()

print("Best Run: ", best_hd_run)
print("Best Run Metrics: ", best_hd_metrics)

best_hd_run.download_file(
    best_hd_run.get_file_names()[-1],
    output_file_path='./outputs/'
)

best_hd_model = best_hd_run.register_model(
        model_name='best_hyperdrive_model',
        model_path='./outputs/model.joblib'
)

AttributeError: 'NoneType' object has no attribute 'get_metrics'

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
data_path = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

data = TabularDatasetFactory.from_delimited_files(path=data_path)

In [None]:
from train import clean_data
from sklearn.model_selection import train_test_split

# Use the clean_data function to clean your data.
x, y = clean_data(data)

X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [None]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.
automl_config = AutoMLConfig(
    experiment_timeout_minutes=30,
    compute_target=compute_target,
    task='classification',
    primary_metric='accuracy',
    training_data=data,
    label_column_name='y',
    n_cross_validations=5,
    enable_onnx_compatible_models=True)

In [None]:
# Submit your automl run

exp_automl = Experiment(ws, 'automl')
automl_run = exp_automl.submit(config=automl_config, show_output=True)

In [None]:
# Retrieve and save your best automl model.
from azureml.automl.runtime.onnx_convert import OnnxConverter
best_automl_run, best_automl_onnx_model = automl_run.get_output(return_onnx_model=True)

OnnxConverter.save_onnx_model(best_automl_onnx_model, file_path='./outputs/best_automl_model.onnx')


In [6]:
AmlCompute.delete(compute_target)

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

