In [2]:
from azureml.core import Workspace, Experiment

# Connect to the Azure ML workspace and open the experiment that will hold the
# HyperDrive sweep.
ws = Workspace.get(
    name='quick-starts-ws-142013',
    subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f',
    resource_group='aml-quickstarts-142013',
)
exp = Experiment(workspace=ws, name="hyperdrive")

# Echo the workspace coordinates, one "label: value" pair per line.
workspace_summary = [
    ('Workspace name: ', ws.name),
    ('Azure region: ', ws.location),
    ('Subscription id: ', ws.subscription_id),
    ('Resource group: ', ws.resource_group),
]
print('\n'.join(label + value for label, value in workspace_summary))

# Start an interactive run on the experiment.
# NOTE(review): nothing visible in this notebook completes this run before
# `run` is rebound by the AutoML submission -- confirm it is still needed.
run = exp.start_logging()

Workspace name: quick-starts-ws-142013
Azure region: southcentralus
Subscription id: a24a24d5-8d87-4c8a-99b6-91ed2d2df51f
Resource group: aml-quickstarts-142013


In [29]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.exceptions import ComputeTargetException

# Reuse the AmlCompute cluster named below, or provision it when the lookup fails.
# Exercise constraints: vm_size "Standard_D2_V2" and max_nodes no greater than 4.

cpu_cluster_name = "cpu-cluster"

try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # The lookup raised, so the cluster is treated as missing and created.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=0,
        max_nodes=4,
        idle_seconds_before_scaledown=2400,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Called on both paths (existing or newly created cluster).
cpu_cluster.wait_for_completion(show_output=True)

CreatingAmlCompute is getting created. Consider calling wait_for_completion() first

.AmlCompute is getting created. Consider calling wait_for_completion() first

.AmlCompute is getting created. Consider calling wait_for_completion() first

..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
%%writefile conda_dependencies.yml

# Conda specification for the training environment; the %%writefile magic at
# the top of this cell saves it as conda_dependencies.yml.
dependencies:
- python=3.6.2
# NOTE(review): scikit-learn is unpinned, so the resolved version can differ
# between environment builds -- consider pinning it.
- scikit-learn
- pip:
  # Installed through pip rather than conda.
  - azureml-defaults

Writing conda_dependencies.yml


In [17]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core import Environment, ScriptRunConfig
from shutil import copyfile
import os

# Random sampling over the two hyperparameters of the sweep:
#   'C'        drawn uniformly from [0.01, 10.0]
#   'max_iter' picked from a fixed set of values
ps = RandomParameterSampling({
    'C' : uniform(0.01, 10.0),
    'max_iter' : choice(50, 100, 150, 200, 250)
})

# Early-termination policy: Bandit with a slack factor of 0.01.
policy = BanditPolicy(slack_factor=0.01)

# Stage train.py in its own folder so that folder alone is used as the
# ScriptRunConfig source directory. makedirs(exist_ok=True) makes this cell
# safe to re-run and avoids the check-then-create pattern.
os.makedirs("./training", exist_ok=True)
copyfile('./train.py', './training/train.py')

# Environment built from the conda specification file.
sklearn_env = Environment.from_conda_specification(name='sklearn-env',
                                                   file_path='./conda_dependencies.yml')

# Script run configuration for train.py on the compute cluster.
# NOTE(review): the fixed '--C'/'--max_iter' values below appear to be followed
# by the sampled ones at run time (the best-run cell reports values that differ
# from these) -- confirm train.py takes the last occurrence of each flag.
src = ScriptRunConfig(source_directory='./training/',
                      script='train.py',
                      arguments=['--C', 1.0, '--max_iter', 100],
                      compute_target=cpu_cluster,
                      environment=sklearn_env)

# HyperDrive configuration built from the script run config, the sampler and
# the early-termination policy; maximizes the 'Accuracy' metric.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=50,
                                     max_concurrent_runs=4)

In [18]:
# Submit the HyperDrive sweep to the experiment and follow it in the widget.
# The variable name's spelling ("hyperdive") is kept because the best-run cell
# refers to it.
hyperdive_run = exp.submit(hyperdrive_config)
RunDetails(hyperdive_run).show()

# Block until the sweep has finished so that, under "Run All", the following
# cell does not ask for the best run of a sweep that is still in progress.
# Progress is already visible in the widget, so log streaming is turned off.
hyperdive_run.wait_for_completion(show_output=False)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [25]:
import joblib
from azureml.core import Model
# Get your best run and save the model from that run.

def _argument_value(arguments, flag):
    """Return the value that follows the last occurrence of ``flag``.

    Args:
        arguments: Flat command-line argument list,
            e.g. ['--C', 1.0, '--C', '0.5'].
        flag: Option name to look up, e.g. '--C'.

    Returns:
        The element right after the last ``flag``, or None when ``flag`` is
        absent or is the final element of the list.
    """
    value = None
    for position in range(len(arguments) - 1):
        if arguments[position] == flag:
            value = arguments[position + 1]
    return value


best_run = hyperdive_run.get_best_run_by_primary_metric()
if best_run is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError('HyperDrive returned no best run; check that the sweep '
                       'finished and that child runs logged the primary metric.')

best_run_metrics = best_run.get_metrics()
best_run_parameters = best_run.get_details()['runDefinition']['arguments']

# Look the hyperparameters up by flag rather than by fixed position, so the
# report stays correct if the fixed script arguments change. The last
# occurrence of each flag is used.
print('\nBest run ID: ', best_run.id)
print('\nAccuracy: ', best_run_metrics['Accuracy'])
print('\nInverse Regularization C: ', _argument_value(best_run_parameters, '--C'))
print('\nMax Iterations: ', _argument_value(best_run_parameters, '--max_iter'))

# Register the serialized model written by the best run.
# NOTE(review): model_framework_version is hard-coded while the training
# environment installs an unpinned scikit-learn -- confirm the versions match.
hd_model = best_run.register_model(model_name = 'sklearn-logreg-hd.joblib',
                                   model_path = './outputs/hd-model.joblib',
                                   model_framework = Model.Framework.SCIKITLEARN,
                                   model_framework_version = '0.19.1'
)


Best run ID:  HD_a313e12b-2e6d-449f-9ffd-1d8189f7b82f_32

Accuracy:  0.9058023792182569

Regularization:  0.013669354923587996

Max Iterations:  150


In [32]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Build a TabularDataset from the bank-marketing training CSV using
# TabularDatasetFactory.
BANKMARKETING_TRAIN_URL = (
    "https://automlsamplenotebookdata.blob.core.windows.net"
    "/automl-sample-notebook-data/bankmarketing_train.csv"
)

ds = TabularDatasetFactory.from_delimited_files(BANKMARKETING_TRAIN_URL)

In [None]:
import pandas as pd
from train import clean_data
from sklearn.model_selection import train_test_split

# Clean the raw dataset with train.py's clean_data helper, which returns the
# features and the label as two separate objects.
x, y = clean_data(ds)

# Re-attach the label to the features as a column (axis=1) so each row carries
# its own target. axis=0 would instead stack the labels underneath the feature
# rows, leaving the label missing for every feature row.
# NOTE(review): assumes y is named 'y', which is the label column name the
# AutoML configuration expects -- confirm against clean_data.
# Split between train and test sets (same test size and random state as
# hyperdrive parameters to allow for comparisons).
train_data, test_data = train_test_split(pd.concat([x, y], axis=1), test_size=0.25, random_state=34)


In [41]:
from azureml.train.automl import AutoMLConfig

# AutoML experiment settings.
# NOTE: DO NOT CHANGE experiment_timeout_minutes OR YOUR INSTANCE WILL TIME OUT.
# Running the experiment longer requires your own Azure tenant, which will
# incur personal costs.
# NOTE(review): train_data is a pandas DataFrame while compute_target is a
# remote cluster -- check the AutoMLConfig documentation for whether a
# TabularDataset is required in that case.
automl_settings = {
    'experiment_timeout_minutes': 30,
    'task': 'classification',
    'primary_metric': 'AUC_weighted',
    'training_data': train_data,
    'label_column_name': 'y',
    'n_cross_validations': 5,
    'compute_target': cpu_cluster,
}
automl_config = AutoMLConfig(**automl_settings)

In [42]:
# Launch the AutoML run under its own "auto-ml" experiment, with
# show_output=True passed to the submission.
# Note: this rebinds `exp` and `run`, which earlier cells bound to the
# "hyperdrive" experiment and its interactive run.
exp = Experiment(ws, "auto-ml")
run = exp.submit(automl_config, show_output=True)

No run_configuration provided, running on local with default configuration


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/anaconda/envs/azureml_py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-42-b1783fd4ca2b>", line 4, in <module>
    run = exp.submit(config=automl_config, show_output=True)
  File "/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/_jupyter_common/__init__.py", line 86, in submit
    run = original_submit(self, config, tags, **kwargs)
  File "/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/core/experiment.py", line 220, in submit
    run = submit_func(config, self.workspace, self.name, **kwargs)
  File "/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py", line 104, in _automl_static_submit
    show_output)
  File "/anaconda/envs/azureml_py36/lib/python3.6/site-packages/azureml/train/automl/automlconfig.py", line 200, in _start_execution
    automl_run = _def

TypeError: object of type 'NoneType' has no len()

In [None]:
# Retrieve and save your best automl model.

### YOUR CODE HERE ###

In [None]:
# Delete compute cluster
# Removes the cluster bound to `cpu_cluster`. Run this last: both the
# HyperDrive script configuration and the AutoML configuration use this
# cluster as their compute target.
cpu_cluster.delete()