In [1]:
from azureml.core import Workspace, Experiment

#ws = Workspace.get(name="quick-starts-ws-", subscription_id="", resource_group="aml-quickstarts-")
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code RTV5KDUBM to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-146656
Azure region: southcentralus
Subscription id: d7f39349-a66b-446e-aba6-0053c2cf1c11
Resource group: aml-quickstarts-146656


In [2]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.exceptions import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

### YOUR CODE HERE ###
# First, tries to get the compute (if it exists).
cpu_cluster_name = "udacity-compute"
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.

Running


In [8]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.policy import MedianStoppingPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform
from azureml.train.hyperdrive.parameter_expressions import choice
from azureml.core import ScriptRunConfig
from azureml.core import Environment
import os
import shutil

# Specify parameter sampler
ps = RandomParameterSampling({
        '--C': choice(0.01, 0.1, 1, 10, 100),
        '--max_iter': choice(50, 100, 200)
})
# Specify a Policy
#policy = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=10)
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "outputs" not in os.listdir():
    os.mkdir("./outputs")

# Create a SKLearn estimator for use with train.py
# Instead, using a curated environment that includes sciKit-Learn, Pandas...
# It may not include numpy or os dependencias, in that case, clone curated 
# environment and customize it adding new dependencies.
#minimal_env = Environment.get(workspace=ws, name="AzureML-Tutorial")

est = SKLearn(source_directory='./',
            compute_target=cpu_cluster,
            entry_script='train.py')

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig (estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=100,
                                     max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [9]:
import joblib
# Submit your hyperdrive run to the experiment and show run details with the widget.
hdr = exp.submit(config=hyperdrive_config)
# Get your best run and save the model from that run.
RunDetails(hdr).show()

hdr.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_874f5223-ec50-494d-801c-ff48b389346a
Web View: https://ml.azure.com/runs/HD_874f5223-ec50-494d-801c-ff48b389346a?wsid=/subscriptions/9e65f93e-bdd8-437b-b1e8-0647cd6098f7/resourcegroups/aml-quickstarts-146351/workspaces/quick-starts-ws-146351&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-06-02T06:47:26.826933][API][INFO]Experiment created<END>\n""<START>[2021-06-02T06:47:27.372330][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-06-02T06:47:27.558838][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_874f5223-ec50-494d-801c-ff48b389346a
Web View: https://ml.azure.com/runs/HD_874f5223-ec50-494d-801c-ff48b389346a?wsid=/subscriptions/9e65f93e-bdd8-437b-b1e8-0647cd6098f7/resourcegroups/aml-quickstarts-146351/workspaces/quick-starts-ws-146351&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_874f5223-ec50-494d-801c-ff48b389346a',
 'target': 'udacity-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-06-02T06:47:26.530461Z',
 'endTimeUtc': '2021-06-02T06:54:30.94359Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '82658469-115a-4f74-9d8d-a1f1b2d1cd71',
  'score': '0.9141122913505311',
  'best_child_run_id': 'HD_874f5223-ec50-494d-801c-ff48b389346a_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg146351.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_874f5223-ec50-494d-801c-ff48b389346a/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=ycYSJSuNc8i%2BEYLaYKFFtFdtKRTmbJhxdVnprwScSr8%3D&st=2021-06-02T06%3A44%3A42Z&se=2021-06-02T14%3A54%3A42Z&sp=r'},
 'submittedBy': 'ODL_User 146351'}

In [21]:
best_run = hdr.get_best_run_by_primary_metric()
print("Best run metrics: ", best_run.get_metrics())
print("Best run details: ", best_run.get_details())
print("Best run files: ", best_run.get_file_names())
best_run.upload_file('outputs/model_hyper.joblib', 'outputs/model_hyper.joblib')
#joblib.dump(value = best_run.id, filename='outputs/model_hyper.joblib')
model = best_run.register_model(model_name='best_model', model_path = './outputs/model_hyper.joblib')

Best run metrics:  {'Regularization Strength:': 100.0, 'Max iterations:': 50, 'Accuracy': 0.9141122913505311}
Best run details:  {'runId': 'HD_874f5223-ec50-494d-801c-ff48b389346a_3', 'target': 'udacity-compute', 'status': 'Completed', 'startTimeUtc': '2021-06-02T06:48:06.165385Z', 'endTimeUtc': '2021-06-02T06:49:05.086997Z', 'properties': {'_azureml.ComputeTargetType': 'amlcompute', 'ContentSnapshotId': '82658469-115a-4f74-9d8d-a1f1b2d1cd71', 'ProcessInfoFile': 'azureml-logs/process_info.json', 'ProcessStatusFile': 'azureml-logs/process_status.json'}, 'inputDatasets': [], 'outputDatasets': [], 'runDefinition': {'script': 'train.py', 'command': '', 'useAbsolutePath': False, 'arguments': ['--C', '100', '--max_iter', '50'], 'sourceDirectoryDataStore': None, 'framework': 'Python', 'communicator': 'None', 'target': 'udacity-compute', 'dataReferences': {}, 'data': {}, 'outputData': {}, 'jobName': None, 'maxRunDurationSeconds': None, 'nodeCount': 1, 'priority': None, 'credentialPassthrough':

In [3]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

datastore_path = 'https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'
ds = Dataset.Tabular.from_delimited_files(path=datastore_path)

In [4]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

In [5]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.

# Regression (r2_score) or classification(accuracy).
automl_config = AutoMLConfig(
    compute_target=cpu_cluster,
    experiment_timeout_minutes=15,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    enable_onx_compatible_models=True,
    n_cross_validations=2)

In [6]:
# Submit your automl run

remote_run = exp.submit(automl_config, show_output=False)



Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,AutoML_f7b9d4bd-189f-45f3-95b5-32041b693158,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


In [8]:
# Retrieve and save your best automl model.
remote_run.wait_for_completion()
automl_best_run, automl_best_model = remote_run.get_output()
print("Best run metrics:", automl_best_run)

automl_best_model_reg = remote_run.register_model(model_name = 'udacity_automl')

print(automl_best_model.steps[1][1].estimators)

Package:azureml-automl-runtime, training version:1.29.0, current version:1.28.0.post2
Package:azureml-core, training version:1.29.0, current version:1.28.0
Package:azureml-dataset-runtime, training version:1.29.0, current version:1.28.0
Package:azureml-defaults, training version:1.29.0, current version:1.28.0
Package:azureml-interpret, training version:1.29.0, current version:1.28.0
Package:azureml-mlflow, training version:1.29.0, current version:1.28.0
Package:azureml-pipeline-core, training version:1.29.0, current version:1.28.0
Package:azureml-telemetry, training version:1.29.0, current version:1.28.0
Package:azureml-train-automl-client, training version:1.29.0, current version:1.28.0
Package:azureml-train-automl-runtime, training version:1.29.0, current version:1.28.0


Best run metrics: Run(Experiment: udacity-project,
Id: AutoML_f7b9d4bd-189f-45f3-95b5-32041b693158_11,
Type: azureml.scriptrun,
Status: Completed)
[('1', Pipeline(memory=None,
         steps=[('maxabsscaler', MaxAbsScaler(copy=True)),
                ('xgboostclassifier',
                 XGBoostClassifier(n_jobs=1, problem_info=ProblemInfo(
    dataset_samples=32950,
    dataset_features=132,
    dataset_classes=2,
    dataset_num_categorical=0,
    dataset_categoricals=None,
    pipeline_categoricals=None,
    dataset_y_std=None,
    dataset_uid=None,
    subsampling=False,
    task='classifi...
    subsampling_schedule='hyperband_clip',
    cost_mode_param=None,
    iteration_timeout_mode=0,
    iteration_timeout_param=None,
    feature_column_names=None,
    label_column_name=None,
    weight_column_name=None,
    cv_split_column_names=None,
    enable_streaming=None,
    timeseries_param_dict=None,
    gpu_training_param_dict={'processing_unit_type': 'cpu'}
), random_state=0, tree

In [34]:
# Ensure not to incur extra charges.
cpu_cluster.delete()