In [5]:
from azureml.core import Workspace, Experiment


ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="udacity-project")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()
print(ws)

Workspace name: quick-starts-ws-135627
Azure region: southcentralus
Subscription id: 610d6e37-4747-4a20-80eb-3aad70a55f43
Resource group: aml-quickstarts-135627
Workspace.create(name='quick-starts-ws-135627', subscription_id='610d6e37-4747-4a20-80eb-3aad70a55f43', resource_group='aml-quickstarts-135627')


In [6]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

cpu_cluster_name = "compute-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    print('Creating a new compute cluster...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

print(compute_target.get_status().serialize())

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 4, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Resizing', 'allocationStateTransitionTime': '2021-01-21T20:17:40.536000+00:00', 'errors': None, 'creationTime': '2021-01-21T20:15:35.312818+00:00', 'modifiedTime': '2021-01-21T20:15:51.399552+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [7]:
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
import os

# Specify parameter sampler
ps = RandomParameterSampling(
    {
        '--C' : choice(0.001,0.01,0.1,1,10,20,50,100,200,500,1000),
        '--max_iter': choice(50,100,200,300)
    }
)

# Specify a Policy
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory = "./",
            compute_target=compute_target,
            vm_size='STANDARD_D2_V2',
            entry_script="train.py")

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling=ps, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=policy,
                                     estimator=est,
                                     max_total_runs=16,
                                     max_concurrent_runs=4)



In [8]:
# Submit your hyperdrive run to the experiment and show run details with the widget.

hyperdrive_run = exp.submit(hyperdrive_config)

# Monitor HyperDrive runs You can monitor the progress of the runs with the following Jupyter widget
# RunDetails(hyperdrive_run).show()

hyperdrive_run.wait_for_completion(show_output=True)

assert(hyperdrive_run.get_status() == "Completed")



RunId: HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da?wsid=/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourcegroups/aml-quickstarts-135627/workspaces/quick-starts-ws-135627

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-21T20:18:11.263135][API][INFO]Experiment created<END>\n""<START>[2021-01-21T20:18:11.974670][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-01-21T20:18:12.314672][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-21T20:18:13.1830540Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da
Web View: https://ml.azure.com/experiments/udacity-project/runs/HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da?wsid=/subscriptions/610d6

In [9]:
import joblib
# Get your best run and save the model from that run.

print(hyperdrive_run.get_children_sorted_by_primary_metric(top=0, reverse=False, discard_no_metric=False))

best_run = hyperdrive_run.get_best_run_by_primary_metric()

print("Best run metrics :",best_run.get_metrics())
print("Best run details :",best_run.get_details())
print("Best run file names :",best_run.get_file_names())

[{'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_10', 'hyperparameters': None, 'best_primary_metric': 0.9176024279210926, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_9', 'hyperparameters': None, 'best_primary_metric': 0.9165402124430956, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_13', 'hyperparameters': None, 'best_primary_metric': 0.9163884673748103, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_12', 'hyperparameters': None, 'best_primary_metric': 0.9163884673748103, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_11', 'hyperparameters': None, 'best_primary_metric': 0.9163884673748103, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_4', 'hyperparameters': None, 'best_primary_metric': 0.9163884673748103, 'status': 'Completed'}, {'run_id': 'HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_5', 'hyperparameters': None, 'best_primary_metric': 0.9

In [10]:
from azureml.data.dataset_factory import TabularDatasetFactory

# Create TabularDataset using TabularDatasetFactory
# Data is available at: 
# "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"

ds = TabularDatasetFactory.from_delimited_files(['https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv'])

In [11]:
from train import clean_data

# Use the clean_data function to clean your data.
x, y = clean_data(ds)

In [12]:
from azureml.train.automl import AutoMLConfig

# Set parameters for AutoMLConfig
# NOTE: DO NOT CHANGE THE experiment_timeout_minutes PARAMETER OR YOUR INSTANCE WILL TIME OUT.
# If you wish to run the experiment longer, you will need to run this notebook in your own
# Azure tenant, which will incur personal costs.

automl_config = AutoMLConfig(
    compute_target = compute_target,
    experiment_timeout_minutes=30,
    task='classification',
    primary_metric='accuracy',
    training_data=ds,
    label_column_name='y',
    enable_onnx_compatible_models=True,
    n_cross_validations=2)

In [13]:
# Submit your automl run

remote_run = exp.submit(automl_config, show_output = False)
remote_run.wait_for_completion()

Running on remote.


{'runId': 'AutoML_8d295d24-1d0b-4fe4-bb40-184bc3b586b8',
 'target': 'compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-21T21:14:14.373697Z',
 'endTimeUtc': '2021-01-21T22:09:05.681589Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"daacff17-1bd5-4d9a-9d5d-f463fad022e4\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 1, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\\\\\\", \\\\\\"sas\\\\\\": null, \\\\\\"storageAccountName\\

In [31]:
# Retrieve and save your best automl model.
best_run, fitted_model = remote_run.get_output()

print(best_run)
print(fitted_model)

# get_metrics()
# Returns the metrics
print("Best run metrics :",best_run.get_metrics())
# get_details()
# Returns a dictionary with the details for the run
print("Best run details :",best_run.get_details())

Package:azureml-automl-runtime, training version:1.20.0, current version:1.19.0
Package:azureml-core, training version:1.20.0, current version:1.19.0
Package:azureml-dataprep, training version:2.7.2, current version:2.6.1
Package:azureml-dataprep-native, training version:27.0.0, current version:26.0.0
Package:azureml-dataprep-rslex, training version:1.5.0, current version:1.4.0
Package:azureml-dataset-runtime, training version:1.20.0, current version:1.19.0.post1
Package:azureml-defaults, training version:1.20.0, current version:1.19.0
Package:azureml-interpret, training version:1.20.0, current version:1.19.0
Package:azureml-pipeline-core, training version:1.20.0, current version:1.19.0
Package:azureml-telemetry, training version:1.20.0, current version:1.19.0
Package:azureml-train-automl-client, training version:1.20.0, current version:1.19.0
Package:azureml-train-automl-runtime, training version:1.20.0, current version:1.19.0


ModuleNotFoundError: No module named 'azureml.automl.runtime._ml_engine.featurizer_suggestion'

In [32]:
best_run.get_metrics()

{'Regularization Strength:': 100.0,
 'Max iterations:': 200,
 'Accuracy': 0.9176024279210926}

In [33]:
fitted_model

NameError: name 'fitted_model' is not defined

In [29]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
udacity-project,HD_61f9c0a4-df97-4581-ad23-1c0f9c38f3da_10,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [30]:
compute_target.delete()

ComputeTargetException: ComputeTargetException:
	Message: Received bad response from Resource Provider:
Response Code: 500
Headers: {'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Content-Length': '1462', 'Content-Type': 'application/json; charset=utf-8', 'Expires': '-1', 'x-ms-failure-cause': 'service', 'Request-Context': 'appId=cid-v1:2d2e8e63-272e-4b3c-8598-4ee570a0e70d', 'x-ms-response-type': 'error', 'x-ms-client-request-id': 'c49ec302-fba7-4bcb-8022-f62395f33927', 'x-ms-client-session-id': '88edf138-d6b9-442c-9769-39b176b2fcfb', 'X-Content-Type-Options': 'nosniff', 'x-request-time': '0.062', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'x-ms-ratelimit-remaining-subscription-deletes': '14999', 'x-ms-request-id': '0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'x-ms-correlation-request-id': '0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'x-ms-routing-request-id': 'SOUTHCENTRALUS:20210121T232437Z:0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'Date': 'Thu, 21 Jan 2021 23:24:36 GMT', 'Connection': 'close'}
Content: b'{\n  "error": {\n    "code": "ServiceError",\n    "severity": null,\n    "message": "Received 404 from a service request",\n    "messageFormat": null,\n    "messageParameters": null,\n    "referenceCode": null,\n    "detailsUri": null,\n    "target": "GET https://southcentralus.api.azureml.ms/mlc/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourceGroups/aml-quickstarts-135627/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135627/computes/compute-cluster?workspaceId=76eaaf31-a1d6-4cfe-b084-9fe0912736f1&api-version=2019-11-01",\n    "details": [\n      {\n        "code": "NotFound",\n        "severity": null,\n        "message": "{\\"error\\":{\\"code\\":\\"ResourceNotFound\\",\\"message\\":\\"The resource was not found.\\",\\"innererror\\":{\\"clientRequestId\\":\\"c49ec302-fba7-4bcb-8022-f62395f33927\\",\\"serviceRequestId\\":\\"|00-23d8ebced9500e4db0eaf7291d3fa166-5ee7cd9dd57fea4c-00.1749bceb_\\"}}}",\n        "messageFormat": null,\n        "messageParameters": {},\n        "referenceCode": null,\n        "detailsUri": null,\n        "target": null,\n        "details": [],\n        "innerError": null,\n        "debugInfo": null\n      }\n    ],\n    "innerError": null,\n    "debugInfo": null\n  },\n  "correlation": {\n    "operation": "23d8ebced9500e4db0eaf7291d3fa166",\n    "request": "1296f291816fd94f"\n  },\n  "environment": "southcentralus",\n  "location": "southcentralus",\n  "time": "2021-01-21T23:24:37.1939904+00:00",\n  "componentName": "account-rp"\n}'
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Received bad response from Resource Provider:\nResponse Code: 500\nHeaders: {'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Content-Length': '1462', 'Content-Type': 'application/json; charset=utf-8', 'Expires': '-1', 'x-ms-failure-cause': 'service', 'Request-Context': 'appId=cid-v1:2d2e8e63-272e-4b3c-8598-4ee570a0e70d', 'x-ms-response-type': 'error', 'x-ms-client-request-id': 'c49ec302-fba7-4bcb-8022-f62395f33927', 'x-ms-client-session-id': '88edf138-d6b9-442c-9769-39b176b2fcfb', 'X-Content-Type-Options': 'nosniff', 'x-request-time': '0.062', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', 'x-ms-ratelimit-remaining-subscription-deletes': '14999', 'x-ms-request-id': '0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'x-ms-correlation-request-id': '0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'x-ms-routing-request-id': 'SOUTHCENTRALUS:20210121T232437Z:0ea6abb8-1e17-4e04-a778-ebe4c2edb775', 'Date': 'Thu, 21 Jan 2021 23:24:36 GMT', 'Connection': 'close'}\nContent: b'{\\n  \"error\": {\\n    \"code\": \"ServiceError\",\\n    \"severity\": null,\\n    \"message\": \"Received 404 from a service request\",\\n    \"messageFormat\": null,\\n    \"messageParameters\": null,\\n    \"referenceCode\": null,\\n    \"detailsUri\": null,\\n    \"target\": \"GET https://southcentralus.api.azureml.ms/mlc/subscriptions/610d6e37-4747-4a20-80eb-3aad70a55f43/resourceGroups/aml-quickstarts-135627/providers/Microsoft.MachineLearningServices/workspaces/quick-starts-ws-135627/computes/compute-cluster?workspaceId=76eaaf31-a1d6-4cfe-b084-9fe0912736f1&api-version=2019-11-01\",\\n    \"details\": [\\n      {\\n        \"code\": \"NotFound\",\\n        \"severity\": null,\\n        \"message\": \"{\\\\\"error\\\\\":{\\\\\"code\\\\\":\\\\\"ResourceNotFound\\\\\",\\\\\"message\\\\\":\\\\\"The resource was not found.\\\\\",\\\\\"innererror\\\\\":{\\\\\"clientRequestId\\\\\":\\\\\"c49ec302-fba7-4bcb-8022-f62395f33927\\\\\",\\\\\"serviceRequestId\\\\\":\\\\\"|00-23d8ebced9500e4db0eaf7291d3fa166-5ee7cd9dd57fea4c-00.1749bceb_\\\\\"}}}\",\\n        \"messageFormat\": null,\\n        \"messageParameters\": {},\\n        \"referenceCode\": null,\\n        \"detailsUri\": null,\\n        \"target\": null,\\n        \"details\": [],\\n        \"innerError\": null,\\n        \"debugInfo\": null\\n      }\\n    ],\\n    \"innerError\": null,\\n    \"debugInfo\": null\\n  },\\n  \"correlation\": {\\n    \"operation\": \"23d8ebced9500e4db0eaf7291d3fa166\",\\n    \"request\": \"1296f291816fd94f\"\\n  },\\n  \"environment\": \"southcentralus\",\\n  \"location\": \"southcentralus\",\\n  \"time\": \"2021-01-21T23:24:37.1939904+00:00\",\\n  \"componentName\": \"account-rp\"\\n}'"
    }
}