In [1]:
# Report which azureml-core release this kernel is running against, so the
# saved outputs can be matched to an SDK version.
import azureml.core

sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.19.0


In [2]:
from azureml.core.workspace import Workspace

# Build the workspace handle from the locally stored workspace configuration.
ws = Workspace.from_config()

# One line per identifying attribute of the workspace; string concatenation is
# kept so a non-string attribute still fails loudly.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

Workspace name: Udemy_ML
Azure region: eastus2
Subscription id: ef78944e-d555-4976-84de-e8954c8a9357
Resource group: Machine_Learning


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse or create in the workspace.
cluster_name = "capstone-CPU"

try:
    # Looking the cluster up by name raises ComputeTargetException when the
    # lookup fails, which is treated here as "cluster does not exist yet".
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    # Provision a cluster of STANDARD_D2_V2 VMs that can scale up to 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(
        workspace=ws,
        name=cluster_name,
        provisioning_configuration=compute_config,
    )
else:
    print('Found existing compute target')

# Wait (at most 2 minutes) for provisioning to settle. With min_node_count=None
# the cluster's own scale settings decide how many nodes must be available.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=2,
)

# Dump the detailed cluster status (node counts, allocation state, scale settings).
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-02-20T18:20:45.245000+00:00', 'errors': None, 'creationTime': '2021-01-19T17:25:38.254875+00:00', 'modifiedTime': '2021-01-19T17:25:54.517722+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [5]:
import os

# Local directory that is later used as the source directory of the training job.
project_folder = './sklearn-DM'

# exist_ok=True keeps this cell safe to re-run when the folder is already there.
os.makedirs(name=project_folder, exist_ok=True)

In [9]:
import shutil

# Stage the training script inside the project folder; the call's return value
# (the destination path) is left as the cell's displayed result.
shutil.copy(src='train_dm.py', dst=project_folder)

'./sklearn-DM/train_dm.py'

In [7]:
from azureml.core import Experiment

# Experiment under which both the single run and the HyperDrive sweep are submitted.
experiment_name = 'train_dm_hyperdrive'
experiment = Experiment(workspace=ws, name=experiment_name)



In [8]:
%%writefile conda_dependencies.yml

# Conda specification written to ./conda_dependencies.yml by this cell.
# Everything below the %%writefile line (including these comments) becomes the file content.
dependencies:
# Python interpreter version for the environment.
- python=3.6.2
# No version pin: the resolved scikit-learn release may differ between environment builds.
- scikit-learn
- pip:
  # Installed through pip rather than conda; also unpinned.
  - azureml-defaults

Writing conda_dependencies.yml


In [11]:
from azureml.core import Environment

# Define the run environment named 'sklearn-env' from the conda specification
# file in the current directory.
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='./conda_dependencies.yml',
)

In [23]:
from azureml.core import ScriptRunConfig

# Command-line arguments handed to train_dm.py for this run configuration.
script_arguments = ['--kernel', 'linear', '--penalty', 1.0]

# Run configuration: which script to run, from which folder, on which compute
# target, and inside which environment.
src = ScriptRunConfig(
    source_directory=project_folder,
    script='train_dm.py',
    arguments=script_arguments,
    compute_target=compute_target,
    environment=sklearn_env,
)



In [20]:
# Submit the single-run configuration to the experiment and keep the run handle.
run = experiment.submit(config=src)

In [21]:
from azureml.widgets import RunDetails

# Show the notebook widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [22]:
# Block until the run finishes while streaming its logs into the cell output.
# A failed run surfaces as an exception (raise_on_error is spelled out here;
# True is also the SDK default).
run.wait_for_completion(show_output=True, raise_on_error=True)


RunId: train_dm_hyperdrive_1613923020_b8e96a23
Web View: https://ml.azure.com/experiments/train_dm_hyperdrive/runs/train_dm_hyperdrive_1613923020_b8e96a23?wsid=/subscriptions/ef78944e-d555-4976-84de-e8954c8a9357/resourcegroups/Machine_Learning/workspaces/Udemy_ML

Streaming azureml-logs/55_azureml-execution-tvmps_501c744928cefc4dad07bd6ace6d6739a38b2ef17057b1b1ee6092594990023c_d.txt

2021-02-21T16:01:53Z Starting output-watcher...
2021-02-21T16:01:53Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-02-21T16:01:53Z Executing 'Copy ACR Details file' on 10.0.0.5
2021-02-21T16:01:53Z Copy ACR Details file succeeded on 10.0.0.5. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_b1e1549112fc4b4d2d32f3d6c4b8a2b3
8e097b52bfb8: Pulling fs layer
a613a9b4553c: Pulling fs layer
acc000f01536: Pulling fs layer
73eef93b7466: Pulling fs layer
d5a54c1fb97f: Pulling fs layer
1536f6ca931b: Pulling fs layer
d7b631d130cb: Pulling fs lay

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "AzureMLCompute job failed.\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.\n\tReason: Job failed with non-zero exit Code",
        "messageFormat": "{Message}",
        "messageParameters": {
            "Message": "AzureMLCompute job failed.\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.\n\tReason: Job failed with non-zero exit Code"
        },
        "details": [],
        "innerError": {
            "code": "BadArgument",
            "innerError": {
                "code": "AmlComputeBadRequest"
            }
        }
    },
    "correlation": {
        "operation": null,
        "request": "a1625d2e917a51b0"
    },
    "environment": "eastus2",
    "location": "eastus2",
    "time": "2021-02-21T16:03:52.555367Z",
    "componentName": "execution-worker"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"AzureMLCompute job failed.\\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.\\n\\tReason: Job failed with non-zero exit Code\",\n        \"messageFormat\": \"{Message}\",\n        \"messageParameters\": {\n            \"Message\": \"AzureMLCompute job failed.\\nJobFailed: Submitted script failed with a non-zero exit code; see the driver log file for details.\\n\\tReason: Job failed with non-zero exit Code\"\n        },\n        \"details\": [],\n        \"innerError\": {\n            \"code\": \"BadArgument\",\n            \"innerError\": {\n                \"code\": \"AmlComputeBadRequest\"\n            }\n        }\n    },\n    \"correlation\": {\n        \"operation\": null,\n        \"request\": \"a1625d2e917a51b0\"\n    },\n    \"environment\": \"eastus2\",\n    \"location\": \"eastus2\",\n    \"time\": \"2021-02-21T16:03:52.555367Z\",\n    \"componentName\": \"execution-worker\"\n}"
    }
}

In [None]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice

# Discrete search space: each key is a script argument, each value the set of
# candidates that random sampling draws from.
search_space = {
    "--kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
    "--penalty": choice(0.5, 1, 1.5),
}
param_sampling = RandomParameterSampling(search_space)

# Sweep definition: reuse the single-run configuration, maximize the metric
# named 'Accuracy', and run at most 12 child runs with up to 4 at a time.
# No early-termination policy is passed.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=param_sampling,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)

In [None]:
# Launch the hyperparameter sweep under the same experiment and keep the
# parent run handle for monitoring and best-run lookup.
hyperdrive_run = experiment.submit(config=hyperdrive_config)

In [None]:
# Show the notebook widget for the HyperDrive parent run.
hyperdrive_widget = RunDetails(hyperdrive_run)
hyperdrive_widget.show()

In [None]:
# Block until the sweep finishes, streaming its output into the cell.
# raise_on_error is spelled out explicitly; True is also the SDK default.
hyperdrive_run.wait_for_completion(show_output=True, raise_on_error=True)

In [None]:
# Pick the child run with the best value of the sweep's primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# The lookup yields None when no child run reported the primary metric (for
# example when every child failed, or the script logs the metric under another
# name). Fail here with an actionable message instead of an opaque
# AttributeError on the next line.
if best_run is None:
    raise RuntimeError(
        "HyperDrive returned no best run: no child run logged the primary "
        "metric. Check the child run logs and the metric name the training "
        "script reports."
    )

# Show the command-line arguments (i.e. the sampled hyperparameters) the best
# child run was launched with.
print(best_run.get_details()['runDefinition']['arguments'])

In [None]:
# List the files stored with the best run (useful for locating the model file
# before registering it).
best_run_files = best_run.get_file_names()
print(best_run_files)

In [None]:
# Register the model file produced by the best run in the workspace.
# NOTE(review): the registered name 'sklearn-iris' does not match the rest of
# this notebook (train_dm.py, ./sklearn-DM) and looks carried over from a
# sample — confirm the intended name before relying on it downstream.
# NOTE(review): assumes the training script saved its model to
# outputs/model.joblib — verify against train_dm.py.
model = best_run.register_model(
    model_name='sklearn-iris',
    model_path='outputs/model.joblib',
)