In [1]:
# Check core SDK version number
import azureml.core

print("SDK version:", azureml.core.VERSION)

SDK version: 1.19.0


In [2]:
from azureml.core.workspace import Workspace

ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: Udemy_ML
Azure region: eastus2
Subscription id: ef78944e-d555-4976-84de-e8954c8a9357
Resource group: Machine_Learning


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "capstone-CPU"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=2)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-02-20T18:20:45.245000+00:00', 'errors': None, 'creationTime': '2021-01-19T17:25:38.254875+00:00', 'modifiedTime': '2021-01-19T17:25:54.517722+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [5]:
import os

project_folder = './sklearn-DM'
os.makedirs(project_folder, exist_ok=True)

In [36]:
import shutil

shutil.copy('train_dm.py', project_folder)

'./sklearn-DM/train_dm.py'

In [7]:
from azureml.core import Experiment

experiment_name = 'train_dm_hyperdrive'
experiment = Experiment(ws, name=experiment_name)



In [8]:
%%writefile conda_dependencies.yml

dependencies:
- python=3.6.2
- scikit-learn
- pip:
  - azureml-defaults

Writing conda_dependencies.yml


In [11]:
from azureml.core import Environment

sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')

In [37]:
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train_dm.py',
                      arguments=['--kernel', 'linear', 
                                    '--penalty', 1.0],
                      compute_target=compute_target,
                      environment=sklearn_env)



In [38]:
run = experiment.submit(src)

In [39]:
from azureml.widgets import RunDetails

RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [40]:
run.wait_for_completion(show_output=True)


RunId: train_dm_hyperdrive_1613926293_72986b27
Web View: https://ml.azure.com/experiments/train_dm_hyperdrive/runs/train_dm_hyperdrive_1613926293_72986b27?wsid=/subscriptions/ef78944e-d555-4976-84de-e8954c8a9357/resourcegroups/Machine_Learning/workspaces/Udemy_ML

Streaming azureml-logs/55_azureml-execution-tvmps_b74dbfb902e4b1a9e1fbab9ec69b3f83144fda4b513b9fb9609aaaf8c67ba17c_d.txt

2021-02-21T16:57:31Z Starting output-watcher...
2021-02-21T16:57:31Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
2021-02-21T16:57:32Z Executing 'Copy ACR Details file' on 10.0.0.5
2021-02-21T16:57:32Z Copy ACR Details file succeeded on 10.0.0.5. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_b1e1549112fc4b4d2d32f3d6c4b8a2b3
8e097b52bfb8: Pulling fs layer
a613a9b4553c: Pulling fs layer
acc000f01536: Pulling fs layer
73eef93b7466: Pulling fs layer
d5a54c1fb97f: Pulling fs layer
1536f6ca931b: Pulling fs layer
d7b631d130cb: Pulling fs lay

{'runId': 'train_dm_hyperdrive_1613926293_72986b27',
 'target': 'capstone-CPU',
 'status': 'Completed',
 'startTimeUtc': '2021-02-21T16:57:28.603809Z',
 'endTimeUtc': '2021-02-21T17:00:02.159028Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '3f2c347b-1a15-4ea3-956f-0f8e2848905a',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': 'e953d0b2-e8e3-423c-afa8-18f4f407d0b8'}, 'consumptionDetails': {'type': 'Reference'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'train_dm.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--kernel', 'linear', '--penalty', '1'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'capstone-CPU',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'priority': None,
  '

In [41]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
    

param_sampling = RandomParameterSampling( {
    "--kernel": choice('linear', 'rbf', 'poly', 'sigmoid'),
    "--penalty": choice(0.5, 1, 1.5)
    }
)

hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling, 
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=12,
                                     max_concurrent_runs=4)

In [42]:
# start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_config)

In [43]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [44]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_1d2af6be-ba06-48b7-b909-599025316def
Web View: https://ml.azure.com/experiments/train_dm_hyperdrive/runs/HD_1d2af6be-ba06-48b7-b909-599025316def?wsid=/subscriptions/ef78944e-d555-4976-84de-e8954c8a9357/resourcegroups/Machine_Learning/workspaces/Udemy_ML

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-21T17:01:33.179995][API][INFO]Experiment created<END>\n""<START>[2021-02-21T17:01:33.721332][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n""<START>[2021-02-21T17:01:33.574006][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n"<START>[2021-02-21T17:01:34.1442357Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END><START>[2021-02-21T17:02:04.5329830Z][SCHEDULER][INFO]Scheduling job, id='HD_1d2af6be-ba06-48b7-b909-599025316def_0'<END><START>[2021-02-21T17:02:04.5313979Z][SCHEDULER][INFO]The execution environment was success

{'runId': 'HD_1d2af6be-ba06-48b7-b909-599025316def',
 'target': 'capstone-CPU',
 'status': 'Completed',
 'startTimeUtc': '2021-02-21T17:01:32.942503Z',
 'endTimeUtc': '2021-02-21T17:11:25.791554Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '3f2c347b-1a15-4ea3-956f-0f8e2848905a',
  'score': '0.9711538461538461',
  'best_child_run_id': 'HD_1d2af6be-ba06-48b7-b909-599025316def_7',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://udemyml9610555099.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_1d2af6be-ba06-48b7-b909-599025316def/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=3jgEtIV%2B2%2BAL5bcnX9jxxSp7p%2Bs%2BO1EFMiveToHVuc0%3D&st=2021-02-21T17%3A01%3A28Z&se=2021-02-22T01%3A11%3A28Z&sp=r'}}

In [45]:
best_run = hyperdrive_run.get_best_run_by_primary_metric()
print(best_run.get_details()['runDefinition']['arguments'])

['--kernel', 'linear', '--penalty', '1', '--kernel', 'rbf', '--penalty', '1.5']


In [46]:
print(best_run.get_file_names())

['azureml-logs/55_azureml-execution-tvmps_b74dbfb902e4b1a9e1fbab9ec69b3f83144fda4b513b9fb9609aaaf8c67ba17c_d.txt', 'azureml-logs/65_job_prep-tvmps_b74dbfb902e4b1a9e1fbab9ec69b3f83144fda4b513b9fb9609aaaf8c67ba17c_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_b74dbfb902e4b1a9e1fbab9ec69b3f83144fda4b513b9fb9609aaaf8c67ba17c_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/106_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model.joblib']


In [49]:
model = best_run.register_model(model_name='diabetes_hyperdrive_final', model_path='outputs/model.joblib')

In [50]:
from azureml.core.model import InferenceConfig

inference_config = InferenceConfig(entry_script="inference.py",
                                   environment=sklearn_env)

In [51]:
# Webservice deployment

from azureml.core.webservice import AciWebservice, Webservice
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)


# Local deployment

# from azureml.core.webservice import LocalWebservice
# deployment_config = LocalWebservice.deploy_configuration()

In [52]:
from azureml.core.model import Model

model = ws.models["diabetes_hyperdrive_final"]

service = Model.deploy(
    workspace=ws,
    name='dm-hyperdrive-1',
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config
)

service.wait_for_deployment(True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..........................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [53]:
print(service.state)
print("scoring URI: " + service.scoring_uri)

Healthy
scoring URI: http://325b05ee-6964-4bc3-9e99-4b0ad3db5d8e.eastus2.azurecontainer.io/score


In [54]:
import json

x_input = [{ "Age": [.78],
            "Gender": [1], 
            "Polyuria": [1], 
            "Polydipsia": [1], 
            "sudden weight loss": [1], 
            "weakness": [1], 
            "Polyphagia": [0], 
            "Genital thrush": [0], 
            "visual blurring": [0], 
            "Itching": [1], 
            "Irritability": [0], 
            "delayed healing": [0], 
            "partial paresis": [0], 
            "muscle stiffness": [1], 
            "Alopecia": [0],
            "Obesity": [0]}
]
json_data = json.dumps({"data": x_input}) 

In [57]:
import json

x_input = [{ "Age": .78,
            "Gender": 1, 
            "Polyuria": 1, 
            "Polydipsia": 1, 
            "sudden weight loss": 1, 
            "weakness": 1, 
            "Polyphagia": 0, 
            "Genital thrush": 0, 
            "visual blurring": 0, 
            "Itching": 1, 
            "Irritability": 0, 
            "delayed healing": 0, 
            "partial paresis": 0, 
            "muscle stiffness": 1, 
            "Alopecia": 0,
            "Obesity": 0}
]
json_data = json.dumps({"data": x_input}) 

In [67]:
x_input = [{ "Age": .25,
            "Gender": 1, 
            "Polyuria": 0, 
            "Polydipsia": 0, 
            "sudden weight loss": 0, 
            "weakness": 0, 
            "Polyphagia": 0, 
            "Genital thrush": 0, 
            "visual blurring": 0, 
            "Itching": 0, 
            "Irritability": 0, 
            "delayed healing": 0, 
            "partial paresis": 0, 
            "muscle stiffness": 0, 
            "Alopecia": 0,
            "Obesity": 0}
]
json_data = json.dumps({"data": x_input}) 

In [68]:
json_data

'{"data": [{"Age": 0.25, "Gender": 1, "Polyuria": 0, "Polydipsia": 0, "sudden weight loss": 0, "weakness": 0, "Polyphagia": 0, "Genital thrush": 0, "visual blurring": 0, "Itching": 0, "Irritability": 0, "delayed healing": 0, "partial paresis": 0, "muscle stiffness": 0, "Alopecia": 0, "Obesity": 0}]}'

In [69]:
response = service.run(input_data=json_data)
print(response)

[0]


# AutoML Section

In [None]:
experiment_name = 'train_dm_automl'
experiment = Experiment(ws, name=experiment_name)


In [73]:
    from azureml.core import Dataset

    # get the diabetes input dataset by ID
    dataset_name = 'early_stage_diabetes_transformed'
    dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)

In [74]:
from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(
    experiment_timeout_minutes=15,
    max_concurrent_iterations=4,
    compute_target=compute_target,
    task='classification',
    primary_metric='accuracy',
    training_data=dataset,
    label_column_name='class',
    n_cross_validations=5)

In [None]:
from azureml.pipeline.core import PipelineData, TrainingOutput
from azureml.pipeline.core import PipelineRun

ds = ws.get_default_datastore()
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

metrics_data = PipelineData(name='metrics_data',
                           datastore=ds,
                           pipeline_output_name=metrics_output_name,
                           training_output=TrainingOutput(type='Metrics'))
model_data = PipelineData(name='model_data',
                           datastore=ds,
                           pipeline_output_name=best_model_output_name,
                           training_output=TrainingOutput(type='Model'))

In [77]:
from azureml.pipeline.steps import AutoMLStep

automl_step = AutoMLStep(
    name='diabetes_automl',
    automl_config=automl_config,
    outputs=[metrics_data, model_data],
    allow_reuse=True)

In [78]:
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(
    description="diabetes_pipeline_w_automlstep",
    workspace=ws,    
    steps=[automl_step])

In [80]:
pipeline_run = experiment.submit(pipeline)

Created step diabetes_automl [1ff8b972][5e2c6d63-90d3-4284-8e95-71e933aa2143], (This step will run and generate new outputs)
Submitted PipelineRun e6dacc2d-9479-4bf0-8e26-e85f258495c8
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/train_dm_automl/runs/e6dacc2d-9479-4bf0-8e26-e85f258495c8?wsid=/subscriptions/ef78944e-d555-4976-84de-e8954c8a9357/resourcegroups/Machine_Learning/workspaces/Udemy_ML


In [None]:
from azureml.widgets import RunDetails
RunDetails(pipeline_run).show()