# Hyperparameter Tuning using HyperDrive



In [1]:
import pandas as pd
import numpy as np 
from sklearn import datasets
import joblib

from azureml.core import Experiment, Model, Environment, ScriptRunConfig
from azureml.train.automl import AutoMLConfig
from azureml.core import Workspace, Dataset, Datastore 
from azureml.pipeline.steps import AutoMLStep
from azureml.core.model import InferenceConfig
from azureml.train.hyperdrive import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn


In [8]:
import os 
import shutil 

## Dataset



In [3]:
# Load the workspace once from the local Azure ML configuration and reuse the handle
# (the original cell called Workspace.from_config() twice with identical results).
ws = Workspace.from_config()
experiment_name = 'exp-heart-failure-hyperdrive'

# Write the workspace configuration out using path='.azureml'.
ws.write_config(path='.azureml')

# Experiment under which the HyperDrive sweep is submitted further down.
exp = Experiment(workspace=ws, name=experiment_name)



In [4]:
# Summarise the connected workspace, one attribute per output line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')



Workspace name: quick-starts-ws-159915
Azure region: southcentralus
Subscription id: aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee
Resource group: aml-quickstarts-159915


In [5]:
# Start an interactive logging run in the experiment.
# NOTE(review): `run` is not referenced again in the visible cells and is never
# completed here — confirm whether this run is needed at all.
run = exp.start_logging()


In [6]:
# Disabled alternative: build the tabular dataset directly from a CSV path in the
# workspace blob datastore instead of looking it up by its registered name.
# datastore_name = 'workspaceblobstore'
# datastore = Datastore.get(ws, datastore_name)
# datastore_path = [(datastore, 'UI/10-01-2021_030828_UTC/heart_failure_dataset.csv')]
# ds = Dataset.Tabular.from_delimited_files(path=datastore_path)
# ds = ds.take(10).to_pandas_dataframe()

# Look up the dataset registered in the workspace under the name 'heart_failure_dataset'.
dataset = Dataset.get_by_name(ws, name='heart_failure_dataset')


In [7]:
from azureml.core.compute_target import ComputeTargetException


amlcompute_cluster_name = "mlops-compute"

# Reuse the cluster when it already exists in the workspace; otherwise provision it.
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # Lookup failed: create a new AmlCompute cluster scaling between 1 and 6 nodes.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS12_V2',
        min_nodes=1,
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)
else:
    print('Found existing cluster, use it.')

# Block until at least one node is available, or give up after 15 minutes.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=1,
    timeout_in_minutes=15,
)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration


In [9]:
# Early-termination policy for the sweep: a Bandit policy with a slack factor of 0.1,
# applied every 3 evaluation intervals.
# An early-termination policy is not required if Bayesian sampling is used instead.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=3)

# Stage train.py in a dedicated folder that serves as the run's source directory.
# makedirs(exist_ok=True) is idempotent, so no separate existence check is needed.
script_folder = './train_folder'
os.makedirs(script_folder, exist_ok=True)
shutil.copy('./train.py', script_folder)

# Random sampling over the hyperparameter space. Both keys use the same "--<name>"
# form so they match the command-line arguments recorded for the child runs
# ('--C', '--max_iter').
# NOTE(review): the meaning of C / max_iter is defined by train.py, which is not
# visible here — confirm the ranges against that script.
param_sampling = RandomParameterSampling({
    "--C": uniform(0.1, 1),
    "--max_iter": choice([25, 50, 75, 100, 125]),
})

# Estimator that runs train.py from the staged folder on the cluster configured above.
# The cluster is referenced through amlcompute_cluster_name instead of repeating the
# string literal, so renaming the cluster only has to happen in one place.
estimator = SKLearn(
    source_directory=script_folder,
    compute_target=amlcompute_cluster_name,
    entry_script='train.py',
    conda_packages=["pandas"],
)



'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [10]:
# HyperDrive sweep definition: maximise the 'Accuracy' metric over the sampled
# hyperparameters, with at most 24 child runs in total and 6 running concurrently.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=24,
    max_concurrent_runs=6,
)

In [11]:
# Submit the HyperDrive sweep to the experiment.
hyperdrive_run = exp.submit(config=hyperdrive_run_config)

# Show the live run widget, then block until the sweep finishes while streaming its logs.
run_widget = RunDetails(hyperdrive_run)
run_widget.show()

hyperdrive_run.wait_for_completion(show_output=True)



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf
Web View: https://ml.azure.com/runs/HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-159915/workspaces/quick-starts-ws-159915&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-10-03T16:53:02.539226][API][INFO]Experiment created<END>\n""<START>[2021-10-03T16:53:03.111185][GENERATOR][INFO]Trying to sample '6' jobs from the hyperparameter space<END>\n""<START>[2021-10-03T16:53:03.364721][GENERATOR][INFO]Successfully sampled '6' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf
Web View: https://ml.azure.com/runs/HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-159915/workspaces/quick-starts-ws-159915&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf',
 'target': 'mlops-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-10-03T16:53:02.276162Z',
 'endTimeUtc': '2021-10-03T17:04:28.065687Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e38cdba3-2af8-4e4f-8120-64e91b492ff4',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1056-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.34.0',
  'score': '0.9111111111111111',
  'best_child_run_id': 'HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf_13',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg159915.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf/azureml-logs/hyperdrive.txt?

## Run Details


In [12]:
# Render the run-details widget for the sweep and wait on it (streaming output);
# the cell's result is the value returned by wait_for_completion.
details_widget = RunDetails(hyperdrive_run)
details_widget.show()

hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf
Web View: https://ml.azure.com/runs/HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-159915/workspaces/quick-starts-ws-159915&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Execution Summary
RunId: HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf
Web View: https://ml.azure.com/runs/HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-159915/workspaces/quick-starts-ws-159915&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254



{'runId': 'HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf',
 'target': 'mlops-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-10-03T16:53:02.276162Z',
 'endTimeUtc': '2021-10-03T17:04:28.065687Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'e38cdba3-2af8-4e4f-8120-64e91b492ff4',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1056-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.34.0',
  'score': '0.9111111111111111',
  'best_child_run_id': 'HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf_13',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg159915.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf/azureml-logs/hyperdrive.txt?

## Best Model

Get the best run from the HyperDrive experiment and display all of its properties.

In [13]:
# Pick the child run that achieved the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# get_best_run_by_primary_metric() returns None when no child run logged the primary
# metric; fail with a clear message instead of an AttributeError on the next line.
if best_run is None:
    raise RuntimeError(
        "No HyperDrive child run reported the primary metric; "
        "check that train.py logs it under the configured name."
    )

# Display the full details (arguments, run definition, log files, ...) of the best run.
best_run.get_details()

{'runId': 'HD_089ecc52-c1a3-4539-9aa1-1911f0ff44cf_13',
 'target': 'mlops-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-10-03T17:01:17.534513Z',
 'endTimeUtc': '2021-10-03T17:01:48.128386Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'e38cdba3-2af8-4e4f-8120-64e91b492ff4',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'train.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--C', '0.5078518299558458', '--max_iter', '75'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'mlops-compute',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': None,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'id

In [15]:
# Disabled alternative: register the model straight from the best run's outputs.
# best_model = best_run.register_model(model_name='best_hyperdrive_model', model_path='outputs/model.joblib')

In [18]:
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration


# Local file that is uploaded and registered as the model.
local_model_path = './model.joblib'

# Nothing earlier in the notebook creates this file, so fetch it from the best
# HyperDrive run when it is not already on disk. When the file exists the behaviour
# is unchanged.
# NOTE(review): 'outputs/model.joblib' is assumed to be where train.py saves the
# model inside the run — confirm against train.py.
if not os.path.exists(local_model_path):
    best_run.download_file('outputs/model.joblib', output_file_path=local_model_path)

model = Model.register(
    workspace=ws,
    model_name='best_hyperdrive_model',           # Name of the registered model in the workspace.
    model_path=local_model_path,                  # Local file to upload and register as a model.
    model_framework=Model.Framework.SCIKITLEARN,  # Framework used to create the model.
    # Records the scikit-learn version installed in this notebook's kernel.
    # NOTE(review): presumably this should match the version used in training — confirm.
    model_framework_version=sklearn.__version__,
    #sample_input_dataset=input_dataset,
    #sample_output_dataset=output_dataset,
    resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=1),
    description='heart failure preds',
    tags={'area': 'heart failure', 'type': 'classification'},
)


Registering model best_hyperdrive_model


## Model Deployment


Register the model, create an inference configuration, and deploy the model as a web service.

TODO: In the cell below, send a request to the web service you deployed to test it.

In [19]:
# Deploy the registered model as a web service. No inference or deployment
# configuration is passed, so the SDK's defaults apply.
aci_service_name = 'my-sklearn-service'
aci_service = Model.deploy(workspace=ws, name=aci_service_name, models=[model])

# Block until the deployment finishes (streaming progress), then report the service state.
aci_service.wait_for_deployment(show_output=True)
print(aci_service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-10-03 17:42:22+00:00 Creating Container Registry if not exists.
2021-10-03 17:42:22+00:00 Registering the environment.
2021-10-03 17:42:24+00:00 Uploading autogenerated assets for no-code-deployment.
2021-10-03 17:42:24+00:00 Building image..
2021-10-03 17:47:53+00:00 Generating deployment configuration.
2021-10-03 17:47:54+00:00 Submitting deployment to compute..
2021-10-03 17:47:58+00:00 Checking the status of deployment my-sklearn-service..
2021-10-03 17:49:41+00:00 Checking the status of inference endpoint my-sklearn-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [20]:
# Update the deployed service in place, turning on Application Insights.
aci_service.update(enable_app_insights=True)

Print the logs of the web service, test it, and finally delete the service.

In [21]:
# Fetch the web service's logs and print them.
service_logs = aci_service.get_logs()
print(service_logs)

2021-10-03T17:53:16,943632228+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-10-03T17:53:16,944610056+00:00 - rsyslog/run 
2021-10-03T17:53:17,143436950+00:00 - iot-server/run 
2021-10-03T17:53:17,340637297+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-10-03T17:53:19,441063417+00:00 - iot-server/finish 1 0
2021-10-03T17:53:19,541598047+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 19.9.0
Listening at: http://127.0.0.1:31311 (11)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 39



In [25]:
import json 
import requests 

# Take the first 10 rows of the registered dataset as a small test sample.
sample_frame = dataset.take(10).to_pandas_dataframe()

# Separate the DEATH_EVENT column (the labels) from the remaining feature columns.
df_test_label = sample_frame['DEATH_EVENT']
df_test = sample_frame.drop(columns=['DEATH_EVENT'])

# Serialise the features as {"data": [record, ...]} and score them through the SDK.
feature_records = df_test.to_dict(orient='records')
df_test_sample = json.dumps({'data': feature_records})
service_response = aci_service.run(input_data=df_test_sample)
service_response

{'predict_proba': [[0.18369319511183924, 0.8163068048881608],
  [0.08866751051103117, 0.9113324894889688],
  [0.21987106753380625, 0.7801289324661937],
  [0.25486049436430847, 0.7451395056356915],
  [0.2559196250656822, 0.7440803749343178],
  [0.22645154534184153, 0.7735484546581585],
  [0.17609598972258755, 0.8239040102774124],
  [0.4244060128287611, 0.5755939871712389],
  [0.396493763457915, 0.603506236542085],
  [0.25621506284440665, 0.7437849371555934]]}

In [26]:
# Show the DEATH_EVENT labels that were split off from the 10-row test sample.
df_test_label

0    1
1    1
2    1
3    1
4    1
5    1
6    1
7    1
8    1
9    1
Name: DEATH_EVENT, dtype: int64

# Run through the scoring URI

In [None]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server-certificate verification for HTTPS requests.

    When ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable is
    unset or empty, and the ``ssl`` module provides ``_create_unverified_context``,
    the module-wide default HTTPS context factory is replaced with the unverified
    one. In every other case nothing is changed.

    Args:
        allowed: Truthy to request the bypass; falsy to leave ``ssl`` untouched.

    Returns:
        None. The only effect is the process-wide change to the ``ssl`` module.
    """
    if not allowed:
        return
    # A non-empty PYTHONHTTPSVERIFY means verification is configured explicitly.
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        # bypass the server certificate verification on client side
        ssl._create_default_https_context = unverified_factory

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here: ten feature records wrapped in a {"data": [...]} payload.
data = {
    "data": [
        {"age": 75.0, "anaemia": 0, "creatinine_phosphokinase": 582, "diabetes": 0, "ejection_fraction": 20, "high_blood_pressure": 1, "platelets": 265000.0, "serum_creatinine": 1.9, "serum_sodium": 130, "sex": 1, "smoking": 0, "time": 4},
        {"age": 55.0, "anaemia": 0, "creatinine_phosphokinase": 7861, "diabetes": 0, "ejection_fraction": 38, "high_blood_pressure": 0, "platelets": 263358.03, "serum_creatinine": 1.1, "serum_sodium": 136, "sex": 1, "smoking": 0, "time": 6},
        {"age": 65.0, "anaemia": 0, "creatinine_phosphokinase": 146, "diabetes": 0, "ejection_fraction": 20, "high_blood_pressure": 0, "platelets": 162000.0, "serum_creatinine": 1.3, "serum_sodium": 129, "sex": 1, "smoking": 1, "time": 7},
        {"age": 50.0, "anaemia": 1, "creatinine_phosphokinase": 111, "diabetes": 0, "ejection_fraction": 20, "high_blood_pressure": 0, "platelets": 210000.0, "serum_creatinine": 1.9, "serum_sodium": 137, "sex": 1, "smoking": 0, "time": 7},
        {"age": 65.0, "anaemia": 1, "creatinine_phosphokinase": 160, "diabetes": 1, "ejection_fraction": 20, "high_blood_pressure": 0, "platelets": 327000.0, "serum_creatinine": 2.7, "serum_sodium": 116, "sex": 0, "smoking": 0, "time": 8},
        {"age": 90.0, "anaemia": 1, "creatinine_phosphokinase": 47, "diabetes": 0, "ejection_fraction": 40, "high_blood_pressure": 1, "platelets": 204000.0, "serum_creatinine": 2.1, "serum_sodium": 132, "sex": 1, "smoking": 1, "time": 8},
        {"age": 75.0, "anaemia": 1, "creatinine_phosphokinase": 246, "diabetes": 0, "ejection_fraction": 15, "high_blood_pressure": 0, "platelets": 127000.0, "serum_creatinine": 1.2, "serum_sodium": 137, "sex": 1, "smoking": 0, "time": 10},
        {"age": 60.0, "anaemia": 1, "creatinine_phosphokinase": 315, "diabetes": 1, "ejection_fraction": 60, "high_blood_pressure": 0, "platelets": 454000.0, "serum_creatinine": 1.1, "serum_sodium": 131, "sex": 1, "smoking": 1, "time": 10},
        {"age": 65.0, "anaemia": 0, "creatinine_phosphokinase": 157, "diabetes": 0, "ejection_fraction": 65, "high_blood_pressure": 0, "platelets": 263358.03, "serum_creatinine": 1.5, "serum_sodium": 138, "sex": 0, "smoking": 0, "time": 10},
        {"age": 80.0, "anaemia": 1, "creatinine_phosphokinase": 123, "diabetes": 0, "ejection_fraction": 35, "high_blood_pressure": 1, "platelets": 388000.0, "serum_creatinine": 9.4, "serum_sodium": 133, "sex": 1, "smoking": 1, "time": 10},
    ]
}

# Serialise the payload as UTF-8 encoded JSON for the POST body.
body = json.dumps(data).encode('utf-8')

# Use the scoring URI of the service deployed in this notebook instead of a
# hard-coded address, which stops working once that particular service is deleted.
url = aci_service.scoring_uri
api_key = '' # Replace this with the API key for the web service

# Only send an Authorization header when a key was actually provided.
headers = {'Content-Type': 'application/json'}
if api_key:
    headers['Authorization'] = 'Bearer ' + api_key

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())

    # The error body is usually JSON, but fall back to the raw text when it is not,
    # so that the diagnostics are never hidden behind a decoding error.
    error_text = error.read().decode("utf8", 'ignore')
    try:
        print(json.loads(error_text))
    except ValueError:
        print(error_text)


In [None]:
# Delete the deployed web service now that testing is finished.
aci_service.delete()