# Hyperparameter tuning using HyperDrive

In [1]:
#Import required libraries
from azureml.core import Workspace, Experiment, Model
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.environment import Environment
from azureml.automl.core.shared import constants
import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model
import numpy as np
import pandas as pd
import os
import shutil
import json
import joblib
import requests

In [2]:
#find the workspace and create an environment with the name "hp"
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="hp")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')
#start logging on the experiment created
run = exp.start_logging()

Workspace name: quick-starts-ws-142131
Azure region: southcentralus
Subscription id: aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee
Resource group: aml-quickstarts-142131


In [3]:
# Name the cpu cluster
cpu_cluster_name = "hpcluster"

# Verify whether the cluster already exists
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster')

#create a new cluster with a "vm_size='STANDARD_D12_V2' and at most 4 nodes   
except ComputeTargetException:  
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D12_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    print("New cluster is created")

New cluster is created


In [4]:
from azureml.train.hyperdrive.parameter_expressions import randint
from azureml.train.sklearn import SKLearn

#define a new environment from the conda_dependencies.yml file
myenv=Environment.from_conda_specification(name="myenv",
                                                  file_path="./conda_dependencies.yml")

#The early termination policy is defined
#Here the slack factor taken is '0.1' and delay evaluation as 5
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=2, delay_evaluation=5)

#The RandomParameterSampler was used since it allows for more efficient results
ps = RandomParameterSampling(
    {
        'C':uniform(0,1),
        "max_iter":randint(150)
    })

if "training" not in os.listdir():
    os.mkdir("./training")

#train the scikit-learn model using the train.py script on the cpu cluster already defined
est = SKLearn(source_directory='.',  
              compute_target=cpu_cluster,
              entry_script='train.py', 
              vm_size = 'STANDARD_D12_V2', 
            )

#the model hyperparameters are configured using the estimated model. the hyperparameter sampler, and the termination policy
#The goal is to maximize the 'accuracy' of the model
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=10,
                                     max_concurrent_runs=4)


'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
#the experiment was submited
hdr=exp.submit(config=hyperdrive_config)



In [13]:
#the RunDetails widget is used to view the model training progress
RunDetails(hdr).show()
hdr.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8
Web View: https://ml.azure.com/experiments/hp/runs/HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-142131/workspaces/quick-starts-ws-142131

Execution Summary
RunId: HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8
Web View: https://ml.azure.com/experiments/hp/runs/HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8?wsid=/subscriptions/aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee/resourcegroups/aml-quickstarts-142131/workspaces/quick-starts-ws-142131



{'runId': 'HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8',
 'target': 'hpcluster',
 'status': 'Completed',
 'startTimeUtc': '2021-04-06T23:23:41.808151Z',
 'endTimeUtc': '2021-04-06T23:33:10.243375Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '0abd24ea-211e-4bbc-bcd0-946213c96f66',
  'score': '0.9',
  'best_child_run_id': 'HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg142131.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=XyRwftXbQP2yu2%2FcJUHBQm6n2XvXc60pv0CYMT4FflE%3D&st=2021-04-06T23%3A23%3A29Z&se=2021-04-07T07%3A33%3A29Z&sp=r'},
 'submittedBy': 'ODL_User 142131'}

In [8]:
#to retrieve the best metrics 
best_run = hdr.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('parameter values:',parameter_values)
print('\n Best run \n',best_run)

print('The Id for best run is:', best_run.id)
print('The Accuracy: is', best_run_metrics['Accuracy'])

parameter values: ['--C', '0.7013049655937129', '--max_iter', '116']

 Best run 
 Run(Experiment: hp,
Id: HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8_2,
Type: azureml.scriptrun,
Status: Completed)
The Id for best run is: HD_beb86ea9-fc5d-46f4-9a9f-02476dd2a2c8_2
The Accuracy: is 0.9


In [9]:
#save the best model obtained
joblib.dump(parameter_values, filename='bestHpModel.pkl')

['bestHpModel.pkl']

In [10]:
#register the model
best_model = best_run.register_model('bestHpModel', model_path = 'outputs/model.joblib')

# Deploy the best model

In [11]:
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice


# Combine scoring script & environment in Inference configuration
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

# Set deployment configuration
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

# Define the settings of the web service
service = Model.deploy(
    workspace = ws,
    name = "hpwebservice",
    models = [best_model],
    inference_config = inference_config,
    deployment_config = deployment_config)

service.wait_for_deployment(show_output = True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-04-06 23:33:51+00:00 Creating Container Registry if not exists.
2021-04-06 23:33:52+00:00 Registering the environment.
2021-04-06 23:33:54+00:00 Building image..
2021-04-06 23:46:05+00:00 Generating deployment configuration..
2021-04-06 23:46:07+00:00 Submitting deployment to compute..
2021-04-06 23:46:12+00:00 Checking the status of deployment hpwebservice..
2021-04-06 23:50:54+00:00 Checking the status of inference endpoint hpwebservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [12]:
#display the Rest Endpoint link(Scoring URI)
print(service.scoring_uri)

http://ee2d7936-a6c9-42d7-8e2c-7e1d00e71433.southcentralus.azurecontainer.io/score


In [23]:
#execute the endpoint.py py script file to test the rest endpoint with the data supplied and to get the predicted output.
%run endpoint.py

[1, 0]


# log Details

In [24]:
# Get the service logs
service.get_logs()



In [None]:
# Delete the web service we created
service.delete()