# Tuning hyperparameters using HyperDrive

In [1]:
#Importing all the Dependencies required
from azureml.core import Workspace, Experiment, Model
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.environment import Environment
from azureml.automl.core.shared import constants
import azureml.core
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model
import numpy as np
import pandas as pd
import os
import shutil
import json
import joblib
import requests

In [2]:
#find the workspace and create the environment with the name "hperdrive". Start logging using exp.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="hyperdrive")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-136095
Azure region: southcentralus
Subscription id: 3d1a56d2-7c81-4118-9790-f85d1acf0c77
Resource group: aml-quickstarts-136095


In [3]:
# Choose a name for your CPU cluster
cpu_cluster_name = "cpuacluster"

# to verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster')

#create a new cluster with the specified configurations "vm_size='STANDARD_D12_V2' and max number of nodes as 4   
except ComputeTargetException:  
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D12_V2', max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    print("New cluster is created")

New cluster is created


In [4]:
from azureml.train.hyperdrive.parameter_expressions import randint
from azureml.train.sklearn import SKLearn

#get the specifications for the environment from conda_dependencies.yml file and name it as "myenv"
myenv=Environment.from_conda_specification(name="myenv",
                                                  file_path="./conda_dependencies.yml")

#I used BanditPolicy.Here the evaluation interval specifies the frequency of applying the policy which is 2.
#The runs whose best metric is less than (primary metric of best performing run at given interval/(1+slack factor)) will be terminated.
#Here the slack factor taken is '0.1' and delay evaluation as 5.
policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=2, delay_evaluation=5)

#I used the RandomParameterSampler as it can be used for the early termination of the low-performance runs 
#and also to find better metrics that could help to refine the search space.
#The Random Search for parameters can get good results with less amount of time.
ps = RandomParameterSampling(
    {
        'C':uniform(0,1),
        "max_iter":randint(150)
    })

if "training" not in os.listdir():
    os.mkdir("./training")

#use SKLearn estimator with train.py as the entry script file on the compute target we created.
est = SKLearn(source_directory='.',  
              compute_target=cpu_cluster,
              entry_script='train.py', 
              vm_size = 'STANDARD_D12_V2', 
            )

#The hyperdrive is configured using the SKLearn estimator, hyperparameter sampler, and policy. 
#The goal is to maximize the primary metric 'accuracy'.
hyperdrive_config = HyperDriveConfig(estimator=est,
                                     hyperparameter_sampling=ps,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=10,
                                     max_concurrent_runs=4)


'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [5]:
#Submit your experiment
hdr=exp.submit(config=hyperdrive_config)



In [6]:
#RunDetails widget is used to view the progress of model training
RunDetails(hdr).show()
hdr.wait_for_completion(show_output=True)


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c
Web View: https://ml.azure.com/experiments/hyperdrive/runs/HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c?wsid=/subscriptions/3d1a56d2-7c81-4118-9790-f85d1acf0c77/resourcegroups/aml-quickstarts-136095/workspaces/quick-starts-ws-136095

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-26T08:42:49.033345][API][INFO]Experiment created<END>\n""<START>[2021-01-26T08:42:49.564067][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-01-26T08:42:49.728336][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-26T08:42:50.7838263Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c
Web View: https://ml.azure.com/experiments/hyperdrive/runs/HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c?wsid=/subscriptions/3d1a56d2-7c81-4

{'runId': 'HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c',
 'target': 'cpuacluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-26T08:42:48.766792Z',
 'endTimeUtc': '2021-01-26T08:52:52.078157Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '6918ccaa-9d97-49f4-a653-219f0469105b',
  'score': '0.9166666666666666',
  'best_child_run_id': 'HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c_4',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg136095.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=koaHfeYgm8nDcPKwv%2F0Ogk7vOD70u0FRQv48MK2JqfU%3D&st=2021-01-26T08%3A43%3A35Z&se=2021-01-26T16%3A53%3A35Z&sp=r'},
 'submittedBy': 'ODL_User 136095'}

In [7]:
#to retrieve the best metrics 
best_run = hdr.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('parameter values:',parameter_values)
print('\n Best run \n',best_run)

print('The Id for best run is:', best_run.id)
print('The Accuracy: is', best_run_metrics['Accuracy'])

parameter values: ['--C', '0.20996327785012214', '--max_iter', '30']

 Best run 
 Run(Experiment: hyperdrive,
Id: HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c_4,
Type: azureml.scriptrun,
Status: Completed)
The Id for best run is: HD_bdea9e31-d7d9-46db-9780-38b82d6cca8c_4
The Accuracy: is 0.9166666666666666


In [8]:
#save the best model obtained
joblib.dump(parameter_values, filename='best_hyperdrive_model.pkl')

['best_hyperdrive_model.pkl']

In [9]:
#register the model
best_model = best_run.register_model('best_hyper_model', model_path = 'outputs/model.joblib')

# Deploy the best model

In [10]:
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice


# Combine scoring script & environment in Inference configuration
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

# Set deployment configuration
deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)

# Define the model, inference, & deployment configuration and web service name and location to deploy
service = Model.deploy(
    workspace = ws,
    name = "my-web-service",
    models = [best_model],
    inference_config = inference_config,
    deployment_config = deployment_config)

service.wait_for_deployment(show_output = True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.....................................................................................................................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [11]:
#display the Rest Endpoint link(Scoring URI)
print(service.scoring_uri)


http://958689da-273e-46d0-8043-9d5cde91624c.southcentralus.azurecontainer.io/score


In [13]:
#execute the endpoint.py py script file to test the rest endpoint with the data supplied and to get the predicted output.
%run endpoint.py

[1]


# log Details

In [14]:
service.get_logs()



Delete the web service we created before using delete() function.

In [15]:
service.delete()