# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core import Workspace, Experiment,Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice

import os
import joblib

In [None]:
# Report the kernel's working directory and list its entries, so it is clear
# which files (train.py, conda spec, ...) are visible to the notebook.
currDir = os.getcwd()
print(currDir)
os.listdir(path=currDir)

## Initialize Workspace and Create an Azure ML experiment

In [None]:
# Load the workspace via Workspace.from_config() and open the experiment
# that the HyperDrive run is submitted under.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name='hyperdrive-experiment')
project_folder = './hyperdrive-model'

# Print one "label: value" line per workspace property.
print('\n'.join((
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)))

# Start an interactive logging run on the experiment.
# NOTE(review): `run` is not referenced again in the visible cells — confirm
# whether it is needed, and if so make sure it is completed.
run = exp.start_logging()

## Dataset

In [2]:
# Look up the registered dataset by name in the workspace and display it.
dataset = Dataset.get_by_name(workspace=ws, name="Credit-Card-Churners")
dataset

Workspace name: quick-starts-ws-132510
Azure region: southcentralus
Subscription id: 81cefad3-d2c9-4f77-a466-99a7f541c7bb
Resource group: aml-quickstarts-132510


## Create or Attach an AmlCompute cluster

In [4]:
compute_name = "compute-aml-cluster"

# Reuse the compute target if one with this name already exists in the
# workspace; ComputeTargetException signals that it has to be created.
try:
    compute_aml_cluster = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing cluster.')
except ComputeTargetException:
    # Provision a new AmlCompute cluster (up to 5 STANDARD_DS12_V2 nodes).
    # The SDK method is spelled `provisioning_configuration`; any other
    # spelling raises AttributeError on this fallback path.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',
                                                           max_nodes=5)
    compute_aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)

# Wait for the cluster operation to finish, streaming its status as it goes.
compute_aml_cluster.wait_for_completion(show_output=True)

Found existing cluster.

Running


## Environment setup

In [1]:
# Build the training environment from the conda specification file that
# sits next to this notebook.
from azureml.core import Environment

sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='conda_dependencies.yml',
)

ModuleNotFoundError: No module named 'azureml'

## HyperDrive Configuration

In [6]:
# Search space: each key is a command-line argument of train.py and each
# value is the discrete set of candidates it is sampled from.
param_space = {
    '--n_estimators': choice(2, 4, 8, 16, 32, 64, 200),
    '--max_depth': choice(5, 8, 15, 25, 30),
    '--min_samples_split': choice(2, 5, 10, 15, 100),
    '--min_samples_leaf': choice(1, 2, 5, 10),
}

# Random sampling over the space above.
ps = RandomParameterSampling(param_space)

# Early-termination policy: Bandit with a 10% slack factor, evaluated every
# 2 intervals, with no initial delay.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=2,
    delay_evaluation=0,
)

# Script run configuration: train.py from the current directory, executed on
# the AmlCompute cluster inside the sklearn environment.
src = ScriptRunConfig(
    source_directory='.',
    script='train.py',
    compute_target=compute_aml_cluster,
    environment=sklearn_env,
)

# HyperDrive configuration combining the script run config, the sampler and
# the termination policy; maximizes the 'Accuracy' metric over at most 12
# runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)

## Submit Run

In [7]:
# Submit the HyperDrive configuration to the experiment.
hyperdrive_run = exp.submit(config=hyperdrive_config, show_output=True)

## Run Details


In [8]:
# Render the run-details widget for the HyperDrive run.
details_widget = RunDetails(hyperdrive_run)
details_widget.show()

# Query the current status once, then block until the run has finished;
# the value returned by wait_for_completion is the cell's output.
hyperdrive_run.get_status()
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_7c025b68-9128-4aee-a1ec-9a170f510247
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_7c025b68-9128-4aee-a1ec-9a170f510247?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-132510/workspaces/quick-starts-ws-132510

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-12-30T17:35:31.768087][API][INFO]Experiment created<END>\n""<START>[2020-12-30T17:35:32.235255][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-12-30T17:35:32.541689][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2020-12-30T17:35:33.1425835Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_7c025b68-9128-4aee-a1ec-9a170f510247
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_7c025b68-9128-4aee-a1ec-9a170f510247?wsid=/subscri

{'runId': 'HD_7c025b68-9128-4aee-a1ec-9a170f510247',
 'target': 'compute-aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-12-30T17:35:31.292361Z',
 'endTimeUtc': '2020-12-30T17:47:50.681747Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '3bf7ecd3-8084-41c1-a7b2-30b5b2e31b76',
  'score': '0.9506416584402764',
  'best_child_run_id': 'HD_7c025b68-9128-4aee-a1ec-9a170f510247_4',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg132510.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_7c025b68-9128-4aee-a1ec-9a170f510247/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=3d%2F7ft4VU9fmVEgi4NDhyiEe3Myld5uZP8FSmp6Xk1I%3D&st=2020-12-30T17%3A38%3A10Z&se=2020-12-31T01%3A48%3A10Z&sp=r'}}

## Best Model

In [9]:
# Pick the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # The SDK returns None when no child run logged the primary metric;
    # fail with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "HyperDrive found no child run that logged the primary metric 'Accuracy'."
    )

# Collect the metrics, output file names and the command-line arguments
# (i.e. the sampled hyperparameters) of that run.
best_run_metrics = best_run.get_metrics()
best_run_files = best_run.get_file_names()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run ID',best_run.id)
print('\n Metrics: ', best_run_metrics)
print('\n Parameters: ', parameter_values,sep='\n')
print('\nAccuracy of Best run',best_run_metrics['Accuracy'],sep='\n')
print('\nBest run file names',best_run_files,sep='\n')

# Display the file list as the cell output, reusing the list fetched above
# rather than asking the service for it a second time.
best_run_files

Best Run ID HD_7c025b68-9128-4aee-a1ec-9a170f510247_4

 Metrics:  {'Num Estimators:': 64.0, 'Max Depth:': 25, 'Min Samples Split:': 15, 'Min Samples Leaf:': 1, 'Accuracy': 0.9506416584402764}

Accuracy of Best run
0.9506416584402764

Best run file names
['azureml-logs/55_azureml-execution-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt', 'azureml-logs/65_job_prep-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/94_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_l_47e42f04-75db-4fb9-b3f9-e31ab12098ab.jsonl', 'logs/azureml/dataprep/python_span_l_47e42f04-75db-4fb9-b3f9-e31ab12098ab.jsonl', 'logs/azureml/job

['azureml-logs/55_azureml-execution-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt',
 'azureml-logs/65_job_prep-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_d495ebc478eb6fcc08a5f0598db0e6f8f20842f145e000f928c8c4eebfb9f653_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/94_azureml.log',
 'logs/azureml/dataprep/backgroundProcess.log',
 'logs/azureml/dataprep/backgroundProcess_Telemetry.log',
 'logs/azureml/dataprep/engine_spans_l_47e42f04-75db-4fb9-b3f9-e31ab12098ab.jsonl',
 'logs/azureml/dataprep/python_span_l_47e42f04-75db-4fb9-b3f9-e31ab12098ab.jsonl',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.joblib']

In [10]:
# Display the best run object as the cell output.
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hyperdrive-experiment,HD_7c025b68-9128-4aee-a1ec-9a170f510247_4,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [11]:
# Download the best run's model artifact into the local project folder.
# The artifact name has to match the run's file listing exactly
# ('outputs/model.joblib', without a leading slash), and the destination is
# kept under `project_folder` rather than the filesystem root.
os.makedirs(project_folder, exist_ok=True)
best_run.download_file(
    name='outputs/model.joblib',
    output_file_path=os.path.join(project_folder, 'hyperdrive_model.joblib'),
)

In [12]:
# Register the best run's model artifact in the workspace model registry.
# `model_path` refers to the file inside the run's outputs.
model = best_run.register_model(
    model_name='best_hyperdrive_model',
    model_path='outputs/model.joblib',
    description='A best hyperdrive model',
)