# Hyperparameter Tuning using HyperDrive

In [1]:
from azureml.core import Workspace, Experiment,Dataset
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

from azureml.widgets import RunDetails
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice

import os
import joblib

In [2]:
# Report the notebook's working directory, then display its contents so the
# reader can confirm train.py and conda_dependencies.yml sit next to it.
currDir = os.getcwd()
print(currDir)

os.listdir(currDir)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-aml-cluster/code


['.ipynb_checkpoints',
 'automl.ipynb',
 'conda_dependencies.yml',
 'CreditCardChurners.csv',
 'hyperparameter_tuning.ipynb',
 'README.md',
 'train.py',
 'udacity-project.ipynb']

## Initialize Workspace and Create an Azure ML experiment

In [3]:
# Connect to the workspace described by the local config file and open
# (or create) the experiment that will own the HyperDrive run.
ws = Workspace.from_config()
exp = Experiment(workspace=ws, name='hyperdrive-experiment')

# NOTE(review): project_folder is not referenced by any later cell visible in
# this notebook - confirm whether it is still needed.
project_folder = './hyperdrive-model'

# One "label: value" line per workspace property.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# Starts an interactive run on the experiment.
# NOTE(review): `run` is not used by any later cell visible here - confirm
# whether this interactive run should be completed or dropped.
run = exp.start_logging()

Workspace name: quick-starts-ws-132953
Azure region: southcentralus
Subscription id: a0a76bad-11a1-4a2d-9887-97a29122c8ed
Resource group: aml-quickstarts-132953


## Dataset

In [4]:
# Fetch the dataset registered in the workspace under this name; the bare
# expression below renders its definition as the cell output.
dataset = Dataset.get_by_name(
    ws,
    name="Credit-Card-Churners",
)
dataset

{
  "source": [
    "https://raw.githubusercontent.com/Apurva0109/nd00333-capstone/main/starter_file/CreditCardChurners.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "1a886fb9-f00a-4135-80c6-0cc42c81f355",
    "name": "Credit-Card-Churners",
    "version": 1,
    "workspace": "Workspace.create(name='quick-starts-ws-132953', subscription_id='a0a76bad-11a1-4a2d-9887-97a29122c8ed', resource_group='aml-quickstarts-132953')"
  }
}

## Create or Attach an AmlCompute cluster

In [5]:
compute_name = "compute-aml-cluster"

# Reuse the compute target if it already exists in the workspace; otherwise
# provision a new AmlCompute cluster under the same name.
try:
    compute_aml_cluster = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing cluster.')
except ComputeTargetException:
    # Lookup failed, so the cluster has to be created.
    # (Fixed: the method is `provisioning_configuration`; the previous
    # misspelling raised AttributeError whenever this branch was taken.)
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',
                                                           max_nodes=5)
    compute_aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)

# Wait for the (existing or newly requested) cluster to finish provisioning
# before any run is submitted to it.
compute_aml_cluster.wait_for_completion(show_output=True)

Found existing cluster.

Running


## Environment setup

In [6]:
from azureml.core import Environment

# Build the training environment from the conda specification file that sits
# next to this notebook.
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='conda_dependencies.yml',
)

## Hyperdrive Configuration

In [7]:
# Search space: each key is a command-line flag forwarded to the training
# script, each value the discrete set of candidates to sample from.
param_space = {
    '--n_estimators': choice(2, 4, 8, 16, 32, 64, 200),
    '--max_depth': choice(5, 8, 15, 25, 30),
    '--min_samples_split': choice(2, 5, 10, 15, 100),
    '--min_samples_leaf': choice(1, 2, 5, 10),
}

# Random sampling over the discrete space above.
ps = RandomParameterSampling(param_space)

# Bandit early-termination policy: slack factor 0.1, applied every 2nd
# evaluation interval, with no delay before the first evaluation.
policy = BanditPolicy(
    slack_factor=0.1,
    evaluation_interval=2,
    delay_evaluation=0,
)

# Script run configuration: run train.py from this directory on the AML
# cluster, inside the sklearn environment.
src = ScriptRunConfig(
    source_directory='.',
    script='train.py',
    compute_target=compute_aml_cluster,
    environment=sklearn_env,
)

# HyperDrive configuration combining the script run config, the sampler and
# the termination policy; maximizes the 'Accuracy' metric over at most 12
# runs, 4 at a time.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=12,
    max_concurrent_runs=4,
)

## Submit Run

In [8]:
# Submit the HyperDrive configuration to the experiment; the returned run
# object is what the following cells monitor and query.
hyperdrive_run = exp.submit(
    hyperdrive_config,
    show_output=True,
)

## Run Details


In [9]:
# Render the live monitoring widget for the HyperDrive run.
RunDetails(hyperdrive_run).show()

# Query the current status (the value is not displayed because this is not
# the cell's final expression).
hyperdrive_run.get_status()

# Wait for the sweep to finish while streaming its log; the value returned
# here is shown as the cell output.
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e?wsid=/subscriptions/a0a76bad-11a1-4a2d-9887-97a29122c8ed/resourcegroups/aml-quickstarts-132953/workspaces/quick-starts-ws-132953

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-02T14:40:10.440463][API][INFO]Experiment created<END>\n""<START>[2021-01-02T14:40:10.872500][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-01-02T14:40:11.098771][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-02T14:40:11.6036152Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e
Web View: https://ml.azure.com/experiments/hyperdrive-experiment/runs/HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e?wsid=/subscri

{'runId': 'HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e',
 'target': 'compute-aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-01-02T14:40:10.217402Z',
 'endTimeUtc': '2021-01-02T14:52:16.265963Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '396c04c1-e938-40af-9093-190489747c58',
  'score': '0.9491609081934848',
  'best_child_run_id': 'HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg132953.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=8eSp%2FDf0DOThfx6QPw2oQAaWJdVEcyQsZmBiAdb6FPs%3D&st=2021-01-02T14%3A42%3A18Z&se=2021-01-02T22%3A52%3A18Z&sp=r'}}

## Best Model

In [10]:
# Pick the child run with the best primary metric and collect what we want
# to report about it.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
best_run_files = best_run.get_file_names()

# The command-line arguments of the run carry the sampled hyperparameters.
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']

# Summary report.
print('Best Run ID', best_run.id)
print('\n Metrics: ', best_run_metrics)
print('\n Parameters: ', parameter_values, sep='\n')
print('\nAccuracy of Best run', best_run_metrics['Accuracy'], sep='\n')
print('\nBest run file names', best_run_files, sep='\n')

# Final expression: show the artifact list as the cell output.
best_run.get_file_names()

Best Run ID HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e_2

 Metrics:  {'Num Estimators:': 32.0, 'Max Depth:': 30, 'Min Samples Split:': 10, 'Min Samples Leaf:': 5, 'Accuracy': 0.9491609081934848}

 Parameters: 
['--max_depth', '30', '--min_samples_leaf', '5', '--min_samples_split', '10', '--n_estimators', '32']

Accuracy of Best run
0.9491609081934848

Best run file names
['azureml-logs/55_azureml-execution-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt', 'azureml-logs/65_job_prep-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt', 'logs/azureml/98_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/dataprep/engine_spans_l_b9503fba-b28a-44a6-99ed-5198af18fd16.jsonl', 'logs/azureml/dataprep/python_span_l_b9503fba-b28a-4

['azureml-logs/55_azureml-execution-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt',
 'azureml-logs/65_job_prep-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_6dc4414c6e80373202f4fb4b5b5e87aba6062797030fe3c6ff1b3819ca71c854_d.txt',
 'logs/azureml/98_azureml.log',
 'logs/azureml/dataprep/backgroundProcess.log',
 'logs/azureml/dataprep/backgroundProcess_Telemetry.log',
 'logs/azureml/dataprep/engine_spans_l_b9503fba-b28a-44a6-99ed-5198af18fd16.jsonl',
 'logs/azureml/dataprep/python_span_l_b9503fba-b28a-44a6-99ed-5198af18fd16.jsonl',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.joblib']

In [11]:
# Display the best run's summary (experiment, id, type, status, links).
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hyperdrive-experiment,HD_af5902d7-2cf1-43f1-8d97-19ce4adbe62e_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [13]:
# Download the trained model artifact from the best run to a local file.
# The artifact name matches the entry reported by best_run.get_file_names()
# ('outputs/model.joblib', no leading slash) and the path used when the model
# is registered.
best_run.download_file('outputs/model.joblib', 'hyperdrive_model.joblib')

In [14]:
# Register the best run's model artifact in the workspace.
# model_path is the artifact path inside the run's outputs.
model = best_run.register_model(
    model_name='best_hyperdrive_model',
    model_path='outputs/model.joblib',
    description='A best hyperdrive model',
)