# Hyperparameter Tuning using HyperDrive

Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
from azureml.core import Workspace, Experiment
from azureml.train.sklearn import SKLearn 
from azureml.train.hyperdrive.run import PrimaryMetricGoal 
from azureml.train.hyperdrive.policy import BanditPolicy 
from azureml.train.hyperdrive.sampling import RandomParameterSampling 
from azureml.train.hyperdrive.runconfig import HyperDriveConfig 
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.widgets import RunDetails 

## Dataset

Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [2]:
# Connect to the workspace from its saved configuration and open the
# experiment under which the HyperDrive runs are submitted.
experiment_name = 'Hyper-drive-experiment'
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to look up (and to create when the lookup fails).
cpu_cluster_name = "notebook142329"

try:
    # Reuse the cluster if the workspace already has one with this name.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster of STANDARD_D2_V2 VMs, capped at 4 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster operation completes, printing progress to the cell output.
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration


In [4]:
# Early termination policy (not required when Bayesian sampling is used):
# a Bandit policy with evaluation_interval=2 and slack_factor=0.1.
early_termination_policy = BanditPolicy(slack_factor=0.1, evaluation_interval=2)

# Random search space over the arguments passed to the training script.
param_sampling = RandomParameterSampling(
    {
        "--learning_rate": uniform(0.01, 0.3),
        "--epochs": choice(4, 8, 16, 32),
        "--neurons": choice(48, 60, 72, 84, 96),
    }
)

# Estimator that runs entry.py from the current directory on the CPU cluster,
# with azureml-dataprep and tensorflow installed through pip.
estimator = SKLearn(
    source_directory=".",
    entry_script='entry.py',
    compute_target=cpu_cluster,
    pip_packages=['azureml-dataprep', 'tensorflow'],
)

# HyperDrive sweep: maximize the logged 'accuracy' metric over at most
# 32 runs in total, with no more than 2 running concurrently.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=32,
    max_concurrent_runs=2,
)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.
'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [5]:
# Launch the HyperDrive sweep under the experiment; `hdr` is the handle
# to the submitted parent run used by the cells that follow.
hdr = experiment.submit(hyperdrive_run_config)



## Run Details

In the cell below, use the `RunDetails` widget to show the different experiments.

In [6]:
# Render the run-details widget for the HyperDrive parent run.
run_details_widget = RunDetails(hdr)
run_details_widget.show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [7]:
# Block until the HyperDrive run has finished; the value returned by the call
# (the run's details) is displayed as this cell's output.
hdr.wait_for_completion() 

{'runId': 'HD_4ffc3aee-14e5-402b-abd8-8f95b70086f7',
 'target': 'notebook142329',
 'status': 'Completed',
 'startTimeUtc': '2021-04-17T18:12:05.059655Z',
 'endTimeUtc': '2021-04-17T18:43:26.922012Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '918d438c-c89e-464a-9ce6-40009830b5ac',
  'score': '0.8666666746139526',
  'best_child_run_id': 'HD_4ffc3aee-14e5-402b-abd8-8f95b70086f7_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg142959.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_4ffc3aee-14e5-402b-abd8-8f95b70086f7/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=<redacted>&st=2021-04-17T18%3A33%3A54Z&se=2021-04-18T02%3A43%3A54Z&sp=r'},
 'submittedBy': 'ODL_User 142959'}

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [8]:
# Fetch the child run with the best primary metric and the metrics it logged.
best_run = hdr.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print('Best run id:', best_run.id)

# (label, metric key) pairs, printed in order. The trailing colons in some
# keys are part of the metric names as they appear in the logged metrics.
for label, metric_key in (
    ('\n Accuracy:', 'accuracy'),
    ('\n Learning rate:', 'learning_rate:'),
    ('\n Number of neurons in hidden layer', 'neurons:'),
    ('\n Number of epochs', 'epochs:'),
):
    print(label, best_run_metrics[metric_key])

Best run id: HD_4ffc3aee-14e5-402b-abd8-8f95b70086f7_2

 Accuracy: 0.8666666746139526

 Learning rate: 0.2978839837686683

 Number of neurons in hidden layer 84.0

 Number of epochs 8.0


In [9]:
# List the files stored with the best run (logs and the saved model files).
best_run_files = best_run.get_file_names()
print(best_run_files)

['azureml-logs/55_azureml-execution-tvmps_6a809841eb479eb1c02c2e1139b820f757e84c259b1662c6901beb7cd18367b5_d.txt', 'azureml-logs/65_job_prep-tvmps_6a809841eb479eb1c02c2e1139b820f757e84c259b1662c6901beb7cd18367b5_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_6a809841eb479eb1c02c2e1139b820f757e84c259b1662c6901beb7cd18367b5_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'logs/azureml/105_azureml.log', 'logs/azureml/dataprep/backgroundProcess.log', 'logs/azureml/dataprep/backgroundProcess_Telemetry.log', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/model/saved_model.pb', 'outputs/model/variables/variables.data-00000-of-00001', 'outputs/model/variables/variables.index']


In [10]:
# Register the whole SavedModel folder rather than only saved_model.pb.
# The best run's outputs/model directory also contains
# variables/variables.index and variables/variables.data-*, and the .pb graph
# cannot be restored without those weight files, so a model registered from
# the single file would not be loadable.
registered_model = best_run.register_model(model_name='hyperdrive-best-model', model_path='outputs/model')

In [19]:
# Save the best run's environment definition to a local folder.
# overwrite=True keeps this cell re-runnable: with the default
# overwrite=False, save_to_directory fails once the target directory already
# exists from an earlier execution of the notebook.
best_run_env = best_run.get_environment()
best_run_env.save_to_directory("hyper-drive-conda environment", overwrite=True)


## Model Deployment
The AutoML model performed better than the HyperDrive model, so the AutoML model was deployed instead.