# Hyperparameter Tuning using HyperDrive

In [1]:
import azureml
from azureml.core import Workspace, Experiment
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from azureml.core.dataset import Dataset
import os
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
import joblib

# Report the installed Azure ML core SDK version so the run environment is recorded.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.20.0


## Dataset

In [2]:
# Connect to the workspace from the local workspace configuration and echo its identity.
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Experiment under which the HyperDrive run is submitted.
experiment_name = 'heart-failure-hyper-drive-model'
experiment = Experiment(ws, experiment_name)

# NOTE: no interactive run is opened with experiment.start_logging() in this cell.
# Nothing in this notebook logs to or completes such a run, so it would be left
# dangling; the HyperDrive run is created by experiment.submit() instead.

# Heart-failure clinical records CSV, read as a tabular dataset from its public URL.
# NOTE(review): `dataset` is not referenced by the other cells of this notebook;
# presumably train.py loads its own copy of the data - confirm.
path = "https://raw.githubusercontent.com/Arushikha0408/nd00333-capstone/master/heart_failure_clinical_records_dataset.csv"
dataset = Dataset.Tabular.from_delimited_files(path)

quick-starts-ws-138727
aml-quickstarts-138727
southcentralus
a24a24d5-8d87-4c8a-99b6-91ed2d2df51f


In [3]:
# Name of the AmlCompute cluster used as the training compute target.
cluster_name = "cpu-cluster"

# Reuse a cluster with this name when the lookup succeeds; otherwise provision
# a new one (the lookup signals failure with ComputeTargetException).
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target')

# Block (for at most 20 minutes) until provisioning settles, then show the cluster status.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Creating a new compute target...
Creating
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-02-12T20:23:19.590000+00:00', 'errors': None, 'creationTime': '2021-02-12T20:23:15.361955+00:00', 'modifiedTime': '2021-02-12T20:23:31.753932+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


## Hyperdrive Configuration
 
I chose a custom-coded model — a standard scikit-learn logistic regression — for this experiment. Logistic regression is a classification algorithm that predicts the probability of a categorical dependent variable. For this capstone experiment I chose it because its decision boundary is linear, which makes it a simple binary classifier for separating the two classes I want to predict, and because its hyperparameters can be tuned with the HyperDrive service.

The parameters I used for the hyperparameter search are:

Regularization Strength (C), sampled uniformly from the range 0.05 to 0.10 -- the inverse of the regularization strength. Smaller values cause stronger regularization.

Max Iterations (max_iter), chosen from the values 25, 50, 75 and 100 -- the maximum number of iterations allowed for the solver to converge.

In [8]:
# Early-termination policy: a BanditPolicy evaluated every 2 intervals with a
# slack factor of 0.2 relative to the best-performing run.
early_termination_policy = BanditPolicy(evaluation_interval=2, slack_factor=0.2)

# Random search space over the two arguments handed to the training script:
# --C is drawn uniformly from [0.05, 0.10]; --max_iter is one of 25/50/75/100.
parameter_sampling = RandomParameterSampling(
    {
        "--C": uniform(0.05, 0.10),
        "--max_iter": choice(25, 50, 75, 100),
    }
)

# SKLearn estimator that runs train.py from the current directory on the cluster.
training_script = 'train.py'
est = SKLearn(
    source_directory='.',
    entry_script=training_script,
    compute_target=compute_target,
)

# HyperDriveConfig tying together the estimator, the sampler and the policy.
# The primary metric name must match the metric name logged by train.py
# ("Accuracy"), and the goal is to maximize it across at most 4 runs, all of
# which may execute concurrently.
hyperdrive_config = HyperDriveConfig(
    estimator=est,
    hyperparameter_sampling=parameter_sampling,
    policy=early_termination_policy,
    primary_metric_name="Accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    max_concurrent_runs=4,
)



In [9]:
# Submit the HyperDrive configuration to the experiment; the returned run
# object is used by the following cells to monitor and query the sweep.
hyperdrive_run = experiment.submit(config=hyperdrive_config, show_output=True)



## Run Details

In [10]:
# Show the live monitoring widget for the HyperDrive run.
run_widget = RunDetails(hyperdrive_run)
run_widget.show()

# Stream the logs until the run finishes; the value returned here is the
# cell's displayed output.
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_3c039c6c-a87f-4128-a627-23e130572f9c
Web View: https://ml.azure.com/experiments/heart-failure-hyper-drive-model/runs/HD_3c039c6c-a87f-4128-a627-23e130572f9c?wsid=/subscriptions/a24a24d5-8d87-4c8a-99b6-91ed2d2df51f/resourcegroups/aml-quickstarts-138727/workspaces/quick-starts-ws-138727

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-02-12T20:26:14.839943][API][INFO]Experiment created<END>\n""<START>[2021-02-12T20:26:15.239279][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-02-12T20:26:15.378313][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-02-12T20:26:16.9967659Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_3c039c6c-a87f-4128-a627-23e130572f9c
Web View: https://ml.azure.com/experiments/heart-failure-hyper-drive-model/runs/HD_3c039c6c-a87f-4128-a627-23e130

{'runId': 'HD_3c039c6c-a87f-4128-a627-23e130572f9c',
 'target': 'cpu-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-12T20:26:14.579337Z',
 'endTimeUtc': '2021-02-12T20:33:46.660767Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '0c2e64cf-17cb-40bf-82db-ecea86e48cc5',
  'score': '0.7666666666666667',
  'best_child_run_id': 'HD_3c039c6c-a87f-4128-a627-23e130572f9c_2',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg138727.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_3c039c6c-a87f-4128-a627-23e130572f9c/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=4%2FFMt3%2Fd6bVVtQOJcyx49K6KaQmvV3icpuPVo1Xo7wo%3D&st=2021-02-12T20%3A23%3A58Z&se=2021-02-13T04%3A33%3A58Z&sp=r'},
 'submittedBy': 'ODL_User 138727'}

## Best Model

In [11]:
# Select the child run that scored best on the primary metric and fetch the
# metrics it logged.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()

print("Best run id : ", best_run.id)
print(best_run_metrics)

Best run id :  HD_3c039c6c-a87f-4128-a627-23e130572f9c_2
{'Regularization Strength:': 0.09365669925314991, 'Max iterations:': 75, 'Accuracy': 0.7666666666666667}


In [12]:
# Display the best child run as the cell output (rich summary of the run).
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-hyper-drive-model,HD_3c039c6c-a87f-4128-a627-23e130572f9c_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [13]:
# Summarize the best run: its id, the accuracy it logged, and its tags.
best_accuracy = best_run_metrics['Accuracy']
best_run_tags = best_run.get_tags()

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_accuracy)
print(best_run_tags)

Best Run Id:  HD_3c039c6c-a87f-4128-a627-23e130572f9c_2

 Accuracy: 0.7666666666666667
{'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}'}


In [14]:
# Register the model file produced by the best run in the workspace, tagging
# it with the tuning method and recording the accuracy it achieved.
model_tags = {'Method': 'Hyperdrive'}
model_properties = {'Accuracy': best_run_metrics['Accuracy']}

model = best_run.register_model(
    model_name='heart-failure-prediction-hyperdrive-model',
    model_path='outputs/hd-model.joblib',
    tags=model_tags,
    properties=model_properties,
)
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-138727', subscription_id='a24a24d5-8d87-4c8a-99b6-91ed2d2df51f', resource_group='aml-quickstarts-138727'), name=heart-failure-prediction-hyperdrive-model, id=heart-failure-prediction-hyperdrive-model:1, version=1, tags={'Method': 'Hyperdrive'}, properties={'Accuracy': '0.7666666666666667'})


In [None]:
# Optional cleanup: uncomment to delete the compute cluster once you are done.
# The call must go on the compute target object, not on `cluster_name`,
# which is only the cluster's name as a string.
# compute_target.delete()