# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [12]:
from azureml.core import Workspace, Experiment, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice

## Dataset

In [13]:
# Connect to the Azure ML workspace described by the local config file.
ws = Workspace.from_config()

# All HyperDrive runs in this notebook are grouped under this experiment.
experiment_name = 'FetalHealth_HypExp'
experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code FL6HWMW26 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


In [14]:
import os

from azureml.core.compute import ComputeTarget, AmlCompute

# Cluster settings can be overridden through environment variables; the second
# argument of each lookup is the default used when the variable is not set.
compute_name = os.environ.get('UDACITY_AML_COMPUTE_CLUSTER_NAME', 'FHCapstoneCompute')

# Environment variables are always strings, so convert the node counts to int
# before handing them to the provisioning configuration.
compute_min_nodes = int(os.environ.get('UDACITY_AML_COMPUTE_CLUSTER_MIN_NODES', 0))
compute_max_nodes = int(os.environ.get('UDACITY_AML_COMPUTE_CLUSTER_MAX_NODES', 4))

vm_size = os.environ.get('UDACITY_AML_COMPUTE_CLUSTER_SKU', 'STANDARD_D2_V2')


# Reuse the cluster when the workspace already has one with this name;
# otherwise provision a new one.
if compute_name in ws.compute_targets:
    compute_target = ws.compute_targets[compute_name]
    print(compute_name+ ' already exist.')
else:
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # Wait for provisioning to finish so the status printed below (and the
    # cells that submit work to this cluster) see the final state.
    compute_target.wait_for_completion(show_output=True)

    print(compute_target.get_status().serialize())

FHCapstoneCompute already exist.


In [15]:
%%writefile conda_dependencies.yml

# Conda environment specification written to conda_dependencies.yml by the
# %%writefile magic at the top of this cell.
dependencies:
# Conda-installed packages
- python=3.6.2
- scikit-learn
# Packages installed through pip inside the conda environment
- pip:
  - azureml-defaults

Overwriting conda_dependencies.yml


In [16]:
from azureml.core import Environment

# Build the run environment from the conda specification file
# (conda_dependencies.yml) written by the %%writefile cell.
sklearn_env = Environment.from_conda_specification(
    name='sklearn-env',
    file_path='./conda_dependencies.yml',
)

## Hyperdrive Configuration



In [17]:
from azureml.widgets import RunDetails
from azureml.core import Environment
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform, normal
import os

# Specify parameter sampler.
# Each entry must map a script argument to a parameter expression. The
# previous value `(20)` was just the integer 20 (parentheses do not create a
# tuple or a search space), so every run would have used the same setting.
# The original value 20 is kept as one of the candidates.
param_sampling = RandomParameterSampling({
    "--n_estimators": choice(10, 20, 50, 100, 200)
})

# Specify a Policy
# Early termination: starting after 5 intervals, compare runs at every
# interval with a slack factor of 0.1 relative to the best run.
policy = BanditPolicy(slack_factor=0.1, evaluation_interval=1, delay_evaluation=5)

# Make sure the source directory exists; exist_ok avoids the separate
# listdir() check and works regardless of how the folder name is listed.
os.makedirs("./training", exist_ok=True)

# NOTE(review): 'n_estimators' is passed to train.py as a literal positional
# argument. HyperDrive adds the sampled "--n_estimators <value>" pair itself,
# so this entry is probably unnecessary -- confirm against train.py's argument
# parsing before removing it.
config = ScriptRunConfig(source_directory='./training',
                         script='train.py',
                         arguments=['n_estimators'],
                         compute_target=compute_target,
                         environment=sklearn_env)

# Create a HyperDriveConfig using the run configuration, hyperparameter
# sampler, and policy. 'Accuracy' is the metric being maximized; it is
# presumably logged under that exact name by train.py -- verify.
# max_total_runs is an upper bound on the number of child runs.
hyperdrive_config = HyperDriveConfig(run_config=config,
                                     hyperparameter_sampling=param_sampling,
                                     policy=policy,
                                     primary_metric_name='Accuracy',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=100,
                                     max_concurrent_runs=4)

In [18]:
from azureml.core import Workspace, Experiment

# Launch the HyperDrive sweep under the experiment; the returned run handle
# is what the later cells use to monitor progress and pick the best child run.
hyper_run = experiment.submit(hyperdrive_config)

## Run Details

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [19]:
# Show live progress of the HyperDrive run and its child runs.
RunDetails(hyper_run).show()

# The widget does not block, so wait here until the sweep has finished.
# Without this, "Run All" would query the best run before any child run
# has reported its metric.
hyper_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [20]:
import os

import joblib
# Get your best run and save the model from that run.

best_hyper_run = hyper_run.get_best_run_by_primary_metric()
if best_hyper_run is None:
    # Happens when no child run has logged the primary metric yet, e.g. the
    # sweep is still running or every child run failed.
    raise RuntimeError(
        "No best run available: make sure the HyperDrive run has completed "
        "and that at least one child run logged the primary metric."
    )

best_hyper_run_metrics = best_hyper_run.get_metrics()
parameter_values = best_hyper_run.get_details()['runDefinition']['arguments']

# Download what the best run stored under outputs/ into the local ./outputs
# folder. The previous code pickled only the run ID string with joblib, so
# the saved file never contained a model.
os.makedirs("./outputs", exist_ok=True)
best_hyper_run.download_files(prefix='outputs/', output_directory='.')
print("Model successfully saved!")

print('Best Run Id: ', best_hyper_run.id)
print('\n Accuracy:', best_hyper_run_metrics['Accuracy'])

Model successfully saved!
Best Run Id:  HD_b2415cd2-7a41-401a-a562-20ca7a438a0e_0

 Accuracy: 0.9357366771159875


In [21]:
# Registers best model with metrics information.
# The run's outputs/ folder is registered as the model, and the best run's
# accuracy is attached as a tag (tag values are strings) so the metric is
# stored with the registered model.
model = best_hyper_run.register_model(
    model_name='bestHyperDrive_model',
    model_path='outputs/',
    tags={'Accuracy': str(best_hyper_run_metrics['Accuracy'])},
)