# Hyperparameter Tuning using HyperDrive

Import all dependencies.

In [10]:
from azureml.core import Run, Dataset
from azureml.core import Experiment, Webservice, Model

from azureml.core.workspace import Workspace

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

from azureml.core import Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.core import ScriptRunConfig

from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import uniform, choice

from azureml.widgets import RunDetails

import os

In [None]:
# Get workspace of this project
ws = Workspace.from_config()

## Dataset

Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

The external dataset is registered and is accessed in the training script. We can take a quick preview of the time serires data.

In [2]:
dataset_name = 'ETHBTC'
dataset = Dataset.get_by_name(workspace=ws, name=dataset_name)

# preview data frame
df = dataset.to_pandas_dataframe()
df

Unnamed: 0,time,open,high,low,close,volume
0,2017-07-14,0.080000,0.091033,0.080000,0.090993,1942.0570
1,2017-07-15,0.090993,0.093699,0.087127,0.087635,4013.0660
2,2017-07-16,0.087508,0.087635,0.075591,0.082241,8904.1580
3,2017-07-17,0.082368,0.088394,0.081699,0.087537,6650.9330
4,2017-07-18,0.087831,0.109068,0.084777,0.107732,7245.7410
...,...,...,...,...,...,...
1513,2021-09-04,0.078778,0.079300,0.077047,0.077832,126072.9194
1514,2021-09-05,0.077837,0.078767,0.075982,0.076316,126794.9485
1515,2021-09-06,0.076315,0.076808,0.074432,0.074550,113725.7970
1516,2021-09-07,0.074551,0.075221,0.069510,0.073272,238944.7002


## Prepare an experiment

In [6]:
experiment_name = 'forecast-ethbtc-hyperdrive'

# create or load an experiment
experiment=Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
forecast-ethbtc-hyperdrive,capstone,Link to Azure Machine Learning studio,Link to Documentation


In [7]:
# create or retrieve a compute target

# Choose a name for your CPU cluster
cpu_cluster_name = "capstone-cluster"

# Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:        
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2s_V3',
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Hyperdrive Configuration


I use Facebook Prophet model to forecast the close price. Two hyperparameters are used for tuning with HyperDrive, each is a discrete option in a random set of 5:

+ `--changepoint-prior-scale` that dictates the flexibility of the automatic changepoint selection. Large values will allow many changepoints, small values will allow few changepoints.
+ `--seasonality-prior-scale` that dictates the strength of the seasonality model. Larger values allow the model to fit larger seasonal fluctuations, smaller values dampen the seasonality.

The termination policy is Bandit Policy that is based on slack factor/slack amount and evaluation interval. The general purpose is to avoid burning the computation resource on the training processes that are unlikely to yield better result. 


The performance metric is Root Mean Squared Error, which means the lower the better; therefore, the primary metric goal is set to `MINIMIZE`.

The custom train environment that has all necessary dependencies is configured through the provided Dockerfile.

In [20]:
# Create an early termination policy. This is not required if you are using Bayesian sampling.
early_termination_policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

# Create the different params that you will be using during training
param_sampling = RandomParameterSampling({
    '--changepoint-prior-scale': choice(0.1, 0.2, 0.3, 0.4, 0.5),
    '--seasonality-prior-scale': choice(0.1, 0.5, 1, 5, 10)
})


## Create or retrieve an environment for running the script
myenv = Environment(name="myenv")

## Creates the environment using a Docker container specified in Dockerfile.
myenv.docker.base_image = None
myenv.docker.base_dockerfile = "./Dockerfile"
myenv.python.user_managed_dependencies=True
myenv.python.interpreter_path = "/opt/miniconda/bin/python"

# Create an estimator / script run config
script_config = ScriptRunConfig(source_directory='.',
                                script='scripts/train.py',                                
                                compute_target=cpu_cluster,
                                environment=myenv,
                                docker_runtime_config=DockerConfiguration(use_docker=True))

# Create hyperdrive config
hyperdrive_run_config = HyperDriveConfig(run_config=script_config,
                                         hyperparameter_sampling=param_sampling,
                                         policy=early_termination_policy,
                                         primary_metric_name='root_mean_squared_error',
                                         primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                                         max_total_runs=6,
                                         max_concurrent_runs=4)


In [21]:
# Submit experiment
hyperdrive_run = experiment.submit(config=hyperdrive_run_config)

# Wait for experiment to complete
hyperdrive_run.wait_for_completion(show_output=True)

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

Use the `RunDetails` widget to show the different experiments.

In [12]:
# Get run details
RunDetails(hyperdrive_run).show()

RunId: HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270
Web View: https://ml.azure.com/runs/HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270?wsid=/subscriptions/d876eeb1-1ac0-424f-8c00-6284386d5106/resourcegroups/nanodegree/workspaces/capstone&tid=91db64d0-e9d0-43a4-a34b-2283395ed452

Execution Summary
RunId: HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270
Web View: https://ml.azure.com/runs/HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270?wsid=/subscriptions/d876eeb1-1ac0-424f-8c00-6284386d5106/resourcegroups/nanodegree/workspaces/capstone&tid=91db64d0-e9d0-43a4-a34b-2283395ed452



_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AQSQCGB58 to authenticate.
Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AA8AESJ66 to authenticate.
Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code EKRGL5PAN to authenticate.
Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code AS2NN7EXA to authenticate.
Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://mi

## Best Model

Get the best model from the hyperdrive experiments and display all the properties of the model.

In [13]:
# Get your best run
hyperdrive_best_run = hyperdrive_run.get_best_run_by_primary_metric()
hyperdrive_best_run

Experiment,Id,Type,Status,Details Page,Docs Page
forecast-ethbtc-hyperdrive,HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270_0,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [26]:
hyperdrive_best_run_metrics = hyperdrive_best_run.get_metrics()

print('Best Run Id: ', hyperdrive_best_run.id)
print('RMSE:', hyperdrive_best_run_metrics['root_mean_squared_error'])

print()
print('All available performance metrics:')
for metric, value in hyperdrive_best_run_metrics.items():    
    print(metric, '=', value)

Best Run Id:  HD_cf2226dd-7a11-4bec-b7b5-47d7be94b270_0
RMSE: 0.012931551023265521

All available performance metrics:
mean_absolute_percentage_error = 0.23685649884063692
mean_absolute_error = 0.010655761405834897
root_mean_squared_error = 0.012931551023265521


In [24]:
hyperdrive_best_run_parameter_values = hyperdrive_best_run.get_details()['runDefinition']['arguments']

print('Model parameters:')
print(hyperdrive_best_run_parameter_values)

Model parameters:
['--changepoint-prior-scale', '0.1', '--seasonality-prior-scale', '1']


In [17]:
# Save the best model
# Create a model folder in the current directory
os.makedirs('./hyperdrive-outputs', exist_ok=True)

# Download the model from best run's outputs
hyperdrive_best_run.download_file(name='outputs/hyperdrive_model.pkl',
                                  output_file_path='./hyperdrive-outputs/hyperdrive_model.pkl')