# Hyperparameter Tuning using HyperDrive

In [1]:
# Common Imports
import joblib
from azureml.core import Workspace, Experiment, Dataset, Environment, ScriptRunConfig
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.experiment import Experiment
from azureml.widgets import RunDetails

# Hyperdrive
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform

## Prepare for Experiment

In [2]:
# Load the workspace information from config.json using the Azure ML SDK
ws = Workspace.from_config()
ws.name

'DataScience'

In [3]:
# Let's make sure we have a compute resource
cluster_name = "Low-End-Compute-Cluster"
max_nodes = 4

# Fetch or create the compute resource
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name) # This will throw a ComputeTargetException if this doesn't exist
    print('Using existing compute: ' + cluster_name)
except ComputeTargetException:
    # Create the cluster
    print('Provisioning cluster...')
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_D2DS_V4", min_nodes=0, max_nodes=max_nodes)
    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Ensure the cluster is ready to go
cpu_cluster.wait_for_completion(show_output=True)

Using existing compute: Low-End-Compute-Cluster
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [4]:
# Create a Machine Learning Experiment
experiment_name = 'NHL-Penalty-Minute-Prediction'

experiment=Experiment(ws, experiment_name)

## Dataset

The external dataset I'm using requires some data preparation. This preparation is handled in `dataprep.ipynb` and the prepped dataset is registered on Azure under the 'NHL-Penalities-2020' name. Fetching it by this name allows this notebook to be reliant on that external data without being tethered to its process, should the data cleaning phase change.

In [5]:
# Next, grab our dataset from Azure
ds = Dataset.get_by_name(workspace=ws, name='NHL-Penalties-2020')
print(ds.name + ' v' + str(ds.version) + ': ' + ds.description)

# Display the data structure here for verification
ds.to_pandas_dataframe().head()

NHL-Penalties-2020 v4: A breakdown of penalty minutes per game matchup


Unnamed: 0,penaltyMinutes,type,homeTeam,awayTeam
0,12.0,R,Stars,Avalanche
1,29.0,R,Stars,Avalanche
2,18.0,R,Stars,Avalanche
3,24.0,R,Stars,Avalanche
4,4.0,R,Stars,Avalanche


## Hyperdrive Configuration

I am using a LinearRegression model because it is commonly used and simple to configure.

Because there are not many parameters available, I am using Random Parameter Sampling since a Grid search is not possible with uniform variables and Bayesian feels like overkill for these needs.

I am, however, using a standard bandit termination policy to keep runs from getting out of hand.

Finally, I chose R Squared as my primary metric because this is the primary output of the LinearRegression training process and it is a fairly reliable metric overall.

In [6]:
# Terminate runs if they appear to have stalled
policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)

# Create the different params that you will be using during training
ps = RandomParameterSampling({
    "normalize": choice(0, 1),   # Whether or not the linear regression should normalize
    "fit": choice(0, 1),         # Whether or not the linear regression should be fit to the intercept
    "split": uniform(0.05, 0.5), # Train / Test Split percentage to reserve for validation
})

# Setup environment for your training run
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# Create a ScriptRunConfig Object to specify the configuration details of your training job
src = ScriptRunConfig(source_directory='train/', 
                      script='train.py',
                      run_config=None,
                      compute_target=cpu_cluster,
                      environment=sklearn_env)

# Create a HyperDriveConfig using the src object, hyperparameter sampler, and policy.
max_runs = 30
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=ps, 
                                     primary_metric_name='R Squared', 
                                     primary_metric_goal=PrimaryMetricGoal.MINIMIZE, 
                                     max_total_runs=max_runs, 
                                     max_concurrent_runs=max_nodes,                                      
                                     policy=policy)

In [7]:
# Actually run the experiment
hyperdrive_run = experiment.submit(hyperdrive_config)
print(str(hyperdrive_run.id))

# Show progress on the run
RunDetails(hyperdrive_run).show()

# Wait for the run to complete
hyperdrive_run.wait_for_completion(show_output=False)

HD_c03f3471-e829-401c-8baf-52dc2dab7c9a


_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_c03f3471-e829-401c-8baf-52dc2dab7c9a',
 'target': 'Low-End-Compute-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-19T02:49:12.122307Z',
 'endTimeUtc': '2022-04-19T03:07:24.697279Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "R Squared", "goal": "minimize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '1845b53a-8b64-40c1-8fbc-5ddc7f0b0466',
  'user_agent': 'python/3.8.8 (Windows-10-10.0.22000-SP0) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.40.0',
  'space_size': 'infinite_space_size',
  'score': '-0.03804473799451702',
  'best_child_run_id': 'HD_c03f3471-e829-401c-8baf-52dc2dab7c9a_17',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://datascience7947653020.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_c03f3471-e829-401c-8baf-52dc2da

## Run Details

In [8]:
# Get the Best Run
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
NHL-Penalty-Minute-Prediction,HD_c03f3471-e829-401c-8baf-52dc2dab7c9a_17,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [9]:
# Display the hyperparameters for the best run
best_run.get_details()['runDefinition']['arguments']

['--fit', '0', '--normalize', '0', '--split', '0.08048705423418313']

In [10]:
# Display the metrics for the best run
best_run.get_metrics()

{'Normalize:': False,
 'Fit:': False,
 'Split %:': 0,
 'R Squared': -0.03804473799451702}

## Best Model

In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [11]:
# Downloads all files from the best run, including the model
best_run.download_files(output_directory='hyperdrive-output')

In [12]:
# Load the model
hyperdrive_model_path = 'hyperdrive-output\outputs\model.pkl'
with open(hyperdrive_model_path, 'rb') as f:
    loaded_hyperdrive_model = joblib.load(f)

loaded_hyperdrive_model

Trying to unpickle estimator LinearRegression from version 0.24.2 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


LinearRegression(copy_X=False, fit_intercept=False, n_jobs=-1, normalize=False)

## Model Registration

In [13]:
# Register the best model
best_run.register_model(model_name='NHL-Penalties-Hyperdrive', model_path='outputs/model.pkl', description='NHL Game Penalty Prediction Best Hyperdrive Run')

Model(workspace=Workspace.create(name='DataScience', subscription_id='efba8785-116c-4443-9a05-764c75c7bb0d', resource_group='datascience'), name=NHL-Penalties-Hyperdrive, id=NHL-Penalties-Hyperdrive:2, version=2, tags={}, properties={})