In [1]:
from azureml.train.hyperdrive import BayesianParameterSampling, choice, uniform
from azureml.train.hyperdrive import BanditPolicy
from azureml.core import Workspace
from azureml.core import Experiment, Environment
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import ScriptRunConfig



In [2]:
# Obtain the Azure ML workspace handle used by every later cell.
# Workspace.from_config() loads the workspace from a saved configuration
# (see the azureml-core Workspace.from_config documentation for lookup rules).
ws = Workspace.from_config()

# Alternative kept for reference: construct the workspace explicitly from its
# name, subscription and resource group (those three variables are not defined
# in this notebook and would have to be supplied first).
#ws = Workspace(workspace_name = workspace_name,
#               subscription_id = subscription_id,
#               resource_group = resource_group)

In [3]:
# Search space for the training script's command-line arguments.
# Both hyperparameters are discrete: every trial picks one listed value per key.
param_space = {
    '--n_estimators': choice(1, 5, 10, 50, 100, 500),
    '--max_depth': choice(range(1, 3)),  # range(1, 3) -> candidate depths 1 and 2
}

# Bayesian sampling chooses each new parameter combination based on how the
# previously sampled combinations performed.
param_sampling = BayesianParameterSampling(parameter_space=param_space)

# Bandit policy: ends a run early when its primary metric trails the best run
# so far by more than the allowed slack.
# NOTE(review): the HyperDriveConfig built later in this notebook passes
# policy=None, so this object is currently not applied to the sweep — confirm
# that this is intentional.
early_termination_policy = BanditPolicy(
    evaluation_interval=1,
    delay_evaluation=5,
    slack_amount=0.2,
)

In [4]:
cpu_cluster_name = "cpuclst"

# Reuse the compute cluster when the workspace already has one under this name;
# a ComputeTargetException from the lookup means it has to be provisioned.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS3_V2',
        min_nodes=0,
        max_nodes=4,
        idle_seconds_before_scaledown=3600,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster (existing or freshly created) reports completion.
cpu_cluster.wait_for_completion(show_output=True)

# Run configuration: execute on the cluster above ...
aml_run_config = RunConfiguration()
aml_run_config.target = cpu_cluster

# ... inside the "sklearn_20_2" environment registered in the workspace.
curated_environment = Environment.get(ws, name="sklearn_20_2")
aml_run_config.environment = curated_environment

# Script run definition: train.py from the current directory, using the
# run configuration assembled above.
script_config = ScriptRunConfig(
    source_directory='./',
    script='train.py',
    run_config=aml_run_config,
)




Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [5]:

# Sweep definition: run train.py with parameters drawn by the Bayesian sampler,
# maximizing the 'Accuracy' metric; at most 6 runs in total, 4 at a time.
# No early-termination policy is applied (policy=None).
hyperdrive = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=param_sampling,
    policy=None,
    primary_metric_name='Accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=6,
    max_concurrent_runs=4,
)

# Submit the sweep under the 'hyperdrive_training' experiment and wait for it
# to finish; the final expression leaves the run details as the cell output.
experiment = Experiment(ws, 'hyperdrive_training')
hyperdrive_run = experiment.submit(hyperdrive)
hyperdrive_run.wait_for_completion()

For best results with Bayesian Sampling we recommend using a maximum number of runs greater than or equal to 20 times the number of hyperparameters being tuned. Recommendend value:40.


{'runId': 'HD_bdc056d8-8f27-4835-9d07-8bf278a58e9a',
 'target': 'cpuclst',
 'status': 'Completed',
 'startTimeUtc': '2021-10-27T18:40:33.344694Z',
 'endTimeUtc': '2021-10-27T18:44:38.816255Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '2b953dba-b7fb-4ad2-b6d4-dfa7d65f911e',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1055-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.33.0',
  'score': '0.8136666666666666',
  'best_child_run_id': 'HD_bdc056d8-8f27-4835-9d07-8bf278a58e9a_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://aiwml1892670485.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_bdc056d8-8f27-4835-9d07-8bf278a58e9a/azureml-logs/hyperdrive.txt?sv=2019-07-07&sr=b&si

In [6]:
# Look up the child run with the best primary metric ('Accuracy').
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# get_best_run_by_primary_metric() returns None when no child run logged the
# primary metric (e.g. every child failed, or train.py never logged it).
# Fail here with an actionable message instead of an AttributeError on None.
if best_run is None:
    raise RuntimeError(
        "HyperDrive run {} has no child run that logged the primary metric; "
        "check the child run logs and that the training script logs it.".format(
            hyperdrive_run.id
        )
    )

# Metrics logged by the best run, and the command-line arguments it was started
# with (a flat list of alternating flag / value strings).
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best Run Id: ', best_run.id)
print('\n Accuracy:', best_run_metrics['Accuracy'])
print(parameter_values)


Best Run Id:  HD_bdc056d8-8f27-4835-9d07-8bf278a58e9a_3

 Accuracy: 0.8136666666666666
['--n_estimators', '10', '--max_depth', '2']
