In [3]:
from azureml.core import Workspace, Experiment

# Connect to the Azure ML workspace using the configuration located at ./config
print("Accessing the workspace from job....")
ws = Workspace.from_config("./config")


# Look up the registered dataset named 'default' in the workspace
print("Accessing the Defaults dataset...")
input_ds = ws.datasets.get('default')

# If the lookup comes back empty (None), stop here with a clear message rather
# than failing later with an AttributeError when input_ds.as_named_input(...)
# is called while building the script configuration.
if input_ds is None:
    raise LookupError("Dataset 'default' was not found in the workspace.")


from azureml.core import Environment
from azureml.core.environment import CondaDependencies

# Declare the packages the environment needs: three conda packages plus the
# pip-installed azureml-defaults.
myenv_dep = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pip', 'pandas'],
    pip_packages=['azureml-defaults'],
)

# Build the named environment and attach the dependency set to it
myenv = Environment(name="MyEnvironment")
myenv.python.conda_dependencies = myenv_dep

# Register the environment in the workspace
print("Registering the environment...")
myenv.register(ws)

# Name of the compute cluster to reuse, or to create if it does not exist yet
cluster_name = "my-cluster-001"

# AmlCompute supplies both the provisioning configuration and the create call
from azureml.core.compute import AmlCompute

print("Accessing the compute cluster...")

# Read ws.compute_targets a single time and reuse the result, instead of
# reading it once for the membership test and again for the indexing.
# NOTE(review): the property presumably queries the workspace on each access,
# so this should also save a round trip - confirm against the SDK docs.
cluster = ws.compute_targets.get(cluster_name)

if cluster is None:
    # No compute target with this name: provision a new cluster (at most 2 nodes)
    print("Creating the compute cluster with name: ", cluster_name)
    compute_config = AmlCompute.provisioning_configuration(
                                     vm_size="STANDARD_D11_V2",
                                     max_nodes=2)

    cluster = AmlCompute.create(ws, cluster_name, compute_config)
    # Block until provisioning has finished before the cluster is used
    cluster.wait_for_completion()
else:
    print(cluster_name, ", compute cluster found. Using it...")


# Configure how the training script is run: where it lives, which file to
# execute, its input-data argument, and the environment/compute to run on.
from azureml.core import ScriptRunConfig

script_config = ScriptRunConfig(
    source_directory="./script",
    script="320 - hyperdrive.py",
    # The dataset is handed to the script as a named input called 'raw_data'
    arguments=['--input-data', input_ds.as_named_input('raw_data')],
    environment=myenv,
    compute_target=cluster,
)


# Define the HyperDrive search space: a grid over two script arguments
# (4 values for --n_estimators, 3 values for --min_samples_leaf).
from azureml.train.hyperdrive import GridParameterSampling, choice

hyper_params = GridParameterSampling({
    '--n_estimators': choice(10, 20, 50, 100),
    '--min_samples_leaf': choice(1, 2, 5),
})


# Build the HyperDrive configuration: run the script configuration once per
# sampled parameter set and maximize the 'accuracy' metric.
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

hyper_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=hyper_params,
    policy=None,  # no early-termination policy is supplied
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=2,
)

# Submit the HyperDrive configuration under a named experiment, then block
# until the run has finished while streaming its log output.
new_experiment = Experiment(ws, 'Hyperdrive_Exp001')
new_run = new_experiment.submit(hyper_config)
new_run.wait_for_completion(show_output=True)

Accessing the workspace from job....
Accessing the Defaults dataset...
Registering the environment...
Accessing the compute cluster...
my-cluster-001 , compute cluster found. Using it...
RunId: HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34
Web View: https://ml.azure.com/runs/HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34?wsid=/subscriptions/3467f739-a57b-4612-9de8-72a6616c01b3/resourcegroups/AzuremlSDKRG00/workspaces/Azureml-SDK-WS01&tid=bcd8db96-8bb9-4f0d-af35-e471bf92c072

Streaming azureml-logs/hyperdrive.txt

[2022-10-05T16:04:55.053458][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space
[2022-10-05T16:04:55.8565003Z][SCHEDULER][INFO]Scheduling job, id='HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_0' 
[2022-10-05T16:04:55.9413149Z][SCHEDULER][INFO]Scheduling job, id='HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_1' 
[2022-10-05T16:04:56.1193250Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_0' 
[2022-10-05T16:04:56.3902952Z][SCHEDULER][INFO

{'runId': 'HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34',
 'target': 'my-cluster-001',
 'status': 'Completed',
 'startTimeUtc': '2022-10-05T16:04:54.250199Z',
 'endTimeUtc': '2022-10-05T16:21:59.820456Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '43064fe5-570a-4ec8-b912-7cbb6fe4ebe0',
  'user_agent': 'python/3.8.8 (Windows-10-10.0.19041-SP0) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.46.0',
  'space_size': '12',
  'score': '0.7896995708154506',
  'best_child_run_id': 'HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_6',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_6'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml-sd

In [4]:
# Retrieve the child run whose hyperparameter combination scored best on the
# primary metric of the HyperDrive run.
best_run = new_run.get_best_run_by_primary_metric()

# The lookup can come back empty when no child run reported the primary
# metric; report that instead of failing on best_run.id.
if best_run is None:
    print("No best run available: no child run reported the primary metric.")
else:
    print("Best Run ID : ", best_run.id)
    print(best_run.get_metrics())

Best Run ID :  HD_6d6430f5-c125-40c3-ab8f-6b1683db9c34_6
{'accuracy': 0.7896995708154506}
