In [20]:
from azureml.core import Workspace, Experiment

# Load the workspace described by the configuration at ./config.
print("Accessing the workspace from job...")
ws = Workspace.from_config("./config")

# Look up the registered dataset by name. `.get` yields None for a name that
# is not present, so fail here with a clear message instead of hitting an
# AttributeError later, when the dataset is first used.
print("Accessing the Defaults dataset...")
input_ds = ws.datasets.get("gfdhnbv")
if input_ds is None:
    raise LookupError("Dataset 'gfdhnbv' was not found in the workspace")

Accessing the workspace from job...
Accessing the Defaults dataset...


In [21]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

# Define a custom environment: scikit-learn, pip and pandas as conda packages,
# plus the azureml-defaults pip package.
myenv = Environment(name="Myenv")
myenv_dep = CondaDependencies.create(
    conda_packages=["scikit-learn", "pip", "pandas"],
    pip_packages=["azureml-defaults"],
)
myenv.python.conda_dependencies = myenv_dep

# Register the environment in the workspace. The call is the last expression
# of the cell, so the notebook displays whatever it returns.
print("Registering the environment...")
myenv.register(ws)

Registering the environmenr...


{
    "assetId": "azureml://locations/westus/workspaces/54b058d1-e724-4986-aece-5eb3256cb2e6/environments/Myenv/versions/2",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240304.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "Myenv",
    "python": {
        

In [22]:
from azureml.core.compute import AmlCompute

cluster_name = "my-cluster-001"

print("Accessing the compute cluster...")

# Read the workspace's compute targets once and reuse the mapping for both the
# membership test and the lookup, instead of evaluating the property twice.
compute_targets = ws.compute_targets

if cluster_name in compute_targets:
    # Reuse the existing compute target with this name.
    cluster = compute_targets[cluster_name]
    print(cluster_name, ", compute cluster found. Using it...")
else:
    # No compute target with this name yet: provision a new cluster.
    print("Creating the compute cluster with name: ", cluster_name)
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D11_V2", max_nodes=2)
    cluster = AmlCompute.create(ws, cluster_name, compute_config)
    # Wait for the creation to complete before the cluster is used.
    cluster.wait_for_completion()

Accessing the compute cluster...
my-cluster-001 , compute cluster found. Using it...


In [23]:
from azureml.core import ScriptRunConfig

# Describe a single run of the training script: which script to execute, the
# arguments it receives, and the environment and compute target it runs on.
script_config = ScriptRunConfig(
    source_directory=".",
    script="320 - hyperdrive script.py",
    # The dataset is handed to the script as the value of --input-data,
    # under the input name "raw_data".
    arguments=["--input-data", input_ds.as_named_input("raw_data")],
    environment=myenv,
    compute_target=cluster,
)

In [24]:
from azureml.train.hyperdrive import GridParameterSampling, choice, HyperDriveConfig, PrimaryMetricGoal

# Hyperparameter search space: a grid over two script arguments
# (4 values x 3 values = 12 combinations).
hyper_params = GridParameterSampling(
    {
        "--n_estimators": choice(10, 20, 50, 100),
        "--min_samples_leaf": choice(1, 2, 5),
    }
)

# HyperDrive configuration: wraps the script run config, uses the grid above,
# sets no policy, and maximises the "accuracy" metric.
hyper_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=hyper_params,
    policy=None,
    primary_metric_name="accuracy",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,  # note: larger than the 12-combination grid above
    max_concurrent_runs=2,
)

In [25]:
# Experiment named "HyperExp1" in the workspace; the HyperDrive run is
# submitted under it.
new_experiment = Experiment(
    workspace=ws,
    name="HyperExp1",
)

# Submit the HyperDrive configuration, then wait for the run to complete while
# showing its output in the notebook.
new_run = new_experiment.submit(config=hyper_config)
new_run.wait_for_completion(show_output=True)

RunId: HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539
Web View: https://ml.azure.com/runs/HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539?wsid=/subscriptions/49e690c2-0fc4-42a6-ba64-9ea0fa04ddda/resourcegroups/AzureMLsdkgroup/workspaces/slavasdkworkspace&tid=77487836-da45-4554-ad65-1775dca67ca5

Streaming azureml-logs/hyperdrive.txt

[2024-04-06T15:31:43.654239][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space
[2024-04-06T15:31:44.1807089Z][SCHEDULER][INFO]Scheduling job, id='HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_0' 
[2024-04-06T15:31:44.148820][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.
[2024-04-06T15:31:44.3517894Z][SCHEDULER][INFO]Scheduling job, id='HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_1' 
[2024-04-06T15:31:44.8509611Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_0' 
[2024-04-06T15:31:44.8831634Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_6cd40be4-dd11-4c43

{'runId': 'HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539',
 'target': 'my-cluster-001',
 'status': 'Completed',
 'startTimeUtc': '2024-04-06T15:31:42.720915Z',
 'endTimeUtc': '2024-04-06T15:41:20.973599Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '168cfd91-0e6c-4b08-bf36-109a148ceee7',
  'user_agent': 'python/3.9.16 (Windows-10-10.0.19045-SP0) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.55.0',
  'space_size': '12',
  'best_child_run_id': 'HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_5',
  'score': '0.7817796610169492',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_5'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {'amlClientType': 'azureml-s

In [26]:
# Select the child run that scored best on the primary metric.
best_run = new_run.get_best_run_by_primary_metric()

# The lookup can come back empty (None) when no child run logged the primary
# metric; stop with a clear message rather than failing on `best_run.id`.
if best_run is None:
    raise RuntimeError("No child run reported the primary metric; cannot select a best run")

print("Best run ID : ", best_run.id)
print(best_run.get_metrics())

Best run ID :  HD_6cd40be4-dd11-4c43-a3ec-bb30728a3539_5
{'accuracy': 0.7817796610169492}
