<a href="https://colab.research.google.com/github/FranciscoOcampoPredictiva/azureml_course/blob/main/Lecture_1_Configure_the_Hyperdrive_Run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1 - Installation and Setup

In [None]:
# Install azureml SDK package
! pip install -q azureml-sdk

In [None]:
# Importing the class
from azureml.core import Workspace

In [None]:
# Build the workspace handle from the connection details stored in the
# config file uploaded to the Colab session.
config_path = '/content/config.json'
ws = Workspace.from_config(path=config_path)

# Step 2 - Accessing the input data

In [None]:
# Look up the registered dataset by name in the workspace.
dataset_name = 'Churn-Modelling-Data-SDK'
input_ds = ws.datasets.get(dataset_name)

# .get() returns None for an unknown name; stop here with a clear message
# instead of failing later when input_ds.as_named_input(...) is called.
if input_ds is None:
    raise KeyError(f"Dataset '{dataset_name}' is not registered in the workspace")

# Step 3 - Creating the custom environment

In [None]:
# Importing the classes
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

In [None]:
# Declare the packages the training script needs, split by installer.
conda_pkgs = ['scikit-learn', 'pip', 'pandas']
pip_pkgs = ['azureml-defaults', 'azureml-sdk']
myenv_dep = CondaDependencies.create(conda_packages=conda_pkgs,
                                     pip_packages=pip_pkgs)

# Create the named environment and attach the dependencies to it.
myenv = Environment(name='MyEnvironment')
myenv.python.conda_dependencies = myenv_dep

# Register the environment with the workspace; the registered definition
# is shown as the cell output.
myenv.register(workspace=ws)

{
    "assetId": "azureml://locations/westus/workspaces/553b8d5b-9bad-4be5-b801-a1f052f0eb8b/environments/MyEnvironment/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220729.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "MyEnvironment",
    "pyt

# Step 4 - Create the compute cluster

In [None]:
# Import the class AmlCompute
from azureml.core.compute import AmlCompute

cluster_name = 'azureml-hyper-cluster'

# Reuse the cluster when the workspace already has one with this name;
# otherwise provision a new one and wait until provisioning has finished.
if cluster_name in ws.compute_targets:
    cluster = ws.compute_targets[cluster_name]
else:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        max_nodes=2,
    )
    cluster = AmlCompute.create(
        workspace=ws,
        name=cluster_name,
        provisioning_configuration=compute_config,
    )
    cluster.wait_for_completion()

Provisioning operation finished, operation "Succeeded"


# Step 5 - Script configuration

In [None]:
from azureml.core import ScriptRunConfig

In [None]:
# Hand the dataset to the training script as a named input via --input-data.
raw_data_input = input_ds.as_named_input('raw_data')

# Describe what to run (script), with which packages (environment) and
# where (compute cluster).
script_config = ScriptRunConfig(
    source_directory='.',
    script='hyperdrive_training_script.py',
    arguments=['--input-data', raw_data_input],
    compute_target=cluster,
    environment=myenv,
)

# Step 6 - Create hyperdrive parameters

In [None]:
from azureml.train.hyperdrive import GridParameterSampling, choice

In [None]:
# Discrete search space: each key is the command-line argument passed to the
# training script, each value the set of candidates to try
# (4 x 3 = 12 combinations in total).
search_space = {
    '--n_estimators': choice(10, 20, 30, 50),
    '--min_samples_leaf': choice(1, 2, 3),
}

hyper_params = GridParameterSampling(search_space)

# Step 7 - Configure the Hyperdrive class

In [None]:
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal

In [None]:
# Tie the script configuration to the search space and tell HyperDrive
# which logged metric to optimise and in which direction.
hyper_config = HyperDriveConfig(
    run_config=script_config,
    hyperparameter_sampling=hyper_params,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    policy=None,              # no early-termination policy is supplied
    max_total_runs=20,        # upper bound on the number of child runs
    max_concurrent_runs=2,    # child runs allowed to execute in parallel
)

# Step 8 - Create and submit experiment run

In [None]:
from azureml.core.experiment import Experiment

In [None]:
# Create the experiment that groups the HyperDrive runs in the workspace.
experiment_name = 'hyperdrive-experiment'
new_exp = Experiment(workspace=ws, name=experiment_name)

# Submit the HyperDrive configuration, then block until the run completes;
# the returned run details are displayed as the cell output.
new_run = new_exp.submit(config=hyper_config)
new_run.wait_for_completion()

{'runId': 'HD_116e48df-682d-4c8a-9420-d37a88069b75',
 'target': 'azureml-hyper-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-09-18T18:50:13.60677Z',
 'endTimeUtc': '2022-09-18T19:09:18.23497Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name":"accuracy","goal":"maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'da52c4dd-d654-40db-9c18-7154405358b3',
  'user_agent': 'python/3.7.14 (Linux-5.10.133+-x86_64-with-Ubuntu-18.04-bionic) msrest/0.7.1 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.45.0',
  'space_size': '12',
  'score': '0.8616666666666667',
  'best_child_run_id': 'HD_116e48df-682d-4c8a-9420-d37a88069b75_5',
  'best_metric_status': 'Succeeded',
  'best_data_container_id': 'dcid.HD_116e48df-682d-4c8a-9420-d37a88069b75_5'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'configuration': None,
  'attribution': None,
  'telemetryValues': {

# Step 9 - Best run and Best parameters

In [None]:
# Retrieve the child run with the best value of the primary metric.
best_run = new_run.get_best_run_by_primary_metric()

# get_best_run_by_primary_metric() returns None when no child run logged the
# primary metric; fail with a clear message instead of an AttributeError on
# the attribute accesses below.
if best_run is None:
    raise RuntimeError("No child run reported the primary metric; cannot determine the best run.")

print("Best Run ID : ", best_run.id)
print(best_run.get_metrics())

Best Run ID :  HD_116e48df-682d-4c8a-9420-d37a88069b75_5
{'accuracy': 0.8616666666666667}


In [None]:
# Show the tags of the best child run; they include the hyperparameter
# values that were passed to it.
best_run_tags = best_run.get_tags()
best_run_tags

{'_aml_system_hyperparameters': '{"--n_estimators": 20, "--min_samples_leaf": 3}',
 'hyperparameters': '{"--n_estimators": 20, "--min_samples_leaf": 3}',
 '_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}'}