# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import os
import shutil
import joblib
import logging
import csv
from azureml.core import Dataset, Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails

from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, choice
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import LocalWebservice, AciWebservice
from azureml.core.model import Model
import requests
import json

from azureml.core import ScriptRunConfig
import azureml.core

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.19.0


### Workspace

In [2]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name="hd-udacity-capstone")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = exp.start_logging()

Workspace name: quick-starts-ws-135035
Azure region: southcentralus
Subscription id: 1b944a9b-fdae-4f97-aeb1-b7eea0beac53
Resource group: aml-quickstarts-135035


### Configure Compute cluster

In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

compute_cluster_name = "hd-project"

try:
    compute_target = ComputeTarget(workspace=ws, name=compute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=5, 
                                                           idle_seconds_before_scaledown=2400,
                                                           vm_priority='lowpriority')
    compute_target = ComputeTarget.create(ws, compute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 2)

Creating
Succeeded....................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [4]:
compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

Capstone-udacity ComputeInstance Succeeded
hd-project AmlCompute Succeeded


## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [5]:
from azureml.core import Dataset
from azureml.data.dataset_factory import DataType

# create a TabularDataset from a delimited file behind a public web url and convert column "Survived" to boolean
web_path ="https://raw.githubusercontent.com/Aishwaryasasanapuri/test2/main/glass.csv"
final_df = Dataset.Tabular.from_delimited_files(path=web_path) #, set_column_types={'Type': DataType.to_bool()})

# preview the first 3 rows of titanic_ds
final_df.take(3).to_pandas_dataframe()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1


## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

In [6]:
# Specify parameter sampler
ps = RandomParameterSampling(parameter_space={"--C":uniform(0.01,1),"--max_iter": choice(50,100,150,200)})

# Specify a Policy
policy = BanditPolicy(evaluation_interval = 5, slack_factor = 0.1, delay_evaluation = 1)

if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est = SKLearn(source_directory='./',
              compute_target=compute_target,
              entry_script='train.py')

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(estimator=est,
                             hyperparameter_sampling=ps,
                             policy=policy,
                             primary_metric_name="Accuracy",
                             primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                             max_total_runs= 100,
                             max_concurrent_runs=4)

'SKLearn' estimator is deprecated. Please use 'ScriptRunConfig' from 'azureml.core.script_run_config' with your own defined environment or the AzureML-Tutorial curated environment.


In [7]:
# Submitting the hyperdrive run to the experiment
hyperdrive_run = exp.submit(hyperdrive_config, show_output=True)



## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [8]:
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [9]:
hyperdrive_run.get_status()

'Running'

In [10]:
hyperdrive_run.wait_for_completion(show_output=True)

RunId: HD_58778be0-1256-465b-a539-8511f4dd06ee
Web View: https://ml.azure.com/experiments/hd-udacity-capstone/runs/HD_58778be0-1256-465b-a539-8511f4dd06ee?wsid=/subscriptions/1b944a9b-fdae-4f97-aeb1-b7eea0beac53/resourcegroups/aml-quickstarts-135035/workspaces/quick-starts-ws-135035

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-01-17T11:30:48.870596][API][INFO]Experiment created<END>\n""<START>[2021-01-17T11:30:49.365062][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-01-17T11:30:49.528543][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2021-01-17T11:30:51.0144160Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>

Execution Summary
RunId: HD_58778be0-1256-465b-a539-8511f4dd06ee
Web View: https://ml.azure.com/experiments/hd-udacity-capstone/runs/HD_58778be0-1256-465b-a539-8511f4dd06ee?wsid=/subscriptio

{'runId': 'HD_58778be0-1256-465b-a539-8511f4dd06ee',
 'target': 'hd-project',
 'status': 'Completed',
 'startTimeUtc': '2021-01-17T11:30:48.607176Z',
 'endTimeUtc': '2021-01-17T12:16:35.849163Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '06a83e33-90bd-4ee0-8753-17696e7db5ef',
  'score': '0.6615384615384615',
  'best_child_run_id': 'HD_58778be0-1256-465b-a539-8511f4dd06ee_1',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg135035.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_58778be0-1256-465b-a539-8511f4dd06ee/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=2BchD86W8%2BHmIvSGGWG3OKd6GgRyESbzJmE85i6Hejs%3D&st=2021-01-17T12%3A06%3A38Z&se=2021-01-17T20%3A16%3A38Z&sp=r'}}

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [11]:
import joblib
# Get your best run and save the model from that run.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hd-udacity-capstone,HD_58778be0-1256-465b-a539-8511f4dd06ee_1,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
parameter_values = best_run.get_details()['runDefinition']['arguments']
print(parameter_values)

['--C', '0.3650732320679472', '--max_iter', '50']


In [13]:
best_run_metrics = best_run.get_metrics()
print("Best_run_id",best_run.id)
print('\n Metrics: ', best_run_metrics)
print("Best_run_accuracy",best_run_metrics['Accuracy'])

Best_run_id HD_58778be0-1256-465b-a539-8511f4dd06ee_1

 Metrics:  {'Regularization Strength:': 0.3650732320679472, 'Max iterations:': 50, 'Accuracy': 0.6615384615384615}
Best_run_accuracy 0.6615384615384615


In [14]:
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_bf5f3c93bb239ec6e7922f01c854b8d6ca1651884fcdc282c94ac08cc1cc35c4_p.txt',
 'azureml-logs/65_job_prep-tvmps_bf5f3c93bb239ec6e7922f01c854b8d6ca1651884fcdc282c94ac08cc1cc35c4_p.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_bf5f3c93bb239ec6e7922f01c854b8d6ca1651884fcdc282c94ac08cc1cc35c4_p.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/100_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/hdmodel.joblib']

In [15]:
#TODO: Save the best model

best_model_hd=best_run.register_model(model_name='HyperdriveModel',model_path='outputs/hdmodel.joblib',description='best hyperdrive model')
best_model_hd

Model(workspace=Workspace.create(name='quick-starts-ws-135035', subscription_id='1b944a9b-fdae-4f97-aeb1-b7eea0beac53', resource_group='aml-quickstarts-135035'), name=HyperdriveModel, id=HyperdriveModel:1, version=1, tags={}, properties={})

In [16]:
print(best_model_hd)

Model(workspace=Workspace.create(name='quick-starts-ws-135035', subscription_id='1b944a9b-fdae-4f97-aeb1-b7eea0beac53', resource_group='aml-quickstarts-135035'), name=HyperdriveModel, id=HyperdriveModel:1, version=1, tags={}, properties={})
