# Hyperparameter Tuning using HyperDrive

 Import Dependencies.

In [None]:
import logging

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import warnings
import os

# Squash warning messages for cleaner output in the notebook
warnings.showwarning = lambda *args, **kwargs: None

import azureml.core
from azureml.core import Experiment, Workspace, Dataset
from azureml.train.automl import AutoMLConfig
from datetime import datetime

## Dataset

 Get data.  write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [1]:
from azureml.core import Workspace, Experiment

ws = Workspace.get(name="quick-starts-ws-127528")
exp = Experiment(workspace=ws, name="hyper_drive")

print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')


Workspace name: quick-starts-ws-127528
Azure region: southcentralus
Subscription id: 30d182b7-c8c4-421c-8fa0-d3037ecfe6d2
Resource group: aml-quickstarts-127528


In [2]:
run = exp.start_logging()

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException


# TODO: Create compute cluster
# Use vm_size = "Standard_D2_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.
cluster_name="hd-cluster"
try:
  compute_target=ComputeTarget(workspace=ws,name=cluster_name)
  print("Found existing compute target")
except ComputeTargetException:
  print('Creating a new compute target')
  compute_config=AmlCompute.provisioning_configuration(vm_size='Standard_D2_V2',max_nodes=4)
  compute_target=ComputeTarget.create(ws,cluster_name,compute_config)
  

compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

Creating a new compute target
hd-cluster AmlCompute Creating
notebook127528 ComputeInstance Succeeded


## Hyperdrive Configuration

1.The model used here is SVM for classification since its more robust to outliers than Logistic Regression and uses non linear kernel

2.Bandit policy is an early termination policy based on slack factor, the early policy terminates the run when the primary metric is not within the speciifed slack factor amount with respect to the best performing training run

3.The model uses Random Parameter Sampling for finding the inverse regularization strenghth and coef0

4.The Estimator is specified using SKLearn and the hyperdrive config is specified using the estimator

In [7]:
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform,choice
import os

# Specify parameter sampler
ps = RandomParameterSampling(parameter_space={"--C": choice(0.001,0.01, 0.1, 1, 10, 100) , "--coef0": choice(0,1,2,3)})

# Specify a Policy
policy = BanditPolicy(evaluation_interval=5,slack_factor=0.1)
if "training" not in os.listdir():
    os.mkdir("./training")

# Create a SKLearn estimator for use with train.py
est =SKLearn(source_directory='./',compute_target=compute_target,entry_script='train.py') ### YOUR CODE HERE ###

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config =HyperDriveConfig(estimator=est , hyperparameter_sampling=ps, policy=policy , primary_metric_name='Accuracy',primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,max_total_runs=50,max_concurrent_runs=4) 

In [8]:
# Submit your experiment
hyperdrive_run=exp.submit(hyperdrive_config)



## Run Details



In [9]:
from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_21512e0b-61ca-4838-a179-167560244e49
Web View: https://ml.azure.com/experiments/hyper_drive/runs/HD_21512e0b-61ca-4838-a179-167560244e49?wsid=/subscriptions/30d182b7-c8c4-421c-8fa0-d3037ecfe6d2/resourcegroups/aml-quickstarts-127528/workspaces/quick-starts-ws-127528

Streaming azureml-logs/hyperdrive.txt

"<START>[2020-11-23T11:39:14.870083][API][INFO]Experiment created<END>\n"<START>[2020-11-23T11:39:16.5847249Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2020-11-23T11:39:17.745181][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2020-11-23T11:39:18.252398][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_21512e0b-61ca-4838-a179-167560244e49
Web View: https://ml.azure.com/experiments/hyper_drive/runs/HD_21512e0b-61ca-4838-a179-167560244e49?wsid=/subscriptions/30d182b7-c8c4

{'runId': 'HD_21512e0b-61ca-4838-a179-167560244e49',
 'target': 'hd-cluster',
 'status': 'Completed',
 'startTimeUtc': '2020-11-23T11:39:14.592628Z',
 'endTimeUtc': '2020-11-23T11:59:34.469721Z',
 'properties': {'primary_metric_config': '{"name": "Accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '941000c0-580e-4873-8f08-f669e5f61a82',
  'score': '0.625',
  'best_child_run_id': 'HD_21512e0b-61ca-4838-a179-167560244e49_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://mlstrg127528.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_21512e0b-61ca-4838-a179-167560244e49/azureml-logs/hyperdrive.txt?sv=2019-02-02&sr=b&sig=xWbmuc6pcsaZiT83fKjQY51KJ1BEvTJ98bYovsmiDYY%3D&st=2020-11-23T11%3A49%3A59Z&se=2020-11-23T19%3A59%3A59Z&sp=r'}}

## Best Model


In [10]:
best_run=hyperdrive_run.get_best_run_by_primary_metric()

In [11]:
best_run_metrics=best_run.get_metrics()

In [12]:
print(best_run.get_details()['runDefinition']['arguments'])

['--C', '1', '--coef0', '0']


In [13]:
print("Best_run_id",best_run.id)
print("Best_run_accuracy",best_run_metrics['Accuracy'])

Best_run_id HD_21512e0b-61ca-4838-a179-167560244e49_0
Best_run_accuracy 0.625


In [14]:
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
hyper_drive,HD_21512e0b-61ca-4838-a179-167560244e49_0,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [15]:
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_a4396d854a48ee90b16ed40d6b5955a12c86dc21ea469d13866e7474784713bd_d.txt',
 'azureml-logs/65_job_prep-tvmps_a4396d854a48ee90b16ed40d6b5955a12c86dc21ea469d13866e7474784713bd_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_a4396d854a48ee90b16ed40d6b5955a12c86dc21ea469d13866e7474784713bd_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/106_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/model.joblib']

In [16]:

model=best_run.register_model(model_name='model_hyper',model_path='outputs/model.joblib')

In [17]:
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-127528', subscription_id='30d182b7-c8c4-421c-8fa0-d3037ecfe6d2', resource_group='aml-quickstarts-127528'), name=model_hyper, id=model_hyper:1, version=1, tags={}, properties={})


In [18]:
compute_target.delete()

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

Current provisioning state of AmlCompute is "Deleting"

