# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [2]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
#from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

#from azureml.pipeline.steps import AutoMLStep

# Report the installed Azure ML core SDK version in the cell output
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.39.0


## Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [3]:
# Name of the experiment, then the workspace handle (read from the local
# config file) and the experiment object bound to that workspace.
experiment_name = 'heart_failure_exp'
ws = Workspace.from_config()

experiment = Experiment(workspace=ws, name=experiment_name)

In [4]:
default_ds = ws.get_default_datastore()

if 'Heart Failure' not in ws.datasets:
    # Upload the heart-failure CSV into the 'data/' folder of the default datastore
    default_ds.upload_files(files=['./heart_failure_clinical_records_dataset.csv'],
                        target_path='data/', # Put it in a folder path in the datastore
                        overwrite=True, # Replace existing files of the same name
                        show_progress=True)

    # Create a tabular dataset from the uploaded file only (this may take a short while).
    # Pointing at the exact file, rather than 'data/*.csv', keeps any other CSV that
    # happens to live under data/ out of the training data.
    tab_data_set = Dataset.Tabular.from_delimited_files(
        path=(default_ds, 'data/heart_failure_clinical_records_dataset.csv'))

    # Register the tabular dataset; a registration failure is reported, not raised
    try:
        tab_data_set = tab_data_set.register(workspace=ws,
                                name='Heart Failure',
                                description='Clinic data',
                                tags = {'format':'CSV'},
                                create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)
else:
    # Bind the existing dataset so tab_data_set is defined on both branches
    tab_data_set = ws.datasets['Heart Failure']
    print('Dataset already registered.')

Dataset already registered.


In [5]:
import os, shutil

# Local folder that holds the files submitted with the training run
folder_name = 'training'
experiment_folder = f'./{folder_name}'
os.makedirs(experiment_folder, exist_ok=True)

Define the Python environment that will run on the compute target by writing a Conda specification file.

In [6]:
%%writefile $experiment_folder/conda_dependencies.yml
# Conda specification for the environment that runs heart_training.py.
# NOTE(review): only the Python version is pinned; the library versions below
# float, so a rebuilt environment may resolve to different releases.
dependencies:
  - python=3.6.2
  # Libraries imported by the training script
  - scikit-learn
  - numpy
  - pandas
  - pip:
    # presumably supplies the azureml.core package the script imports - confirm
    - azureml-defaults

Writing ./training/conda_dependencies.yml


The Standard_DS11_v2 VM size is recommended to achieve the optimal balance of cost and performance.

In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "aml-cluster"

try:
    # Look the compute target up by name in the workspace
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster and wait for it.
    # Any provisioning error is printed rather than raised.
    try:
        compute_config = AmlCompute.provisioning_configuration(max_nodes=2,
                                                               vm_size='STANDARD_DS11_V2')
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)
else:
    print('Found existing cluster, use it.')

Found existing cluster, use it.


## Prepare a training script

In [8]:
%%writefile $experiment_folder/heart_training.py
# Training script for the heart-failure classifier.
#
# Reads the 'training_data' named input of the current run, trains an AdaBoost
# classifier (decision-tree base learner) with the hyperparameters passed on
# the command line, logs Accuracy and AUC to the run, and saves the fitted
# model to outputs/heart_model.pkl.

# Import libraries
import argparse, joblib, os
from azureml.core import Run
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import  AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Get the experiment run context
run = Run.get_context()

# Get script arguments
parser = argparse.ArgumentParser()

# Input dataset. The value is not read directly: the data is fetched through
# run.input_datasets below, keyed by the dataset's input name.
parser.add_argument("--input-data", type=str, dest='input_data', help='training dataset')

# Hyperparameters
parser.add_argument('--learning_rate', type=float, dest='learning_rate', default=0.1, help='learning rate')
parser.add_argument('--n_estimators', type=int, dest='n_estimators', default=100, help='number of estimators')

# Add arguments to args collection
args = parser.parse_args()

# Log hyperparameter values. The builtin float/int are used because the
# np.float / np.int aliases are deprecated and absent from newer NumPy releases.
run.log('learning_rate', float(args.learning_rate))
run.log('n_estimators', int(args.n_estimators))

# Load the heart-failure dataset
print("Loading Data...")

df = run.input_datasets['training_data'].to_pandas_dataframe()

# Split the frame into features and label
label = 'DEATH_EVENT'
df_features = df.drop(label, axis=1)

# Columns with more than two distinct values are treated as numeric and scaled;
# the remaining (at most two-valued) columns are passed through unchanged.
# Positions are used instead of names because the model is fed plain arrays.
numeric_idx = [position for position, n_unique in enumerate(df_features.nunique()) if n_unique > 2]

# Separate features and labels
X, y = df_features.values, df[label].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train an AdaBoost classification model with the specified hyperparameters
print('Training a classification model')

# The scaler lives inside the pipeline so that (a) it is fitted on the training
# split only, and (b) it is saved with the model, letting callers of
# model.predict pass raw, unscaled feature rows.
preprocessor = ColumnTransformer([('scale_numeric', MinMaxScaler(), numeric_idx)],
                                 remainder='passthrough')

# max_depth=3: shallow trees to save time and limit overfitting
classifier = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=3),
                                random_state=42,
                                learning_rate=args.learning_rate,
                                n_estimators=args.n_estimators)

model = Pipeline([('preprocess', preprocessor), ('classifier', classifier)]).fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
run.log('Accuracy', float(acc))

# calculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))
run.log('AUC', float(auc))

# Save the model in the run outputs
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/heart_model.pkl')

run.complete()

Writing ./training/heart_training.py


## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for choosing the different hyperparameters, termination policy and config settings.

In [8]:
# Earlier grid-sampling HyperDrive configuration, kept for reference only:
# the whole cell is a single string literal, so none of it executes.
'''
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.train.hyperdrive import GridParameterSampling, HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.widgets import RunDetails
from azureml.train.hyperdrive import BanditPolicy

# Create a Python environment for the experiment
hyper_env = Environment.from_conda_specification("experiment_env", experiment_folder + "/conda_dependencies.yml")

# Get the training dataset
heart_ds = ws.datasets.get("Heart Failure")

# Create a script config
script_config = ScriptRunConfig(source_directory=experiment_folder,
                                script='heart_training.py',
                                # Add non-hyperparameter arguments -in this case, the training dataset
                                arguments = ['--input-data', heart_ds.as_named_input('training_data')],
                                environment=hyper_env,
                                compute_target = training_cluster)

early_termination_policy = BanditPolicy(slack_amount = 0.2,
                                        evaluation_interval=1,
                                        delay_evaluation=5)


# Sample a range of parameter values
params = GridParameterSampling(
    {
        # Hyperdrive will try 6 combinations, adding these as script arguments
        '--learning_rate': choice(0.01, 0.1, 1.0),
        '--n_estimators' : choice(10,100)
    }
)

# Configure hyperdrive settings
hyperdrive = HyperDriveConfig(run_config=script_config, 
                          hyperparameter_sampling=params, 
                          policy=early_termination_policy, # No early stopping policy
                          primary_metric_name='AUC', # Find the highest AUC metric
                          primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
                          max_total_runs=6, # Restict the experiment to 6 iterations
                          max_concurrent_runs=2) # Run up to 2 iterations in parallel

'''

In [9]:
# Submission of the earlier grid-sampling configuration, kept for reference only:
# the cell is a single string literal, so nothing here is submitted.
'''
# Run the experiment
experiment = Experiment(workspace=ws, name='heart-hyperdrive')
run = experiment.submit(config=hyperdrive)

# Show the status in the notebook as the experiment runs
RunDetails(run).show()
run.wait_for_completion()
'''

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

{'runId': 'HD_25d95884-0d1d-4a77-8f52-cabb9e9120b6',
 'target': 'aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-03-22T19:50:24.004906Z',
 'endTimeUtc': '2022-03-22T20:08:06.904052Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "AUC", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'cde9891d-7b1d-4ca1-a0ff-3c66350d54d6',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1068-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.38.0',
  'space_size': '6',
  'score': '0.9495967741935485',
  'best_child_run_id': 'HD_25d95884-0d1d-4a77-8f52-cabb9e9120b6_0',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://amlworkspace5624884502.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_25d95884-0d1d-4a77-8f52-cabb9e9120b6/azure

## HD Config

In [9]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.train.hyperdrive import GridParameterSampling, RandomParameterSampling,HyperDriveConfig, PrimaryMetricGoal, choice,uniform
from azureml.widgets import RunDetails
from azureml.train.hyperdrive import BanditPolicy

# Environment built from the conda specification in the experiment folder
hyper_env = Environment.from_conda_specification("experiment_env", experiment_folder + "/conda_dependencies.yml")

# Registered dataset that is handed to the training script
heart_ds = ws.datasets.get("Heart Failure")

# Script run configuration: the training script, its fixed (non-hyperparameter)
# arguments - here the dataset as the named input 'training_data' - plus the
# environment and compute target it runs on.
estimator = ScriptRunConfig(
    source_directory=experiment_folder,
    script='heart_training.py',
    arguments=['--input-data', heart_ds.as_named_input('training_data')],
    environment=hyper_env,
    compute_target=training_cluster,
)

# Hyperparameter search space; each sampled value is appended to the script
# arguments under the given flag.
search_space = {
    '--learning_rate': uniform(0.05, 0.1),
    '--n_estimators': choice(50, 75, 100, 150),
}
param_sampling = RandomParameterSampling(search_space)

# Bandit early-termination policy (slack 0.2, evaluated every interval after a delay of 5)
early_termination_policy = BanditPolicy(evaluation_interval=1,
                                        delay_evaluation=5,
                                        slack_amount=0.2)

# HyperDrive settings: maximise the logged 'AUC' metric over at most 6 runs,
# 2 at a time, applying the early-termination policy defined above.
hyperdrive_run_config = HyperDriveConfig(
    run_config=estimator,
    hyperparameter_sampling=param_sampling,
    policy=early_termination_policy,
    primary_metric_name='AUC',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=6,
    max_concurrent_runs=2,
)


In [10]:
# Submit the HyperDrive configuration as a run of the 'heart-hyperdrive' experiment
experiment = Experiment(ws, 'heart-hyperdrive')
run = experiment.submit(hyperdrive_run_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [11]:
# Render the RunDetails widget for the submitted HyperDrive run so its
# status can be followed in the notebook while the experiment runs
RunDetails(run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [12]:
# Block until the HyperDrive run finishes, showing its output in the cell;
# the returned run details become the cell's displayed result
run.wait_for_completion(show_output=True)

RunId: HD_af18dd1d-0b28-4747-8414-839a656415c7
Web View: https://ml.azure.com/runs/HD_af18dd1d-0b28-4747-8414-839a656415c7?wsid=/subscriptions/6eee9ed4-ef02-4ad0-bbbd-194153ad2069/resourcegroups/aml-resource/workspaces/aml-workspace&tid=b8f88a0b-3d95-478b-b123-a494d78523a4

Streaming azureml-logs/hyperdrive.txt

"<START>[2022-04-06T14:56:43.000879][API][INFO]Experiment created<END>\n""<START>[2022-04-06T14:56:43.870270][GENERATOR][INFO]Trying to sample '2' jobs from the hyperparameter space<END>\n"<START>[2022-04-06T14:56:44.7561362Z][SCHEDULER][INFO]Scheduling job, id='HD_af18dd1d-0b28-4747-8414-839a656415c7_0'<END>"<START>[2022-04-06T14:56:44.838644][GENERATOR][INFO]Successfully sampled '2' jobs, they will soon be submitted to the execution target.<END>\n"<START>[2022-04-06T14:56:44.8809520Z][SCHEDULER][INFO]Scheduling job, id='HD_af18dd1d-0b28-4747-8414-839a656415c7_1'<END><START>[2022-04-06T14:56:45.4436987Z][SCHEDULER][INFO]Successfully scheduled a job. Id='HD_af18dd1d-0b28-4747-8

{'runId': 'HD_af18dd1d-0b28-4747-8414-839a656415c7',
 'target': 'aml-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-04-06T14:56:42.722241Z',
 'endTimeUtc': '2022-04-06T15:06:54.133196Z',
 'services': {},
 'properties': {'primary_metric_config': '{"name": "AUC", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': 'd38d2611-e5c6-4630-baef-adb7d83c18fa',
  'user_agent': 'python/3.8.5 (Linux-5.4.0-1073-azure-x86_64-with-glibc2.10) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.39.0',
  'space_size': 'infinite_space_size',
  'score': '0.9193548387096775',
  'best_child_run_id': 'HD_af18dd1d-0b28-4747-8414-839a656415c7_4',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://amlworkspace6104020693.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_af18dd1d-0b28-4747-8414-839a6564

## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [13]:
# Print all child runs, sorted by the primary metric
#for child_run in run.get_children_sorted_by_primary_metric():
#    print(child_run)

# Pick the best child run by the primary metric, then pull its logged
# metrics and the argument list recorded in its run definition
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
script_arguments = best_run_details['runDefinition']['arguments']

# Report the run id, both metrics and the arguments
print('Best Run Id: ', best_run.id)
for metric_name in ('AUC', 'Accuracy'):
    print(' -' + metric_name + ':', best_run_metrics[metric_name])
print(' -Arguments:',script_arguments)

Best Run Id:  HD_af18dd1d-0b28-4747-8414-839a656415c7_4
 -AUC: 0.9193548387096775
 -Accuracy: 0.8555555555555555
 -Arguments: ['--input-data', 'DatasetConsumptionConfig:training_data', '--learning_rate', '0.062214747335019305', '--n_estimators', '150']


## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [14]:
# Save the best model
from azureml.core import Model

# Metrics of the best run, stored as properties on the registered model
model_metrics = {metric_name: best_run_metrics[metric_name] for metric_name in ('AUC', 'Accuracy')}

# Register the model file saved by the best run
best_run.register_model(model_name='HyperDrive_heart_model',
                        model_path='outputs/heart_model.pkl',
                        tags={'Training context':'Hyperdrive'},
                        properties=model_metrics)


Model(workspace=Workspace.create(name='aml-workspace', subscription_id='6eee9ed4-ef02-4ad0-bbbd-194153ad2069', resource_group='aml-resource'), name=HyperDrive_heart_model, id=HyperDrive_heart_model:1, version=1, tags={'Training context': 'Hyperdrive'}, properties={'AUC': '0.9193548387096775', 'Accuracy': '0.8555555555555555'})

In [15]:
# Print every model registered in the workspace with its version,
# followed by its tags and then its properties, one per line
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print ('\t',tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print ('\t',prop_name, ':', prop)
    print('\n')

HyperDrive_heart_model version: 1
	 Training context : Hyperdrive
	 AUC : 0.9193548387096775
	 Accuracy : 0.8555555555555555


AutoML_heart_model version: 1
	 Training context : AutoML




In [16]:
# Fetch the registered HyperDrive model from the workspace and show its version
model = ws.models['HyperDrive_heart_model']
print(f'{model.name} version {model.version}')

HyperDrive_heart_model version 1


In [17]:
import os

# Folder for the deployment files and the name of the scoring script inside it
deployment_folder = './heart_service'
script_file = 'score_heart.py'

os.makedirs(deployment_folder, exist_ok=True)
print(f'{deployment_folder} folder created.')

# Full path the scoring script is written to
script_path = os.path.join(deployment_folder, script_file)

./heart_service folder created.


In [18]:
%%writefile $script_path
import json
import joblib
import numpy as np
import os

# Called when the service is loaded
def init():
    """Load heart_model.pkl from the AZUREML_MODEL_DIR directory into the global `model`."""
    global model
    # Directory of the deployed model, taken from the environment
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    model = joblib.load(os.path.join(model_dir, 'heart_model.pkl'))

# Called when a request is received
def run(raw_data):
    """Score a batch of feature rows with the loaded model.

    Args:
        raw_data: JSON document of the form {"data": [[...], ...]}, one inner
            list of feature values per row to score.

    Returns:
        A JSON-encoded list with the model's predicted class (0 or 1) for
        each input row, in input order.
    """
    # Get the input data as a numpy array
    data = np.array(json.loads(raw_data)['data'])
    # Get a prediction from the model
    predictions = model.predict(data)
    # Return the model's own labels. They were previously looked up in [1, 0],
    # which flipped every prediction (0 -> 1, 1 -> 0). The int() cast turns
    # numpy integers into plain ints so json.dumps can serialise them.
    predicted_classes = [int(prediction) for prediction in predictions]
    # Return the predictions as JSON
    return json.dumps(predicted_classes)

Writing ./heart_service/score_heart.py


In [19]:
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

service_name = "heart-service"

# Scoring environment: a new environment with the listed pip packages added
python_packages = ['scikit-learn', 'azureml-defaults', 'azure-ml-api-sdk']
service_env = Environment(name='service-env')
for package in python_packages:
    service_env.python.conda_dependencies.add_pip_package(package)

# Container sizing for the ACI web service
deployment_config = AciWebservice.deploy_configuration(memory_gb=1, cpu_cores=1)

# Entry script and environment used to serve requests
inference_config = InferenceConfig(entry_script=script_file,
                                   source_directory=deployment_folder,
                                   environment=service_env)

# Deploy the model (replacing any service of the same name), wait for the
# deployment to finish while showing its output, then print the final state
print('Deploying model...')
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
print(service.state)

Deploying model...
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-04-04 12:08:18+00:00 Creating Container Registry if not exists.
2022-04-04 12:08:18+00:00 Registering the environment.
2022-04-04 12:08:19+00:00 Building image..
2022-04-04 12:12:32+00:00 Generating deployment configuration.
2022-04-04 12:12:34+00:00 Submitting deployment to compute..
2022-04-04 12:12:39+00:00 Checking the status of deployment heart-service..
2022-04-04 12:13:47+00:00 Checking the status of inference endpoint heart-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [25]:
# Preview the first row's first 12 columns as a nested list of plain values
heart_ds.to_pandas_dataframe().iloc[0:1,0:12].values.tolist()

[[75.0, 0.0, 582.0, 0.0, 20.0, 1.0, 265000.0, 1.9, 130.0, 1.0, 0.0, 4.0]]

In [26]:
import json

# Use the first row's first 12 columns as the test record
x_new = heart_ds.to_pandas_dataframe().iloc[0:1,0:12].values.tolist()
print(f'Patient: {x_new[0]}')

# Wrap the record in the {"data": ...} JSON document sent to the service
input_json = json.dumps({"data": x_new})

# Call the deployed service through the SDK with the JSON payload
predictions = service.run(input_data=input_json)

# The reply is a JSON list; show the first (and only) predicted class
predicted_classes = json.loads(predictions)
print(predicted_classes[0])

Patient: [75.0, 0.0, 582.0, 0.0, 20.0, 1.0, 265000.0, 1.9, 130.0, 1.0, 0.0, 4.0]
1


TODO: In the cell below, send a request to the web service you deployed to test it.

In [29]:
endpoint = service.scoring_uri


import requests
import json

# Use the first three rows' first 12 columns as test records
x_new = heart_ds.to_pandas_dataframe().iloc[0:3,0:12].values.tolist()

# Convert the array to a serializable list in a JSON document
input_json = json.dumps({"data": x_new})

# Set the content type
headers = { 'Content-Type':'application/json' }

# Post to the scoring endpoint. The timeout stops the cell hanging forever
# on an unresponsive service.
predictions = requests.post(endpoint, data=input_json, headers=headers, timeout=60)

# Fail on an HTTP error status here, with a clear message, instead of
# failing later while decoding an error body as predictions
predictions.raise_for_status()

# The response body is decoded twice: once as the HTTP JSON body, then as
# the JSON string inside it
predicted_classes = json.loads(predictions.json())

for patient, predicted_class in zip(x_new, predicted_classes):
    print ("Patient {}".format(patient), predicted_class)

Patient [75.0, 0.0, 582.0, 0.0, 20.0, 1.0, 265000.0, 1.9, 130.0, 1.0, 0.0, 4.0] 1
Patient [55.0, 0.0, 7861.0, 0.0, 38.0, 0.0, 263358.03, 1.1, 136.0, 1.0, 0.0, 6.0] 1
Patient [65.0, 0.0, 146.0, 0.0, 20.0, 0.0, 162000.0, 1.3, 129.0, 1.0, 1.0, 7.0] 1


In [31]:
# Show the first three rows of the dataset, all columns
heart_ds.to_pandas_dataframe().head(3)

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1


TODO: In the cell below, print the logs of the web service and delete the service

In [2]:
from azureml.core import Workspace
from azureml.core.webservice import Webservice

# Name of the deployed web service
name = "heart-service"

# Requires the config to be downloaded first to the current working directory
ws = Workspace.from_config()

# Attach to the existing web service and enable Application Insights on it
service = Webservice(workspace=ws, name=name)
service.update(enable_app_insights=True)

# Fetch the service logs and print them
logs = service.get_logs()
print(logs)

# If you need to make a change and redeploy, you may need to delete unhealthy service using the following code:
#service.delete()

2022-04-04T12:13:31,987564800+00:00 - rsyslog/run 
2022-04-04T12:13:31,987384200+00:00 - iot-server/run 
2022-04-04T12:13:32,007923200+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2022-04-04T12:13:32,034762100+00:00 - nginx/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2022-04-04T12:13:32,492958100+00:00 - iot-server/finish 1 0
2022-04-04T12:13:32,494933100+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (66)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 98
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2022-04-04 12:13:33,537 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2022-04-04 12:13:33,538 | root | INFO | Starting up request id generator
2022-04-04 12:13:33,538 | root | INFO | Star

In [None]:
# Delete the deployed web service, then print a confirmation message
service.delete()
print ('Service deleted.')

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.

