# Hyperparameter Tuning using HyperDrive

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import numpy as np
import pandas as pd
import os
import glob 
import os

from datetime import datetime
from Train_Model import clean_data
from sklearn.model_selection import train_test_split

In [2]:
from azureml.core import Workspace, Experiment

ws = Workspace.from_config()
experiment_name = 'Hyp-tun-bankrupt-2'

experiment=Experiment(ws, experiment_name)


print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

run = experiment.start_logging()

Workspace name: ops_analytics-workspace
Azure region: westeurope
Subscription id: 73f88e6b-3a35-4612-b550-555157e7059f
Resource group: ops_analytics-rg-gb-dev


# Dataset

TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

## Choose the Compute Cluster 

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException 

# Choose a name for your CPU cluster
amlcompute_cluster_name = "Demo-Cluster"

# Verify that cluster does not exist already
try:
    aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    print("Creating a new compute cluster...")
    compute_config =  AmlCompute.provisioning_configuration(vm_size='STANDARD_D2S_V3', max_nodes=4, min_nodes=0, vm_priority='lowpriority') 
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

# Can poll for a minimum number of nodes and for a specific timeout. 
# If no min node count is provided it uses the scale settings for the cluster.
    
#aml_compute.wait_for_completion(show_output=True)
aml_compute.wait_for_completion(show_output=True, min_node_count = None, timeout_in_minutes = 60)
#compute_target.wait_for_completion(show_output=True, min_node_count = None, timeout_in_minutes = 60)

# use get_status() to get a detailed status for the current cluster. 
print(aml_compute.get_status().serialize())

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-12-11T06:14:25.730000+00:00', 'errors': None, 'creationTime': '2021-09-17T05:18:08.686648+00:00', 'modifiedTime': '2021-09-17T05:18:24.335748+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 1, 'nodeIdleTimeBeforeScaleDown': 'PT1800S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2S_V3'}


In [4]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# to install required packages
env = Environment('tutorial-env')
cd = CondaDependencies.create(pip_packages=['azureml-dataset-runtime[pandas,fuse]', 'azureml-defaults'], conda_packages = ['scikit-learn==0.22.1'])

env.python.conda_dependencies = cd

# Register environment to re-use later
env.register(workspace = ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210714.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "tutorial-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"
 

In [5]:
from azureml.core import ScriptRunConfig

args = ['--n_estimators', 100, '--max_depth', 7  , '--max_features' , 0.25 , '--n_jobs' , -1 ]

src = ScriptRunConfig(source_directory='.',
                      script='Train_Model.py', 
                      arguments=args,
                      compute_target=aml_compute,
                      environment=env)

In [6]:
run = experiment.submit(config=src)

Submitting /mnt/batch/tasks/shared/LS_root/mounts/clusters/trial/code/Users/narendhrakumar.murugan/starter_file directory for run. The size of the directory >= 25 MB, so it can take a few minutes.


In [7]:
from azureml.widgets import RunDetails
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

## Hyperdrive Configuration

TODO: Explain the model you are using and the reason for chosing the different hyperparameters, termination policy and config settings.

In [12]:
# TODO: Create an early termination policy. This is not required if you are using Bayesian sampling.
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform , choice
from azureml.train.estimator import Estimator
from azureml.train.hyperdrive import BayesianParameterSampling


early_termination_policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=1, delay_evaluation=5)

#TODO: Create the different params that you will be using during training
param_sampling = RandomParameterSampling({
                    '--n_estimators': choice(100, 150,200,250,300),
                    '--max_depth': choice(7, 14,21,28,35,42),
                    '--max_features':uniform(0.25,0.50) })


hyperdrive_config = HyperDriveConfig(
                    hyperparameter_sampling=param_sampling,
                    policy = early_termination_policy,
                    primary_metric_name='accuracy',
                    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                    max_concurrent_runs = 4,    
                        max_total_runs=20,
                    run_config = src)

In [13]:
#TODO: Submit your experiment

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [14]:
hyperdrive_run = experiment.submit(hyperdrive_config)

In [15]:
from azureml.widgets import RunDetails
RunDetails(hyperdrive_run).show()
# wait for completion
hyperdrive_run.wait_for_completion(show_output=True)

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_be475902-7a93-4cec-8f12-4f98502711df
Web View: https://ml.azure.com/runs/HD_be475902-7a93-4cec-8f12-4f98502711df?wsid=/subscriptions/73f88e6b-3a35-4612-b550-555157e7059f/resourcegroups/ops_analytics-rg-gb-dev/workspaces/ops_analytics-workspace&tid=cef04b19-7776-4a94-b89b-375c77a8f936

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-12-04T06:50:20.392802][API][INFO]Experiment created<END>\n""<START>[2021-12-04T06:50:21.278884][GENERATOR][INFO]Trying to sample '4' jobs from the hyperparameter space<END>\n""<START>[2021-12-04T06:50:21.459303][GENERATOR][INFO]Successfully sampled '4' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_be475902-7a93-4cec-8f12-4f98502711df
Web View: https://ml.azure.com/runs/HD_be475902-7a93-4cec-8f12-4f98502711df?wsid=/subscriptions/73f88e6b-3a35-4612-b550-555157e7059f/resourcegroups/ops_analytics-rg-gb-dev/workspaces/ops_analytics-workspace&tid=cef04b19-7776-4a94-b89b-375c77a8f936



{'runId': 'HD_be475902-7a93-4cec-8f12-4f98502711df',
 'target': 'Demo-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-12-04T06:50:20.145506Z',
 'endTimeUtc': '2021-12-04T07:30:36.744555Z',
 'properties': {'primary_metric_config': '{"name": "accuracy", "goal": "maximize"}',
  'resume_from': 'null',
  'runTemplate': 'HyperDrive',
  'azureml.runsource': 'hyperdrive',
  'platform': 'AML',
  'ContentSnapshotId': '46061cea-37f7-4354-99d3-67e2c73cfb73',
  'user_agent': 'python/3.6.9 (Linux-5.4.0-1055-azure-x86_64-with-debian-buster-sid) msrest/0.6.21 Hyperdrive.Service/1.0.0 Hyperdrive.SDK/core.1.33.0',
  'score': '0.25',
  'best_child_run_id': 'HD_be475902-7a93-4cec-8f12-4f98502711df_3',
  'best_metric_status': 'Succeeded'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'azureml-logs/hyperdrive.txt': 'https://opsanalyticswo3045522382.blob.core.windows.net/azureml/ExperimentRun/dcid.HD_be475902-7a93-4cec-8f12-4f98502711df/azureml-logs/hyperdrive.txt?sv=2019-07-07&sr=b&si

In [49]:
hyperdrive_run

Experiment,Id,Type,Status,Details Page,Docs Page
Hyp-tun-bankrupt,HD_bb6cd8b5-1964-48b9-ac32-33a0970aa96d,hyperdrive,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [39]:
hyperdrive_run

Experiment,Id,Type,Status,Details Page,Docs Page
Hyp-tun-bankrupt,HD_7488023f-faeb-47fb-87c6-de7a9f0df41f,hyperdrive,Completed,Link to Azure Machine Learning studio,Link to Documentation


## Best Model

TODO: In the cell below, get the best model from the hyperdrive experiments and display all the properties of the model.

In [54]:
import joblib
# Get your best run and save the model from that run.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()


print('Best Run Id: ', best_run.id)
print('\n accuracy:', best_run_metrics['accuracy'])

Best Run Id:  HD_c7a10d9a-6a39-4128-9df3-bfe677e8ed95_2

 accuracy: 0.25


In [12]:
best_run.get_details()

{'runId': 'HD_5b1291c0-6afa-4978-bee9-748ae7c49674_2',
 'target': 'Demo-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-11-26T04:44:39.973395Z',
 'endTimeUtc': '2021-11-26T04:45:13.039466Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '5a653421-dcdb-49af-9e54-9b5d70f1140b',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'Train_Model.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--n_estimators',
   '100',
   '--max_depth',
   '7',
   '--max_features',
   '0.25',
   '--n_jobs',
   '-1',
   '--max_depth',
   '14',
   '--max_features',
   '0.29861546693058016',
   '--n_estimators',
   '100'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'Demo-Cluster',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],


In [55]:
print(best_run.get_details()['runDefinition']['arguments'])

['--n_estimators', '100', '--max_depth', '7', '--max_features', '0.25', '--n_jobs', '-1', '--max_depth', '7', '--max_features', '0.46996715415782236', '--n_estimators', '100']


In [56]:
best_run.get_details()

{'runId': 'HD_c7a10d9a-6a39-4128-9df3-bfe677e8ed95_2',
 'target': 'Demo-Cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-11-28T11:17:41.030508Z',
 'endTimeUtc': '2021-11-28T11:18:16.677393Z',
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': 'd55a6945-2322-4b0e-97a8-ba105e3f9d24',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'Train_Model.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--n_estimators',
   '100',
   '--max_depth',
   '7',
   '--max_features',
   '0.25',
   '--n_jobs',
   '-1',
   '--max_depth',
   '7',
   '--max_features',
   '0.46996715415782236',
   '--n_estimators',
   '100'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'Demo-Cluster',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
 

In [57]:
#TODO: Save the best model

# register model for future deployment
# os.makedirs('outputs', exist_ok=True)
import sklearn

from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
#description = 'Bankruptcy-Prediction-Hypermodel'
tags = {'area': 'Bankruptcy' , 'type':'classification'}

hyperdrive_model = best_run.register_model(model_name='bkt-model-Hyp',
                                                      model_path='outputs/bkt-model-Hyp.pkl',
                                                      model_framework=Model.Framework.SCIKITLEARN,  # Framework used to create the model.
                                                      model_framework_version=sklearn.__version__,  # Version of scikit-learn used to create the model.
                                                      tags=tags
                                                      )

print(hyperdrive_model.name, hyperdrive_model.id, hyperdrive_model.version, sep='\t')

bkt-model-Hyp	bkt-model-Hyp:2	2


## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [58]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.model import Model

TODO: In the cell below, send a request to the web service you deployed to test it.

In [59]:
%%writefile score.py
import json
import numpy as np
import os
import pickle
import joblib
import pandas as pd

def init():
    global model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'bkt-model-Hyp.pkl')
    model = joblib.load(model_path)

def run(raw_data):
    try:
        data = json.loads(raw_data)['data']
        data = pd.DataFrame.from_dict(data)
        # make prediction
        mypredict = model.predict(data)
        return mypredict.tolist()
    except Exception as ex:
        error = str(ex)
        return error

Overwriting score.py


TODO: In the cell below, print the logs of the web service and delete the service

In [60]:
from azureml.core.webservice import AciWebservice

aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               enable_app_insights=True, auth_enabled=True,
                                               tags={"data": "Bankruptcy",  "method" : "sklearn"}, 
                                               description='Predict Bankruptcy POCs')

In [63]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

ws = Workspace.from_config()
model = Model(ws, 'bkt-model-Hyp')


myenv = Environment.get(workspace=ws, name="tutorial-env", version="1")
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

service_name = 'bkt1'
service = Model.deploy(workspace=ws, 
                       name=service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-11-28 14:09:52+00:00 Creating Container Registry if not exists.
2021-11-28 14:09:52+00:00 Registering the environment.
2021-11-28 14:09:55+00:00 Use the existing image.
2021-11-28 14:09:55+00:00 Generating deployment configuration.
2021-11-28 14:09:56+00:00 Submitting deployment to compute.
2021-11-28 14:10:02+00:00 Checking the status of deployment bkt1..
2021-11-28 14:13:16+00:00 Checking the status of inference endpoint bkt1.
Succeeded
ACI service creation operation finished, operation "Succeeded"
CPU times: user 4.76 s, sys: 621 ms, total: 5.38 s
Wall time: 3min 32s


In [25]:
print(service.scoring_uri)

http://af885a24-965d-49d8-bdc2-b8b949dbdc3a.westeurope.azurecontainer.io/score


In [26]:
print(service.get_logs())

2021-11-26T05:36:18,145412900+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-11-26T05:36:18,162071100+00:00 - rsyslog/run 
2021-11-26T05:36:18,165008200+00:00 - nginx/run 
2021-11-26T05:36:18,171856800+00:00 - iot-server/run 
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-11-26T05:36:18,585780600+00:00 - iot-server/finish 1 0
2021-11-26T05:36:18,591944500+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (67)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 98
SPARK_HOME not set. Skipping PySpark Initialization.
Initializing logger
2021-11-26 05:36:20,266 | root | INFO | Starting up app insights client
logging socket was found. logging is available.
logging socket was found. logging is available.
2021-11-26 05:36:20,267 | root | INFO | Starting up request id generator
2021-11-26 05:36:20,267 | root | INFO | Star

In [64]:
primary, secondary = service.get_keys()
print(primary)

fseaM1SlC6JWgcTTFURmI2M2kMb2JVwg


In [65]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Dataset

data = TabularDatasetFactory.from_delimited_files(path="https://raw.githubusercontent.com/NAMU2NI/Capstone_Project/main/payer_level_features.csv")
x_data, y_data = clean_data(data)
x_data = x_data.drop(['Payer', 'Bankruptcy month'], axis=1)

In [66]:
x_data[4887:4889]

Unnamed: 0,# POCs,Rent,Perc Rent,Average Receivables,Average Overdue,Overdue,1 - 29 Days,30 - 59 Days,60 - 89 Days,Payment Term Days,...,Payment Term_042I,Payment Term_045I,Payment Term_049I,Payment Term_056I,Payment Term_060I,Payment Term_063I,Payment Behavior Status_AVERAGE,Payment Behavior Status_EXCELLENT,Payment Behavior Status_GOOD,Payment Behavior Status_STRUCTURAL OFFENDER
4887,1,0.0,0.0,12015.97,20026.616667,24031.94,0.0,0.0,0.0,14,...,0,0,0,0,0,0,0,1,0,0
4888,1,0.0,0.0,5531.766667,1857.285,1972.45,1972.45,0.0,0.0,14,...,0,0,0,0,0,0,0,0,0,1


In [67]:
y_data[4887:4889]

4887    1
4888    0
Name: FLAG_BAD_DEBT, dtype: int64

In [68]:
import json

input_payload = json.dumps({
    'data': x_data[4887:4889].to_dict(orient='records'),
    'method': 'predict'  # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.
})

output = service.run(input_payload)

print(output)

[1, 0]


In [69]:
print(service)

AciWebservice(workspace=Workspace.create(name='ops_analytics-workspace', subscription_id='73f88e6b-3a35-4612-b550-555157e7059f', resource_group='ops_analytics-rg-gb-dev'), name=bkt1, image_id=None, compute_type=None, state=ACI, scoring_uri=Healthy, tags=http://c86fe57b-aa5b-4455-a748-c0f7547aaba4.westeurope.azurecontainer.io/score, properties={'data': 'Bankruptcy', 'method': 'sklearn'}, created_by={'azureml.git.branch': 'master', 'mlflow.source.git.branch': 'master', 'azureml.git.commit': 'a3c5b47ae395dd1677bbfc1ce6d42b58ab484eb0', 'mlflow.source.git.commit': 'a3c5b47ae395dd1677bbfc1ce6d42b58ab484eb0', 'azureml.git.dirty': 'True', 'hasInferenceSchema': 'False', 'hasHttps': 'False'})


**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.

