# Automated ML

Import all the dependencies that will be needed for the project.

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset


print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


# Workspace

In [2]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

quick-starts-ws-136593
aml-quickstarts-136593
southcentralus
976ee174-3882-4721-b90a-b5fef6b72f24


## Dataset

### Overview
We will be using a dataset about hear disease from [kaggle](https://www.kaggle.com/ronitf/heart-disease-uci)

* age: age in years 
* sex: (1 = male; 0 = female) 
* cp: chest pain type
* trestbps: resting blood pressure (in mm Hg on admission to the hospital)
* chol: serum cholestoral in mg/dl 
* fbs: (fasting blood sugar &gt; 120 mg/dl) (1 = true; 0 = false) 
* restecg: resting electrocardiographic results 
* thalach: maximum heart rate achieved 
* exang: exercise induced angina (1 = yes; 0 = no) 
* oldpeak: ST depression induced by exercise relative to rest 
* slope: the slope of the peak exercise ST segment
* ca: number of major vessels (0-3) colored by flourosopy 
* thal: 3 = normal; 6 = fixed defect; 7 = reversable defect 
* target: 1 or 0 


Accessing the dataset:

In [3]:
found = False
key = "heart-disease-dataset"
description_text = "Heart disease dataset"

if key in ws.datasets.keys():
        print("Dataset {} already exists..!".format(key))
        found = True
        dataset = Dataset.get_by_name(ws, name=key)

if not found:
        # Create AML Dataset and register it into Workspace
        print("Creating {} dataset..".format(key))
        data = 'https://raw.githubusercontent.com/SadmiB/Heart-Disease-Prediction/master/heart.csv'
        dataset = Dataset.Tabular.from_delimited_files(data)        
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key,
                                   description=description_text)


df = dataset.to_pandas_dataframe()
df.describe()

Creating heart-disease-dataset dataset..


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [4]:
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


# Experiment

In [5]:
# choose a name for experiment
experiment_name = 'heart-disease-experiment-ml1'
project_folder = 'heart-disease-project'
experiment=Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
heart-disease-experiment-ml1,quick-starts-ws-136593,Link to Azure Machine Learning studio,Link to Documentation


# Create or Attach an AmlCompute cluster

In [6]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "auto-ml"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

    compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)

Creating
Succeeded...............................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## AutoML Configuration

Below the Automl settings and cofiguration we using AUC_weighted as a primary metric and the task to solve is classification. The target column is "target" which represents whether a patient has a heart disease or not.

In [7]:
# Automl settings
automl_settings = { 
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 5,
    "enable_onnx_compatible_models": True,
    "primary_metric" : 'accuracy',
    "experiment_exit_score" : 0.9984,
    "enable_early_stopping" : True,
    "featurization" : "auto"
}

# automl config
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             blocked_models = ['KNN','LinearSVM'],
                             training_data=dataset,
                             label_column_name="target",   
                             path = project_folder,
                             debug_log = "automl_errors.log",
                             **automl_settings)

In [8]:
# Submit the ml experiment
remote_run = experiment.submit(automl_config)

Running on remote.


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [9]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()
remote_run.wait_for_completion()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_4b3e9d87-95c4-4540-b56d-7cfc88ec52ac',
 'target': 'auto-ml',
 'status': 'Completed',
 'startTimeUtc': '2021-01-30T13:00:48.78433Z',
 'endTimeUtc': '2021-01-30T13:27:30.137648Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'auto-ml',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"2bbc931e-3fdc-4cc1-a3de-762edaaab682\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/SadmiB/Heart-Disease-Prediction/master/heart.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": true, \\\\\\"name\\\\\\": null, \\

## Retreive and save best ONNX model

In [10]:
from azureml.automl.runtime.onnx_convert import OnnxConverter


best_run, onnx_model = remote_run.get_output(return_onnx_model=True)
onnx_path = "./best_model.onnx"
OnnxConverter.save_onnx_model(onnx_model, onnx_path)

## Test prediction using ONNX model

In [11]:
from azureml.automl.core.onnx_convert import OnnxConvertConstants
from azureml.train.automl import constants
import onnxruntime
from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper
import sys
import json


if sys.version_info < OnnxConvertConstants.OnnxIncompatiblePythonVersion:
    python_version_compatible = True
else:
    python_version_compatible = False


def get_onnx_res(run):
    res_path = 'onnx_resource.json'
    run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, output_file_path=res_path)
    with open(res_path) as f:
        onnx_res = json.load(f)
    return onnx_res

if python_version_compatible:
    test_df = dataset.to_pandas_dataframe()[:2]
    mdl_bytes = onnx_model.SerializeToString()
    onnx_res = get_onnx_res(best_run)

    onnxrt_helper = OnnxInferenceHelper(mdl_bytes, onnx_res)
    pred_onnx, pred_prob_onnx = onnxrt_helper.predict(test_df)

    print(pred_onnx)
else:
    print('Please use Python version 3.6 or 3.7 to run the inference helper.')

[1 1]


## Best Model

Getting the best model from the automl experiments and display all the properties of the model.



In [12]:
import joblib
# Retrieve and save your best automl model.

best_run, fitted_model = remote_run.get_output()
best_run_metrics = best_run.get_metrics()

print('- Best Run Id: ', best_run.id)
print('- Accuracy:', best_run_metrics['accuracy'])
print('- AUC_weighted:', best_run_metrics['AUC_weighted'])
joblib.dump(fitted_model,'azureml_fitted_model.sav')

- Best Run Id:  AutoML_4b3e9d87-95c4-4540-b56d-7cfc88ec52ac_38
- Accuracy: 0.8644086021505377
- AUC_weighted: 0.9169683691924403


['azureml_fitted_model.sav']

### Register best model

In [13]:
model_name = best_run.properties["model_name"]
description = "AutoML model for predicting heart disease."
tags = None
model = remote_run.register_model(model_name=model_name, description=description, tags=tags)

print(remote_run.model_id)

AutoML4b3e9d87938


## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [14]:
script_file_name = 'inference/score.py'

best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'inference/score.py')

### Deploy the model as a webservice using ACI

In [15]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

inference_config = InferenceConfig(entry_script=script_file_name)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               tags = {'area': "bmData", 'type': "automl_classification"}, 
                                               description = 'Heart Disease prediction using Automl Classification')

aci_service_name = 'automl-heart-disease-prediction'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
aci_service.wait_for_deployment(True)
print(aci_service.state)

automl-heart-disease-prediction
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.............................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


### Test the web service

In [23]:
#Test 1
import requests
import json

data = json.dumps({ 
  "data": [  
    {
      "age": 63,
      "sex": 1,
      "cp": 3,
      "trestbps": 145,
      "chol": 233,
      "fbs": 1,
      "restecg": 0,
      "thalach": 150,
      "exang": 0,
      "oldpeak": 2.3,
      "slope": 0,
      "ca": 0,
      "thal": 1
    },
    { 
      "age": 57,
      "sex": 0,
      "cp": 0,
      "trestbps": 140,
      "chol": 241,
      "fbs": 0,
      "restecg": 1,
      "thalach": 123,
      "exang": 1,
      "oldpeak": 0.2,
      "slope": 1,
      "ca": 0,
      "thal": 3
    }
  ]
})

headers = {'Content-Type': 'application/json'}

resp = requests.post(aci_service.scoring_uri, data, headers=headers)

pred = json.loads(json.loads(resp.text))['result']
print(pred)

[1, 0]


### Get service logs

In [24]:
aci_service.get_logs()



### Delete the web service

In [19]:
#aci_service.delete()

End!