# Automated ML

Import all the dependencies.

In [3]:
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core.dataset import Dataset
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

import joblib
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration


### Workspace & Experiment
The config.json file is downloaded from Azure environment and has to be in the project folder in order for this cell to run.

In [4]:
# Connect to the workspace described by the config.json in the project folder.
ws = Workspace.from_config()

# Every run in this notebook is grouped under one named experiment.
experiment_name = 'CapstoneExp'
experiment = Experiment(ws, experiment_name)

# Show which workspace we are connected to, one attribute per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# Open an interactive run on the experiment.
# NOTE(review): nothing visible in this notebook logs to or completes `run` -- confirm it is still needed.
run = experiment.start_logging()

Workspace name: quick-starts-ws-141376
Azure region: southcentralus
Subscription id: 48a74bb7-9950-4cc1-9caa-5d50f995cc55
Resource group: aml-quickstarts-141376


### Set Compute cluster

Create the compute target for the AutoML run. If a compute target named "notebook141376" is not found, a new one is created using AmlCompute as the training compute resource.


In [8]:


# Create (or reuse) the compute cluster used for the AutoML run.
# Use vm_size = "Standard_DS3_V2" in your provisioning configuration.
# max_nodes should be no greater than 4.

compute_target = None
cpu_cluster_name = 'notebook141376'

try:
    # Reuse the cluster when one with this name already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # No cluster with that name: provision one and block until it is ready.
    # The VM size must be 'Standard_DS3_V2' (the size the existing cluster
    # reports); 'Standard_DS32_V2' is a misspelling of that size.
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS3_V2',
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)

# use get_status() to get a detailed status for the current cluster.
print(compute_target.get_status().serialize())

Found existing cluster, use it.
{'errors': [], 'creationTime': '2021-03-26T02:13:54.939281+00:00', 'createdBy': {'userObjectId': '67d49ca1-a9b6-41ae-93cd-e57676dbaa57', 'userTenantId': '660b3398-b80e-49d2-bc5b-ac1dc93b5254', 'userName': None}, 'modifiedTime': '2021-03-26T02:16:57.291865+00:00', 'state': 'Running', 'vmSize': 'STANDARD_DS3_V2'}


## Dataset

### Overview
The dataset used is taken from Kaggle and the data comes from 299 patients with heart failure collected at the Faisalabad Institute of Cardiology and at the Allied Hospital in Faisalabad (Punjab, Pakistan), during April–December 2015. The patients consisted of both women (105) and men (194), and the main task of the project is to classify the patients based on their odds of survival.


#### Register Dataset
If the dataset `heart_failure_clinical_records` does not already exist in the Workspace, it is created from the CSV file in the GitHub repository (from web files) and registered in the Workspace.

Dataset features:

    - *age*: Age of patient 
    - *anaemia*: Decrease of red blood cells or hemoglobin
    - *creatinine_phosphokinase*: Level of the CPK enzyme in the blood
    - *diabetes*: Whether the patient has diabetes or not
    - *ejection_fraction*: Percentage of blood leaving the heart at each contraction
    - *high_blood_pressure*: Whether the patient has hypertension or not
    - *platelets*: Platelets in the blood
    - *serum_creatinine*: Level of creatinine in the blood
    - *serum_sodium*: Level of sodium in the blood
    - *sex*: Female (F) or Male (M)
    - *smoking*: Whether the patient smokes or not
    -  *time*: Follow-up period
    -  *DEATH_EVENT*: Whether the patient died during the follow-up period


In [5]:


key = "heart_failure_clinical_records"
description_text = "heart_failure_clinical_records"

# Check whether the dataset is already registered in the workspace.
found = key in ws.datasets
print(found)

if found:
    # Reuse the registered dataset.
    dataset = ws.datasets[key]
else:
    # Create the AML tabular dataset from the CSV hosted on GitHub ...
    example_data = 'https://raw.githubusercontent.com/DharmeshHub/CapstoneProject/main/heart_failure_clinical_records_dataset.csv'
    dataset = Dataset.Tabular.from_delimited_files(example_data)
    # ... and register it in the workspace so later runs can look it up by name.
    dataset = dataset.register(workspace=ws,
                               name=key,
                               description=description_text)

# Materialize the dataset locally for a quick look at the data.
df = dataset.to_pandas_dataframe()

print(df.head())
print("*********************************************************************************")
print(df.describe())


False
    age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
0  75.0        0                       582         0                 20   
1  55.0        0                      7861         0                 38   
2  65.0        0                       146         0                 20   
3  50.0        1                       111         0                 20   
4  65.0        1                       160         1                 20   

   high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                    1  265000.00               1.9           130    1   
1                    0  263358.03               1.1           136    1   
2                    0  162000.00               1.3           129    1   
3                    0  210000.00               1.9           137    1   
4                    0  327000.00               2.7           116    0   

   smoking  time  DEATH_EVENT  
0        0     4            1  
1        0     6            1  
2 

In [6]:
# Bind a short alias to the dataset object.
ds=dataset
# Bare last expression: the notebook displays the dataset's definition (shown below).
ds

{
  "source": [
    "https://raw.githubusercontent.com/DharmeshHub/CapstoneProject/main/heart_failure_clinical_records_dataset.csv"
  ],
  "definition": [
    "GetFiles",
    "ParseDelimited",
    "DropColumns",
    "SetColumnTypes"
  ],
  "registration": {
    "id": "b4e7061f-e486-4a2c-ae1a-72b2bcd93d71",
    "name": "heart_failure_clinical_records",
    "version": 1,
    "description": "heart_failure_clinical_records",
    "workspace": "Workspace.create(name='quick-starts-ws-141376', subscription_id='48a74bb7-9950-4cc1-9caa-5d50f995cc55', resource_group='aml-quickstarts-141376')"
  }
}

## AutoML Configuration

#### Here is the overview of automl settings and configuration used for AutoML run.

- _experiment_timeout_minutes=20_

     This is an exit criterion and is used to define how long (in minutes), the experiment should continue to run. To help avoid experiment time out failures, I used the minimum of 20 minutes.


- _max_concurrent_iterations_: 4

    It represents the maximum number of iterations that would be executed in parallel.


- _primary_metric='accuracy'_

    I chose accuracy as the primary metric for this classification model.


- _enable_early_stopping=True_

     Early stopping improves efficiency: it terminates poorly performing runs early and lets well-performing runs complete.


- _n_cross_validations=2_

     This parameter sets how many cross validations to perform, based on the same number of folds (subsets). Two folds for cross-validation are defined. So, two different trainings, each training using 1/2 of the data, and each validation using 1/2 of the data with a different holdout fold each time.


- _compute_target = compute_target_

     The Azure Machine Learning compute target to run the Automated Machine Learning experiment on.


- _task='classification'_

     This defines the experiment type which in this case is classification.


- _training_data = dataset_

     The training data to be used within the experiment.


- _label_column_name = "DEATH_EVENT"_
     
     The name of the label (target) column. This parameter is applicable to training_data and validation_data parameters.


- _path = project_folder_

     The full path to the Azure Machine Learning project folder.


- _featurization=auto_

     Featurization is done automatically, i.e. normalization techniques are applied to your data. This helps certain algorithms that are sensitive to features on different scales.


- _debug_log = automl_errors.log_

     The log file to write debug information to.



In [9]:

project_folder = './capstone-project'

# Run-control settings; they are unpacked into AutoMLConfig as keyword arguments.
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 4,
    "primary_metric": 'accuracy',
    "enable_early_stopping": True,
    "n_cross_validations": 2,
}

# Classification on the registered dataset, predicting the DEATH_EVENT column,
# executed on the compute target set up earlier in the notebook.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    featurization='auto',
    path=project_folder,
    debug_log="automl_errors.log",
    **automl_settings
)

In [10]:
# Submit your experiment
# show_output=True streams the run's progress into the cell output (see the log below).
automl_run = experiment.submit(automl_config, show_output = True)
# Last expression of the cell: the value returned by wait_for_completion() is what the notebook displays.
automl_run.wait_for_completion()

Running on remote.
No run_configuration provided, running on notebook141376 with default configuration
Running on remote compute: notebook141376
Parent Run ID: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputa

{'runId': 'AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d',
 'target': 'notebook141376',
 'status': 'Completed',
 'startTimeUtc': '2021-03-26T03:20:19.067632Z',
 'endTimeUtc': '2021-03-26T04:01:54.954027Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'notebook141376',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"190a5476-dd78-4f0b-aaf1-aeb46ea6537f\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/DharmeshHub/CapstoneProject/main/heart_failure_clinical_records_dataset.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\

## Run Details

Use the `RunDetails` widget to show the different experiments.

In [11]:

# Render the RunDetails widget for the AutoML run submitted above.
RunDetails(automl_run).show()


_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [13]:
# Print every child run of the AutoML run, each preceded by a separator line.
RUN_SEPARATOR = '==================================================='
for child_run in automl_run.get_children():
    print(RUN_SEPARATOR, child_run, sep='\n')

Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_15,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_14,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_13,
Type: azureml.scriptrun,
Status: Canceled)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_12,
Type: azureml.scriptrun,
Status: Canceled)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_11,
Type: azureml.scriptrun,
Status: Canceled)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_10,
Type: azureml.scriptrun,
Status: Canceled)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_9,
Type: azureml.scriptrun,
Status: Completed)
Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_8,
Type: azureml.scriptrun,
Status: Completed)
Run(Ex

## Best Model

Get the best model from the automl experiments and display all the properties of the model.



In [15]:
# Retrieve the best child run together with its fitted pipeline.
best_run, fitted_model = automl_run.get_output()

# Keep the metrics and the run-definition arguments for the report cell below.
best_run_metrics = best_run.get_metrics()
best_run_details = best_run.get_details()
parameter_values = best_run_details['runDefinition']['arguments']

# Print the run, a divider, then the fitted pipeline, each via its own print() call.
section_divider = '############################################################'
for report_item in (best_run, '\n', section_divider, '\n', fitted_model):
    print(report_item)


Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_14,
Type: azureml.scriptrun,
Status: Completed)


############################################################


Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                        decision_function_shape='ovr',
                                                                                   

In [16]:

# Report on the best run: one labelled section per item, each followed by a divider.
SECTION_DIVIDER = '############################################################'

report_sections = [
    ('Best Run Id: ', best_run.id),
    # Reuse the metrics fetched together with the best run instead of querying the service again.
    ("Best run metrics :", best_run_metrics),
    ('Accuracy:', best_run_metrics['accuracy']),
    ("Best run details :", best_run.get_details()),
    # parameter_values holds the run definition's arguments, so label it as such
    # (it was previously mislabelled as a regularization rate).
    ('Run definition arguments:', parameter_values),
    ("Best run file names :", best_run.get_file_names()),
]

for section_label, section_value in report_sections:
    print(section_label, section_value)
    print('\n')
    print(SECTION_DIVIDER)
    print('\n')


Best Run Id:  AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_14


############################################################


Best run metrics : {'AUC_micro': 0.9020951818987133, 'precision_score_micro': 0.8394854586129754, 'AUC_macro': 0.891918848854056, 'precision_score_weighted': 0.8420753758060664, 'recall_score_macro': 0.792165581210365, 'balanced_accuracy': 0.792165581210365, 'average_precision_score_weighted': 0.8960098958231522, 'average_precision_score_micro': 0.8994388490345169, 'norm_macro_recall': 0.5843311624207298, 'precision_score_macro': 0.8353496522883861, 'average_precision_score_macro': 0.8717172095124446, 'accuracy': 0.8394854586129754, 'f1_score_macro': 0.8044239559103992, 'recall_score_weighted': 0.8394854586129754, 'matthews_correlation': 0.6247206777478671, 'AUC_weighted': 0.891918848854056, 'f1_score_weighted': 0.8337482955456812, 'weighted_accuracy': 0.8763081410766242, 'recall_score_micro': 0.8394854586129754, 'f1_score_micro': 0.8394854586129754, 'log_loss':

In [20]:
# Inspect the members of the second pipeline step (index 1); the fitted model printed
# earlier shows that step as 'prefittedsoftvotingclassifier'.
fitted_model.steps[1][1].estimators

In [21]:

from pprint import pprint
def print_model(model, prefix=""):
    """Print every step of a pipeline-like object, descending into ensembles.

    Args:
        model: Object exposing ``steps``, an iterable of ``(name, component)`` pairs.
        prefix: Text prepended to each step name; nested calls pass
            ``"<estimator name> - "``.

    For a component that has both ``estimators`` and ``weights`` attributes, the
    estimator names and the weights are pretty-printed, then each estimator
    (itself expected to expose ``steps``) is printed recursively. Any other
    component has the result of its ``get_params()`` pretty-printed.
    """
    for step in model.steps:
        step_name, component = step[0], step[1]
        print(prefix + step_name)

        is_ensemble = hasattr(component, 'estimators') and hasattr(component, 'weights')
        if not is_ensemble:
            pprint(component.get_params())
            continue

        member_names = [member[0] for member in component.estimators]
        pprint({'estimators': member_names, 'weights': component.weights})
        print()

        # Each ensemble member is a (name, pipeline) pair; recurse into its pipeline.
        for member in component.estimators:
            print('############################################################')
            print('\n')
            print_model(member[1], member[0] + ' - ')

# Emit an empty line before the report.
print()

# Walk the best fitted pipeline starting at its top-level steps (no prefix).
print_model(fitted_model)






datatransformer
{'enable_dnn': None,
 'enable_feature_sweeping': None,
 'feature_sweeping_config': None,
 'feature_sweeping_timeout': None,
 'featurization_config': None,
 'force_text_dnn': None,
 'is_cross_validation': None,
 'is_onnx_compatible': None,
 'logger': None,
 'observer': None,
 'task': None,
 'working_dir': None}
prefittedsoftvotingclassifier
{'estimators': ['4', '3', '2', '0', '5', '9', '7'],
 'weights': [0.3333333333333333,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111,
             0.1111111111111111]}

############################################################


4 - minmaxscaler
{'copy': True, 'feature_range': (0, 1)}
4 - randomforestclassifier
{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': 'balanced',
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0

In [22]:
# Bare last expression: the notebook renders the best run's summary (shown below).
best_run


Experiment,Id,Type,Status,Details Page,Docs Page
CapstoneExp,AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_14,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


In [23]:
# NOTE(review): this result is neither assigned nor the cell's last expression, so it is not displayed -- confirm the call is needed.
best_run.get_file_names()

# Download the yaml file that includes the environment dependencies
best_run.download_file('outputs/conda_env_v_1_0_0.yml', 'env.yml')



In [24]:
# Download the model file
# Fetches the best run's 'outputs/model.pkl'; 'CapstoneAutoml.pkl' is passed as the destination.

best_run.download_file('outputs/model.pkl', 'CapstoneAutoml.pkl')

In [25]:
# Save the best model
# Registers output of the best run under the model name "CapstoneAutoml.pkl".
# NOTE(review): model_path is the './outputs/' folder rather than the single file 'outputs/model.pkl',
# the returned model is discarded, and the deployment cell registers a model under the same name again
# -- confirm this registration is intended.

best_run.register_model(model_name = "CapstoneAutoml.pkl", model_path = './outputs/')

print(best_run)

Run(Experiment: CapstoneExp,
Id: AutoML_b2b5bd0a-c886-4979-9f5e-d1643a76db5d_14,
Type: azureml.scriptrun,
Status: Completed)


## Model Deployment

Deploy best run model, register it, create an inference config and deploy the model as a web service.

In [26]:
from azureml.core.model import InferenceConfig 
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model


# Register the AutoML run's model; `model` is the object deployed below.
model = automl_run.register_model(model_name='CapstoneAutoml.pkl')
print(automl_run.model_id)

# https://knowledge.udacity.com/questions/463620

# Inference reuses the best run's environment and the scoring script stored in its outputs.
entry_script = 'inference/scoring.py'
environment = best_run.get_environment()
best_run.download_file('outputs/scoring_file_v_1_0_0.py', entry_script)

inference_config = InferenceConfig(entry_script=entry_script, environment=environment)

# Deploying the model via ACI WebService
# https://github.com/MicrosoftDocs/azure-docs/blob/master/articles/machine-learning/how-to-deploy-azure-container-instance.md

# Key-based authentication and Application Insights are both switched on for the service.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    auth_enabled=True,
    enable_app_insights=True
)

service = Model.deploy(
    workspace=ws,
    name="aciservice",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config
)
service.wait_for_deployment(show_output=True)

CapstoneAutoml.pkl
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-03-26 04:34:03+00:00 Creating Container Registry if not exists.
2021-03-26 04:34:03+00:00 Registering the environment.
2021-03-26 04:34:05+00:00 Use the existing image.
2021-03-26 04:34:06+00:00 Generating deployment configuration.
2021-03-26 04:34:07+00:00 Submitting deployment to compute..
2021-03-26 04:34:12+00:00 Checking the status of deployment aciservice..
2021-03-26 04:35:03+00:00 Checking the status of inference endpoint aciservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [27]:
# Getting the service state
# The scoring URI & the primary authentication key are needed by the endpoint.py file in order to test the deployed service.
# The Swagger URI can be used in Swagger UI: https://petstore.swagger.io/ For more info, please see the relevant part in the README file.

# Authentication is enabled, so I use the get_keys method to retrieve the primary and secondary authentication keys:
primary, secondary = service.get_keys()

# Print only a masked form of the key so the secret is not stored in the saved notebook output.
# The full value remains available in `primary` for the current session.
masked_primary = primary[:4] + '*' * max(len(primary) - 4, 0)

print('Service state: ' + service.state)
print('Service scoring URI: ' + service.scoring_uri)
print('Service Swagger URI: ' + service.swagger_uri)
print('Service primary authentication key: ' + masked_primary)

Service state: Healthy
Service scoring URI: http://814e2718-3a40-430c-9ff6-e45508382635.southcentralus.azurecontainer.io/score
Service Swagger URI: http://814e2718-3a40-430c-9ff6-e45508382635.southcentralus.azurecontainer.io/swagger.json
Service primary authentication key: utNX****************************


### Send a request to the web service you deployed to test it.

In [28]:
# Sending a request to the deployed web service to test it: consuming model endpoint
# Executes the local script endpoint.py with the %run magic; the script itself is not part of this notebook.

%run endpoint.py

{"result": [1, 1, 0]}


Print the logs of the web service

In [29]:
# Print the logs retrieved from the deployed web service.
print(service.get_logs())

2021-03-26T04:34:58,732356300+00:00 - iot-server/run 
2021-03-26T04:34:58,740535000+00:00 - gunicorn/run 
2021-03-26T04:34:58,747039400+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_2b14f450572e78de640d54eaabed5e4d/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_2b14f450572e78de640d54eaabed5e4d/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_2b14f450572e78de640d54eaabed5e4d/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_2b14f450572e78de640d54eaabed5e4d/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_2b14f450572e78de640d54eaabed5e4d/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2021-03-26T04:34:58,755949100+00:00 - rsyslog/run 
rsyslogd

### Delete the service

In [30]:
# Remove the deployed web service now that it has been tested.
service.delete()