# Automated ML on Heart Failure Dataset

Importing dependencies

In [None]:
import os
import joblib
import azureml.core
from azureml.core import Workspace, Experiment, Dataset, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from pprint import pprint # Used in printing automl model parameters
from azureml.core import Model # Used to get model information

In [None]:
# Environment setup. These lines are shell commands, not Python.
# NOTE(review): as written they will not execute in a Python notebook cell; run them
# in a terminal (or prefix them with `!`) before starting the notebook — confirm intent.
# Activate the conda environment, upgrade the Azure ML SDK with its extras, then
# install and enable the azureml.widgets notebook extension for the current user.
source activate azureml_py36
pip install --upgrade azureml-sdk[notebooks,contrib,automl] 
conda install ipywidgets
jupyter nbextension install --py --user azureml.widgets
jupyter nbextension enable azureml.widgets --user --py

## Initialize Workspace

Initialize a workspace object from persisted configuration. 

In [None]:
# Load the workspace from the persisted configuration and print its identifying
# details, one per line.
ws = Workspace.from_config()
workspace_details = (ws.name, ws.resource_group, ws.location, ws.subscription_id)
print(*workspace_details, sep='\n')

## Create an Azure ML experiment

Create an [Experiment](https://docs.microsoft.com/en-gb/azure/machine-learning/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace.

In [None]:
# Choose a name for the run history container in the workspace
experiment_name = 'heartfailure-automl'
experiment = Experiment(ws, experiment_name)

# No interactive run is started here: the AutoML run is created later by
# experiment.submit(), and a run opened with start_logging() that is never
# completed would stay in the "Running" state in the workspace.

## Create or Attach an AmlCompute cluster

Create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for the AutoML run.

In [None]:
# Choose a name for your cluster.
# Compute names may contain only letters, digits and hyphens, and must be 2-16 characters long.
cluster_name = "project-automl"

try:
    # Reuse the cluster when one with this name is already attached to the workspace
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print(f'{cluster_name} exists already')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

    # No min_nodes is passed to provisioning_configuration above, so the cluster may
    # idle with zero nodes. Waiting for a node count larger than the provisioned
    # minimum is discouraged by the SDK docs, so only wait for provisioning itself.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=10)

# List every compute target in the workspace with its type and provisioning state
compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

## Dataset

### Overview

In this project, we are going to predict mortality due to heart failure with the use of AutoML. Heart failure is a common event caused by Cardiovascular diseases (CVDs), and it occurs when the heart cannot pump enough blood to meet the needs of the body.

The [Heart Failure Prediction](https://archive.ics.uci.edu/ml/datasets/Heart+failure+clinical+records) dataset is used as the training data for this task. It comprises 299 heart failure patients and 12 features, which report clinical, body, and lifestyle information.

The task here is to train a binary classification model that predicts the target column DEATH_EVENT, which indicates whether the patient died or survived before the end of the follow-up period, based on the information provided by the other 11 columns (predictors). The time feature was dropped before training since we cannot get a time value for new patients after deployment. Prediction models based on these predictors, if accurate, can potentially be used to help hospitals assess the severity of patients with cardiovascular diseases.

In [None]:
# Reuse the dataset registered under `key` when the workspace already has it;
# otherwise build a tabular dataset from the public CSV and register it.
key = "Heart-Failure Dataset"
description_text = "Heart-Failure Dataset for Captone project"

found = key in ws.datasets.keys()
if found:
    dataset = ws.datasets[key]
else:
    example_data = 'https://raw.githubusercontent.com/PeacePeters/Heart-Failure-Prediction-using-AzureML/main/heart_failure.csv'
    unregistered = Dataset.Tabular.from_delimited_files(example_data)
    dataset = unregistered.register(
        workspace=ws,
        name=key,
        description=description_text,
    )

# Load the data into pandas and show summary statistics as the cell output
df = dataset.to_pandas_dataframe()
df.describe()

In [None]:
# Review the dataset result
# Take the first 5 records and display them as a pandas DataFrame
dataset.take(5).to_pandas_dataframe()

## AutoML Configuration

The AutoML settings are:
1. The model is a classification task to predict mortality caused by heart failure.
2. The primary metric used is AUC weighted, which is more appropriate than accuracy since the dataset is moderately imbalanced (67.89% negative elements and 32.11% positive elements). 
3. 5-fold cross-validation is used rather than 3-fold, since it gave better performance. 
4. A 30 minutes timeout is specified to constrain usage. 
5. The maximum number of iterations executed in parallel during training is set to 5. 

In [None]:
# AutoML settings: experiment time budget, parallelism and the metric to optimise.
# NOTE(review): the cluster created earlier in this notebook uses max_nodes=4, so
# fewer than 5 iterations may actually run at the same time — confirm intent.
automl_settings = dict(
    experiment_timeout_minutes=30,
    max_concurrent_iterations=5,
    primary_metric='AUC_weighted',
)

# AutoML configuration: classification on DEATH_EVENT with 5-fold cross-validation,
# executed on the remote compute target; errors are logged to automl_errors.log.
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    n_cross_validations=5,
    debug_log="automl_errors.log",
    **automl_settings,
)

In [None]:
# Submit your experiment
# Submitting the AutoML configuration returns the handle used below to monitor
# the run and to retrieve the best model.
remote_run = experiment.submit(automl_config)

## Run Details

In [None]:
# Show the run-monitoring widget, then block until the remote run has finished,
# streaming its output into the cell.
RunDetails(remote_run).show()
remote_run.wait_for_completion(show_output=True)

## Best Model

In [None]:
# Get best run and model
# get_output() is unpacked into the best run and the fitted model it produced
best_run, best_model = remote_run.get_output()

In [None]:
# Display the best run as the cell output
best_run

In [None]:
# Fetch every metric recorded for the best run and print one "name value" line each
best_run_metrics = best_run.get_metrics()
for metric_name, metric in best_run_metrics.items():
    print(metric_name, metric)

In [None]:
# Summarise the winning run: its id, the model name stored in its properties,
# and its score on the primary metric.
best_run_line = 'Best Run Id: ' + best_run.id
best_model_line = 'Best Model Name: ' + best_run.properties['model_name']
print(best_run_line, best_model_line)
print('\n AUC_weighted:', best_run_metrics['AUC_weighted'])

In [None]:
# Display the fitted model returned by get_output()
best_model

In [None]:
# Display the model's final estimator.
# NOTE(review): `_final_estimator` is a private attribute (presumably of a
# scikit-learn Pipeline) and may change between library versions — confirm.
best_model._final_estimator

In [None]:
# Print detailed parameters of the fitted model
def print_model(model, prefix=""):
    """Print each step of a pipeline-like model together with its parameters.

    Args:
        model: Object exposing ``steps``, an iterable of ``(name, step)`` pairs.
        prefix: Text printed in front of every step name at this level.

    A step that has both ``estimators`` and ``weights`` attributes is treated as
    an ensemble: its member names and weights are printed, then every member
    (a ``(name, pipeline)`` pair) is printed recursively using
    ``"<member name> - "`` as the prefix. Any other step is printed via its
    ``get_params()`` result. A blank line follows every parameter dump.
    """
    for step_name, step_obj in model.steps:
        print(prefix + step_name)

        is_ensemble = hasattr(step_obj, 'estimators') and hasattr(step_obj, 'weights')
        if not is_ensemble:
            pprint(step_obj.get_params())
            print()
            continue

        member_names = [member[0] for member in step_obj.estimators]
        pprint({'estimators': member_names, 'weights': step_obj.weights})
        print()
        # The nested prefix is built from the member name only; the outer
        # prefix is not carried down.
        for member in step_obj.estimators:
            print_model(member[1], member[0] + ' - ')

# Dump every step of the best model with its parameters
print_model(best_model)

In [None]:
# Display the model's (name, step) pairs as the cell output
best_model.steps

In [None]:
# Display the tags attached to the best run
best_run.get_tags()

## Model Deployment

In [None]:
# Register the best run's serialized model (outputs/model.pkl) in the workspace
# under "<experiment name>-model", tagged with how it was trained, then display it.
registered_name = experiment_name + "-model"
registration_tags = {'Training context': 'AutoML', 'type': 'Classification'}
model = best_run.register_model(
    model_name=registered_name,
    model_path='outputs/model.pkl',
    tags=registration_tags,
)
model

In [None]:
# List registered models to verify that the model has been saved.
# The loop variable is deliberately not named `model`: that name holds the model
# registered earlier in this notebook and is later passed to Model.deploy();
# reusing it here would silently replace it with the last model listed.
for registered_model in Model.list(ws):
    print(registered_model.name, 'version:', registered_model.version)
    for tag_name, tag in registered_model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in registered_model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

In [None]:
# Download the serialized model and the generated scoring file from the best
# run's outputs; the scoring file is saved locally as score.py.
artifact_downloads = [
    ('outputs/model.pkl', './model.pkl'),
    ('outputs/scoring_file_v_1_0_0.py', './score.py'),
]
for remote_path, local_path in artifact_downloads:
    best_run.download_file(remote_path, local_path)

In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

# Inference setup: run the downloaded score.py inside the workspace environment
# named "AzureML-AutoML".
env = Environment.get(ws, "AzureML-AutoML")
inference_config = InferenceConfig(entry_script='./score.py', environment=env)

# Container instance settings: 1 CPU core, 1 GB memory, Application Insights on
aci_settings = {
    'cpu_cores': 1,
    'memory_gb': 1,
    'enable_app_insights': True,
    'tags': {'name': 'aci-cluster', 'framework': 'AutoML'},
    'description': 'Heart Failure Predictor Web Service',
}
aci_config = AciWebservice.deploy_configuration(**aci_settings)

# Deploy the registered model as "<experiment name>-service", replacing any
# existing service with that name.
service_name = experiment_name + "-service"
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Block until deployment finishes, then report the resulting service state
service.wait_for_deployment(show_output=True)
print(service.state)

In [None]:
# URI that scoring requests are sent to
print(service.scoring_uri)

# URI of the service's Swagger description
print(service.swagger_uri)

In [None]:
import json

# Draw 5 random rows from the training dataframe (`df`) and split off the label
# column, so only the predictor columns are sent to the service.
test_df = df.sample(5)
label_df = test_df.pop('DEATH_EVENT')

# Serialise the rows as {"data": [{column: value, ...}, ...]}
records = test_df.to_dict(orient='records')
test_sample = json.dumps({'data': records})
print(test_sample)

In [None]:
# predict using the deployed model
# Send the JSON payload through the SDK's service object and print what comes back
result = service.run(test_sample)
print(result)

In [None]:
# Call the scoring endpoint over plain HTTP, the way an external client would.
# The standard library's urllib is used because `requests` is not imported
# anywhere in this notebook.
import urllib.error
import urllib.request

# Set the content type
headers = {'Content-type': 'application/json'}

request = urllib.request.Request(
    service.scoring_uri,
    data=test_sample.encode('utf-8'),
    headers=headers,
    method='POST',
)

try:
    with urllib.request.urlopen(request) as response:
        response_text = response.read().decode('utf-8')
except urllib.error.HTTPError as err:
    # Still show the service's error body instead of failing the cell
    response_text = err.read().decode('utf-8')

# Print results from the inference
print(response_text)

In [None]:
¡python endpoint.py

In [None]:
# Print original labels
# (the DEATH_EVENT values popped from the sampled rows, for comparison with the predictions)
print(label_df)

In [None]:
# Fetch and print the web service's logs
print(service.get_logs())

In [None]:
# Tear down the deployed web service now that testing is finished
service.delete()