# Automated ML



In [1]:
import logging
import csv
import os 
import shutil

from matplotlib import pyplot as plt
import pandas as pd
import numpy as np 
from sklearn import datasets
#import pkg_resources

import azureml.core
from azureml.core import Experiment, Model, Environment
from azureml.train.automl import AutoMLConfig
from azureml.core import Workspace, Dataset, Datastore 
from azureml.pipeline.steps import AutoMLStep
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn



# Print the installed Azure ML SDK version so it is captured with the notebook outputs.
print("SDK version: ", azureml.core.VERSION)

SDK version:  1.34.0


## Dataset

### Overview

Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worldwide. Heart failure is a common event caused by CVDs and this dataset contains 12 features that can be used to predict mortality by heart failure.

Most cardiovascular diseases can be prevented by addressing behavioural risk factors such as tobacco use, unhealthy diet and obesity, physical inactivity and harmful use of alcohol using population-wide strategies.

People with cardiovascular disease or who are at high cardiovascular risk (due to the presence of one or more risk factors such as hypertension, diabetes, hyperlipidaemia or already established disease) need early detection and management, where a machine learning model can be of great help.

In [2]:
# Load the workspace from its saved configuration file.
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'exp-heart-failure'

experiment = Experiment(ws, experiment_name)
print(experiment)

# Datastore name. Not referenced by any other visible cell; kept so that any
# code relying on this notebook-level name keeps working.
ds = 'workspaceblobstore'

# Name under which the heart-failure data is expected to be registered.
key = 'heart_failure_dataset'

# Look the registered datasets up once, then fail with an actionable message
# (still a KeyError, as before) when the dataset has not been registered yet.
registered_datasets = ws.datasets
if key not in registered_datasets:
    raise KeyError(
        f"Dataset '{key}' is not registered in this workspace. "
        "Register the heart-failure data under that name "
        "(e.g. Dataset.Tabular.from_delimited_files(...).register(...)) "
        "before running this notebook."
    )
dataset = registered_datasets[key]



Experiment(Name: exp-heart-failure,
Workspace: quick-starts-ws-159915)


In [3]:
# Load the registered dataset into an in-memory pandas DataFrame.
df = dataset.to_pandas_dataframe()
# Last expression of the cell: renders per-column summary statistics as the cell output.
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


## AutoML Configuration

{"experiment_timeout_minutes": 15, -- limits the experiment to a maximum of 15 minutes

"max_concurrent_iterations": 6, -- maximum number of model iterations run in parallel

"primary_metric": "accuracy"} -- uses accuracy as the metric for evaluating and ranking models

In [4]:
# Experiment-level limits and the metric AutoML optimises for.
automl_settings = dict(
    experiment_timeout_minutes=15,
    max_concurrent_iterations=6,
    primary_metric="accuracy",
)

# Classification run on the "mlops-compute" target that predicts DEATH_EVENT
# from the registered dataset using 4 cross-validation folds.
automl_config = AutoMLConfig(
    task="classification",
    label_column_name="DEATH_EVENT",
    training_data=dataset,
    n_cross_validations=4,
    compute_target="mlops-compute",
    **automl_settings
)

In [5]:
# Submit the AutoML configuration to the experiment; later cells use the returned run handle.
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
exp-heart-failure,AutoML_c2fe52fb-8c82-4dfa-b38d-512be1a9b73a,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

The Voting Ensemble turned out to be the best model; it combines the predictions of multiple classifiers.

In [6]:
# Progress widget for the submitted AutoML run.
run_widget = RunDetails(remote_run)
run_widget.show()

# Block until the run finishes, streaming its status output into the cell;
# the returned value is the cell's last expression and is displayed as its output.
remote_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
exp-heart-failure,AutoML_c2fe52fb-8c82-4dfa-b38d-512be1a9b73a,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

******************************************************************

{'runId': 'AutoML_c2fe52fb-8c82-4dfa-b38d-512be1a9b73a',
 'target': 'mlops-compute',
 'status': 'Completed',
 'startTimeUtc': '2021-10-03T16:37:34.417073Z',
 'endTimeUtc': '2021-10-03T16:54:28.851089Z',
 'services': {},
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '4',
  'target': 'mlops-compute',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"e74a4762-5499-4f6b-b31c-dcad860487dd\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.34.0", "azureml-train": "1.34.0", "azureml-train-restclients-hyperdrive": "1.34.0", "azureml-train-core": "1.34.0", "azureml-train-automl": "1.34.0", "azureml-train-automl-runtime": "1.34.0", "azureml-train-automl-cl

## Best Model


In [7]:
# Retrieve the best child run together with its fitted pipeline.
best_run, fitted_model = remote_run.get_output()

# Show the fitted pipeline first, then the run it came from (one per line).
print(fitted_model, best_run, sep="\n")


Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
    gpu_training_param_dict={'processing_unit_type': 'cpu'}
), random_state=None, reg_alpha=0.15789473684210525, reg_lambda=0, subsample=0.29736842105263156))], verbose=False))], flatten_transform=None, weights=[0.1111111111111111, 0.2222222222222222, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111]))],
         verbose=False)
Run(Experiment: exp-heart-failure,
Id: AutoML_c2fe52fb-8c82-4dfa-b38d-512be1a9b73a_50,
Type: azureml.scriptrun,
Status: Completed)


In [9]:
# Register the model from this AutoML run in the workspace under a fixed name.
model = remote_run.register_model(model_name='best-automl-model')
# Echo the registered Model (name, id, version) so the registration is recorded in the output.
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-159915', subscription_id='aa7cf8e8-d23f-4bce-a7b9-1f0b4e0ac8ee', resource_group='aml-quickstarts-159915'), name=best-automl-model, id=best-automl-model:2, version=2, tags={}, properties={})


## Model Deployment


In [10]:
# NOTE(review): disabled cell. The hard-coded run id below does not match the
# AutoML run id shown in this notebook's outputs (AutoML_c2fe52fb-..._50) —
# confirm which run is intended before re-enabling.
# from azureml.train.automl.run import AutoMLRun
# run_id = "AutoML_8e747eb7-a067-497e-907b-9064d3104f5c_49"
# best_run = AutoMLRun(experiment, run_id)


In [11]:
# NOTE(review): disabled cell, yet stored output (a deployment log) follows it,
# so a fresh Restart & Run All will not reproduce that output.
# When enabled, saves the best run's generated scoring file locally as 'scoring.py'.
# script_file = 'scoring.py'
# best_run.download_file('outputs/scoring_file_v_2_0_0.py', 'scoring.py')



heart-failure-prediction
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-10-01 15:34:10+00:00 Creating Container Registry if not exists..
2021-10-01 15:44:10+00:00 Registering the environment.
2021-10-01 15:44:11+00:00 Use the existing image.
2021-10-01 15:44:11+00:00 Generating deployment configuration.
2021-10-01 15:44:12+00:00 Submitting deployment to compute..
2021-10-01 15:44:17+00:00 Checking the status of deployment heart-failure-prediction..
2021-10-01 15:49:46+00:00 Checking the status of inference endpoint heart-failure-prediction.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [None]:
# NOTE(review): disabled cell. entry_script is 'score.py', but the disabled
# download cell saves the scoring script as 'scoring.py' — confirm which file
# is intended before re-enabling.
# inference_config = InferenceConfig(entry_script='score.py', environment=best_run.get_environment())
# aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
# aci_service_name = 'heart-failure-model-prediction'


In [None]:
# NOTE(review): disabled cell. The stored deployment log in this notebook names
# the service 'heart-failure-prediction', whereas aci_service_name is set to
# 'heart-failure-model-prediction' — confirm the intended service name.
# print(aci_service_name)
# aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
# aci_service.wait_for_deployment(True)
# print(aci_service.state)

In [12]:
# aci_service.update(enable_app_insights=True)

## Checking Prediction on Test Data

In [13]:
# NOTE(review): disabled cell. df.sample(10) is called without random_state, so
# the sampled rows (and any predictions shown for them) differ between runs.
# df_test = df.sample(10)
# df_test_label = df_test.pop('DEATH_EVENT')

In [14]:
# NOTE(review): disabled cell. `test_data` is not defined in any visible cell;
# the sampling cell defines `df_test` — presumably that is the frame meant here.
# import json 
# import requests 

# df_test_sample = json.dumps({'data': test_data.to_dict(orient='records')})


'{"result": [0, 1, 0, 0, 0, 1, 0, 0, 1, 0]}'

In [None]:
# service_response = aci_service.run(input_data=df_test_sample)
# service_response

In [10]:
# NOTE(review): disabled cell. `df_test_data` is not defined in any visible cell — confirm the intended variable.
# df_test_data.head(2).to_dict(orient='records')

In [23]:
#%run endpoint.py

{"result": [0, 1]}


## Printing the Logs

In [None]:
# print(aci_service.get_logs())

## Deleting the endpoint service

In [22]:
# aci_service.delete()