# Automated ML  

> All the dependencies needed to complete the project are listed below

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.data.dataset_factory import TabularDatasetFactory

# Dependencies required to create or attach AmlCompute cluster:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from azureml.pipeline.steps import AutoMLStep

# needed to display the run details
from azureml.widgets import RunDetails

import joblib
# Needed for the deployment part
from azureml.core.environment import Environment 
from azureml.core.model import InferenceConfig 
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model

import json
import requests

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.31.0


In [2]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

quick-starts-ws-152788
aml-quickstarts-152788
southcentralus
610d6e37-4747-4a20-80eb-3aad70a55f43


> I will bring at this stage the creation of the experiment and the creation / attachment of the AmlCompute to the workspace.  
> Doing so, I will keep the same approach that was suggested during the second Udacity project. 


In [3]:
experiment_name = 'heart-failure-experiment-automl'
project_folder = './outputs'

experiment = Experiment(ws, experiment_name)
experiment

# starting an interactive logging session, as recommended in Azure documentation 'how-to-log-view-metrics'
run=experiment.start_logging()

In [4]:
# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
cluster_name = "compute-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name= cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_DS3_v2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           min_nodes=1,
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

# I am using here the get_status() for a more detailed view of current AmlCompute status:
print(compute_target.get_status().serialize())

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 1, 'targetNodeCount': 1, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 1, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-07-28T12:19:00.942000+00:00', 'errors': None, 'creationTime': '2021-07-28T11:03:43.697963+00:00', 'modifiedTime': '2021-07-28T11:04:14.458382+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 1, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT1800S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS3_V2'}


## Dataset

### Overview

I will use [Kaggle](https://www.kaggle.com/andrewmvd/heart-failure-clinical-data) "Heart Failure Prediction dataset".  
This dataset is related to a study that  focused on survival analysis of 299 heart failure patients who were admitted to Institute   
of Cardiology and Allied hospital Faisalabad-Pakistan during April-December (2015).   
All the patients were aged 40 years or above, having left ventricular systolic dysfunction.  

The dataset contains the following 12 clinical features, plus one target feature ("death event"):  
A data analysis report is available onmy github repo, [here](https://github.com/JCForszp/nd00333-capstone/blob/master/Datasets/heart%20failure%20report.html)

**Clinical features:**
- age: age of the patient (years)
- anaemia: decrease of red blood cells or hemoglobin (boolean)
- high blood pressure: if the patient has hypertension (boolean)
- creatinine phosphokinase (CPK): level of the CPK enzyme in the blood (mcg/L)
- diabetes: if the patient has diabetes (boolean)
- ejection fraction: percentage of blood leaving the heart at each contraction (percentage)
- platelets: platelets in the blood (kiloplatelets/mL)
- sex: woman or man (binary)
- serum creatinine: level of serum creatinine in the blood (mg/dL)
- serum sodium: level of serum sodium in the blood (mEq/L)
- smoking: if the patient smokes or not (boolean)
- time: follow-up period (days)

**Target feature:**
- [target] death event: if the patient deceased during the follow-up period (boolean)

We are dealing here with a classification task, i.e trying to predict the outcome of the follow-up period based on the given clinical features.


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [5]:
found = False
key = "JCF-heart-failure-dataset"
description_text = "Kaggle Heart Failure Prediction dataset"

if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:
        # Create AML Dataset and register it into Workspace
        example_data = 'https://raw.githubusercontent.com/JCForszp/Azure-Machine-Learning-Engineer-Capstone-Project/master/Datasets/heart_failure_clinical_records_dataset.csv'
        dataset = Dataset.Tabular.from_delimited_files(example_data)        
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key,
                                   description=description_text)


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [6]:
print(f"Accuracy obtained by predicting the most frequent value (null acccuracy, as baseline) : {df['DEATH_EVENT'].value_counts().head(1)/len(df['DEATH_EVENT'])}.")

Accuracy obtained by predicting the most frequent value (null acccuracy, as baseline) : 0    0.67893
Name: DEATH_EVENT, dtype: float64.


## AutoML Configuration

### Note on automl settings selection:  

- **n_cross_validations**: 4,
10 is a usual value for cross-validations, but the size of the dataset is relatively small.  
Hence, a 90/10% split seems a bit disproportionate. I prefer to take a 75/25% split,  
which will end of with testing sets of 75 patients, so probably more reasonable and keeping the  
number of runs low. 
- **primary_metric**: 'accuracy',  
'accuracy' is the most frequent and easiest metrics to use for classification tasks  
- **enable_early_stopping**: True,  
According to [Azure documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py), this  
settings allows automl to terminate a score determination if the score is not improving.  
The default value is 'False' and hence, needs to be set to 'True' at config level.  
Microsoft documentation mentions that *Early stopping window starts on the 21st iteration  
and looks for early_stopping_n_iters iterations (currently set to 10).  
This means that the first iteration where stopping can occur is the 31st.*    
Hence, this setting is a nice to have, but won't be critical for our limited exercice. 
- **max_concurrent_iterations**: 4,  
According to Microsoft documentation *Represents the maximum number of iterations that would be executed in parallel.  
The default value is 1.*  
In our compute_config, we chose a value of 4, and the number of concurrent values needs to be less or equal to that number.  
Hence, the value of this setting. 
- **experiment_timeout_minutes**: 20,    
Defines how long, in minutes, the experiment should continue to run. 
Looking at Azure documentation on [how-to-configure-auto-train](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-auto-train),  
20mn seemed to be a reasonable trade-off.  
- **verbosity**: logging.INFO  
The verbosity level for writing to the log file. The default is INFO or 20. So, we could basically have skipped this setting, but it seems  
good practice to specify it every time, to assess if it's really the optimal level of detail.

### Note on automl_config settings:
- **compute_target** = compute_target,  
This is the Azure Machine Learning compute target to run the Automated Machine Learning experiment on.   
It corresponds to the compute_target we defined above in the script, right after the import of the dependencies.  
- **task**='classification',  
Three types of tasks are allowed here:'classification', 'regression', or 'forecasting'.  
As mentioned in the Dataset section, we are clearly here in a classification task. 
- **training_data**=dataset,  
This is the dataset we registered in previous cell. 
- **label_column_name**='DEATH_EVENT',  
This is the name of the target column. The original dataset on Kaggle clearly defines 'DEATH_EVENT' as being the label column.  
- **path** = project_folder,  
We set this project_folder to './capstone-project'  
- **featurization**= 'auto',  
Two values allowed: 'auto' and 'off'. Based on Microsoft doc, setting featurization to off  would mean re-doing manually    
all one-hot encoding, managing missing values,... It meakes total sense to leave automl dealing with that on a pre-cleaned dataset. 
- **debug_log** = "automl_errors.log",  
The log file to write debug information to. If not specified, 'automl.log' is used.  
I just set the name to one I chose.  
- **enable_onnx_compatible_models**=False,  
ONNX is presented as a way to optimize the inference of the ML model.[(doc)](https://docs.microsoft.com/en-us/azure/machine-learning/concept-onnx)   
We are dealing with a small-sized dataset, so I chose to leave this setting of False and I will investigate this feature separately later.   
- **automl_settings**  
Brings the automl_settings dictionary we defined above in the automl_config object. 

In [7]:
# TODO: Put your automl settings here
automl_settings = {"n_cross_validations": 5,
                    "primary_metric": 'accuracy',
                    "enable_early_stopping": True,
                    "max_concurrent_iterations": 4,
                    "experiment_timeout_minutes": 20,
                    "verbosity": logging.INFO
                    }

# TODO: Put your automl config here
automl_config = AutoMLConfig(
                            compute_target = compute_target,
                            task='classification',
                            training_data=dataset,
                            label_column_name='DEATH_EVENT',
                            path = project_folder,
                            featurization= 'auto',
                            debug_log = "automl_errors.log",
                            enable_onnx_compatible_models=False,
                            **automl_settings
                            )

In [8]:
# Experiment Submission
# Theshow_output parameter switches on the verbose logging
my_run = experiment.submit(automl_config, show_output = True)
# We use the same parameter in the wait_for_completion function on the resulting run.
my_run.wait_for_completion(show_output = True)

Submitting remote run.
No run_configuration provided, running on compute-cluster with default configuration
Running on remote compute: compute-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-experiment-automl,AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

****************************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       PASSED
DESCRIPTION:  No feature missing values were detected in the training data.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization

****************************************************************************************************

TYPE:         High cardinality feature detection
STATUS

Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-experiment-automl,AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation



****************************************************************************************************

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   MaxAbsScaler LightGBM                          0:00:37       0.8060    0.8060
         1   MaxAbsScaler XGBoostClassifier                 0:01:28       0.8259    0.8259
         2   MinMaxScaler RandomForest                      0:00:38       0.8293    0.8293
         3   MinMaxScaler RandomForest                      0:01:27      

{'runId': 'AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041',
 'target': 'compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-07-28T12:39:09.640172Z',
 'endTimeUtc': '2021-07-28T12:52:44.396115Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"a6ffd384-edf9-4546-9714-5050cdd58f09\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.31.0", "azureml-train": "1.31.0", "azureml-train-restclients-hyperdrive": "1.31.0", "azureml-train-core": "1.31.0", "azureml-train-automl": "1.31.0", "azureml-train-automl-runtime": "1.31.0", "azureml-train-automl-client": "1.31.

In [9]:
# This command fetches the run status and displays it in this notebook as confirmation. 
print("Run Status: ",my_run.get_status())

Run Status:  Completed


## Run Details

>We  use here the `RunDetails` widget to show the different experiments.

In [10]:
RunDetails(my_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Usin

In [11]:
my_run.wait_for_completion()

{'runId': 'AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041',
 'target': 'compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-07-28T12:39:09.640172Z',
 'endTimeUtc': '2021-07-28T12:52:44.396115Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"a6ffd384-edf9-4546-9714-5050cdd58f09\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-widgets": "1.31.0", "azureml-train": "1.31.0", "azureml-train-restclients-hyperdrive": "1.31.0", "azureml-train-core": "1.31.0", "azureml-train-automl": "1.31.0", "azureml-train-automl-runtime": "1.31.0", "azureml-train-automl-client": "1.31.

In [12]:
print("AutoML Run Summary: ", my_run.summary())

AutoML Run Summary:  [['StackEnsemble', 1, 0.8562146892655367], ['VotingEnsemble', 1, 0.8729943502824857], ['Failed', 1, nan], ['LightGBM', 11, 0.8428248587570621], ['XGBoostClassifier', 14, 0.846271186440678], ['LogisticRegression', 1, 0.8160451977401131], ['ExtremeRandomTrees', 2, 0.8394350282485876], ['RandomForest', 5, 0.8392655367231638], ['GradientBoosting', 1, 0.8428248587570621], ['SVM', 1, 0.7057627118644068]]


## Best Model

In the cell below, we get the best model from the automl experiments and display all the properties of the model.



### Retrieval of best model from the automl experiments

In [13]:
best_run, fitted_model = my_run.get_output() # Return the run with the corresponding best pipeline that has already been tested.
                                             # as we do not mention any parameter, get_output returns the best pipeline according to the primary metric ('accuracy'.
best_run

Package:azureml-automl-runtime, training version:1.32.0, current version:1.31.0
Package:azureml-core, training version:1.32.0, current version:1.31.0
Package:azureml-dataset-runtime, training version:1.32.0, current version:1.31.0
Package:azureml-defaults, training version:1.32.0, current version:1.31.0
Package:azureml-interpret, training version:1.32.0, current version:1.31.0
Package:azureml-mlflow, training version:1.32.0, current version:1.31.0
Package:azureml-pipeline-core, training version:1.32.0, current version:1.31.0
Package:azureml-telemetry, training version:1.32.0, current version:1.31.0
Package:azureml-train-automl-client, training version:1.32.0, current version:1.31.0
Package:azureml-train-automl-runtime, training version:1.32.0, current version:1.31.0


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-experiment-automl,AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041_36,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


### Display of all the properties of the best model

In [14]:
# we display, below, the metrics, details and properties (this is the order that makes most sense)

print('*'*50)
best_run_metrics = best_run.get_metrics()
for metric_name in best_run_metrics:
    metric = best_run_metrics[metric_name]
    print(metric_name,":" , metric)

print('*'*50)
print("Best run details :",best_run.get_details())

print('*'*50)
print("Best run properties :",best_run.get_properties())
print('*'*50)

**************************************************
recall_score_weighted : 0.8729943502824857
precision_score_macro : 0.8825869002123199
AUC_macro : 0.9170402362495386
precision_score_weighted : 0.8876404315985645
AUC_weighted : 0.9170402362495386
matthews_correlation : 0.7189991168025636
recall_score_micro : 0.8729943502824857
average_precision_score_macro : 0.9046341186826844
AUC_micro : 0.9201662357560089
weighted_accuracy : 0.8950749382888548
recall_score_macro : 0.8399702380952381
accuracy : 0.8729943502824857
average_precision_score_micro : 0.9220820253457738
f1_score_macro : 0.8460326632991864
log_loss : 0.3720217084648893
precision_score_micro : 0.8729943502824857
balanced_accuracy : 0.8399702380952381
f1_score_micro : 0.8729943502824857
average_precision_score_weighted : 0.927748463118907
norm_macro_recall : 0.6799404761904763
f1_score_weighted : 0.8672342871168602
confusion_matrix : aml://artifactId/ExperimentRun/dcid.AutoML_3f0e428c-7fee-482e-9140-a98fff4ce041_36/confusion_m

In [15]:
print(fitted_model)

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
    label_column_name=None,
    weight_column_name=None,
    cv_split_column_names=None,
    enable_streaming=None,
    timeseries_param_dict=None,
    gpu_training_param_dict={'processing_unit_type': 'cpu'}
), random_state=0, reg_alpha=0.9375, reg_lambda=1.875, subsample=0.8, tree_method='auto'))], verbose=False))], flatten_transform=None, weights=[0.2, 0.2, 0.2, 0.2, 0.2]))],
         verbose=False)


### Saving the best model

In [16]:
best_run.get_file_names()

['accuracy_table',
 'automl_driver.py',
 'azureml-logs/55_azureml-execution-tvmps_df82cce9e93bd4b7b7585d9bd7541c7b89b14864099c9aba11021fa462731c7b_d.txt',
 'azureml-logs/65_job_prep-tvmps_df82cce9e93bd4b7b7585d9bd7541c7b89b14864099c9aba11021fa462731c7b_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_df82cce9e93bd4b7b7585d9bd7541c7b89b14864099c9aba11021fa462731c7b_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'confusion_matrix',
 'explanation/dca861da/classes.interpret.json',
 'explanation/dca861da/eval_data_viz.interpret.json',
 'explanation/dca861da/expected_values.interpret.json',
 'explanation/dca861da/features.interpret.json',
 'explanation/dca861da/global_names/0.interpret.json',
 'explanation/dca861da/global_rank/0.interpret.json',
 'explanation/dca861da/global_values/0.interpret.json',
 'explanation/dca861da/local_importance_values.interpret.json',
 'explanation/dca861da/per_class_names/0.interpret.json',
 'explanati

In [17]:
best_run.get_file_names()
best_run.download_file('outputs/conda_env_v_1_0_0.yml','env.yml') # download the environment
best_run.download_file('outputs/model.pkl','automl_model.pkl')    # download the model
best_run.download_file('outputs/scoring_file_v_1_0_0.py', 'score.py') # download the scoring file

In [18]:
automl = best_run.register_model(
                        model_name = 'fitted_model', 
                        model_path = './outputs/model.pkl',
                        )


## Model Deployment

Remember you have to deploy only one of the two models you trained.. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [19]:
# Creating an inference config
inference_config = InferenceConfig(
                                    entry_script="score.py",
                                    environment=best_run.get_environment()
                                  )

# Deploying the model as a web service to an Azure Container Instance (ACI)
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, description='Heart Failure Prediction')

service_name = 'heartfprediction-webservice'
webservice = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[automl],
                       inference_config=inference_config,
                       deployment_config=aci_config)

webservice.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-07-28 12:58:26+00:00 Creating Container Registry if not exists.
2021-07-28 12:58:26+00:00 Registering the environment.
2021-07-28 12:58:26+00:00 Use the existing image.
2021-07-28 12:58:27+00:00 Generating deployment configuration.
2021-07-28 12:58:27+00:00 Submitting deployment to compute..
2021-07-28 12:58:31+00:00 Checking the status of deployment heartfprediction-webservice..
2021-07-28 13:02:13+00:00 Checking the status of inference endpoint heartfprediction-webservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"


TODO: In the cell below, send a request to the web service you deployed to test it.

In [20]:
print(webservice.state)

print("Scoring URI: ",   webservice.scoring_uri)
print("Swagger URI: ",   webservice.swagger_uri)

Healthy
Scoring URI:  http://e1efa02c-16f1-4ef8-963c-5a21b15d3339.southcentralus.azurecontainer.io/score
Swagger URI:  http://e1efa02c-16f1-4ef8-963c-5a21b15d3339.southcentralus.azurecontainer.io/swagger.json


In [21]:
import json
#df = df.drop(columns=['DEATH_EVENT'])

input_data = json.dumps({
                        'data': df.sample(10).to_dict(orient='records')
                        })

# I sent a random sample and expect a proportion of negative death event ('0') between 5 and 7 or 8,   
# based on the dataset proportion

In [22]:
input_data

'{"data": [{"age": 53.0, "anaemia": 1, "creatinine_phosphokinase": 91, "diabetes": 0, "ejection_fraction": 20, "high_blood_pressure": 1, "platelets": 418000.0, "serum_creatinine": 1.4, "serum_sodium": 139, "sex": 0, "smoking": 0, "time": 43, "DEATH_EVENT": 1}, {"age": 95.0, "anaemia": 1, "creatinine_phosphokinase": 112, "diabetes": 0, "ejection_fraction": 40, "high_blood_pressure": 1, "platelets": 196000.0, "serum_creatinine": 1.0, "serum_sodium": 138, "sex": 0, "smoking": 0, "time": 24, "DEATH_EVENT": 1}, {"age": 60.0, "anaemia": 0, "creatinine_phosphokinase": 2656, "diabetes": 1, "ejection_fraction": 30, "high_blood_pressure": 0, "platelets": 305000.0, "serum_creatinine": 2.3, "serum_sodium": 137, "sex": 1, "smoking": 0, "time": 30, "DEATH_EVENT": 0}, {"age": 75.0, "anaemia": 0, "creatinine_phosphokinase": 582, "diabetes": 0, "ejection_fraction": 40, "high_blood_pressure": 0, "platelets": 263358.03, "serum_creatinine": 1.18, "serum_sodium": 137, "sex": 1, "smoking": 0, "time": 107, "

In [23]:
scoring_uri = webservice.scoring_uri

# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
response = requests.post(scoring_uri, input_data, headers=headers)
response.json()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

TODO: In the cell below, print the logs of the web service and delete the service

In [24]:
print(webservice.get_logs())

2021-07-28T13:02:07,179606700+00:00 - gunicorn/run 
Dynamic Python package installation is disabled.
Starting HTTP server
2021-07-28T13:02:07,180735900+00:00 - iot-server/run 
2021-07-28T13:02:07,181302800+00:00 - rsyslog/run 
2021-07-28T13:02:07,239973600+00:00 - nginx/run 
rsyslogd: /azureml-envs/azureml_fc92eee9a5613508afa12283dd0b27d8/lib/libuuid.so.1: no version information available (required by rsyslogd)
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-07-28T13:02:07,691488600+00:00 - iot-server/finish 1 0
2021-07-28T13:02:07,699066600+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 20.1.0
Listening at: http://127.0.0.1:31311 (63)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 91
SPARK_HOME not set. Skipping PySpark Initialization.
Generating new fontManager, this may take some time...
Initializing logger
2021-07-28 13:02:10,907 | root | INFO | Starting up app insights client
logging socket was 

INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads
INFO:interpret_community.common.explanation_utils:Using default datastore for uploads


In [25]:
# webservice.delete()