# Automated ML

In [1]:
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.webservice import Webservice
from azureml.core.webservice import AciWebservice
from azureml.core.model import InferenceConfig
from azureml.core.model import Model

print("SDK version:", azureml.core.VERSION)

SDK version: 1.36.0


## Dataset

### Overview
I'll be using the dataset that was suggested in the starter file:
[Heart Failure](https://archive.ics.uci.edu/ml/datasets/Heart+failure+clinical+records#).

In [2]:
# Load the workspace via Workspace.from_config() and open the experiment
# under which the AutoML run will be tracked.
ws = Workspace.from_config()
experiment_name = 'heart-failure-experiment'
experiment = Experiment(ws, experiment_name)

# Dataset registration details
key = 'heart-failure'
description_text = 'heart failure data. See https://archive.ics.uci.edu/ml/datasets/Heart+failure+clinical+records# for more information.'
data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv'

# Look the registered datasets up once, then either reuse the existing
# registration or create it from the source CSV.
registered_datasets = ws.datasets
found = key in registered_datasets

if found:
    dataset = registered_datasets[key]
else:
    dataset = Dataset.Tabular.from_delimited_files(data_url)
    dataset = dataset.register(workspace=ws,
                               name=key,
                               description=description_text)

# Materialise the tabular dataset as a pandas DataFrame and preview the first rows
df = dataset.to_pandas_dataframe()
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


### Create compute cluster


In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to reuse or create
amlcompute_cluster_name = "cpu-cluster"

try:
    # Reuse the cluster when one with this name already exists in the workspace
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # No such cluster yet: provision one.
    # For GPU, use vm_size="STANDARD_NC6"; vm_priority='lowpriority' is an optional setting.
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',
                                                           max_nodes=10)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    # Wait for provisioning so that a failure surfaces here rather than
    # later, when the experiment is submitted to the cluster.
    compute_target.wait_for_completion(show_output=True)

Found existing cluster, use it.


## AutoML Configuration

As stated above, we will classify individuals as being likely to die in the next period.
Unlike the notebook that uses hyperparameter tuning, however, this approach makes use of Azure's AutoML capabilities.
To be cautious when evaluating fit, weighted AUC is used as the primary metric.

In [5]:
# Folder passed to AutoML through the `path` argument
project_folder = './aml-heart-failure'

# Run-level settings: weighted AUC is the primary metric, and 9 child
# iterations may run concurrently (10-node cluster = 9 experiments + 1 parent).
automl_settings = dict(
    max_concurrent_iterations=9,
    primary_metric='AUC_weighted',
)

# Classification on the registered dataset with DEATH_EVENT as the label
automl_config = AutoMLConfig(
    task="classification",
    compute_target=compute_target,
    training_data=dataset,
    label_column_name="DEATH_EVENT",
    featurization='auto',
    enable_early_stopping=True,
    experiment_timeout_minutes=20,
    path=project_folder,
    debug_log="aml-error-log.log",
    **automl_settings,
)

In [6]:
# Submit the AutoML configuration as a remote run; progress is not streamed into this cell
aml_run = experiment.submit(config=automl_config, show_output=False)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
heart-failure-experiment,AutoML_d3d7619a-7438-4a73-a422-7771f8b72101,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

To get details on the run, you can have a look at the `RunDetails` widget.

In [7]:
# Show the run-monitoring widget for the submitted AutoML run
from azureml.widgets import RunDetails
RunDetails(aml_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

After the run has finished, you can get the best model and have a look at its details.



In [8]:
# The experiment was submitted with show_output=False, so nothing above blocks
# until training is done. Wait for the AutoML run to complete before asking for
# its best child run; otherwise a fresh "Restart & Run All" would query a run
# that has not finished yet.
aml_run.wait_for_completion(show_output=False)

# Best child run together with its fitted pipeline
best_run_automl, fitted_model_automl = aml_run.get_output()
# Model name stored in the best run's properties
model_name = best_run_automl.properties['model_name']

In [9]:
# Show which child run was returned as the best one
print(best_run_automl)

Run(Experiment: heart-failure-experiment,
Id: AutoML_d3d7619a-7438-4a73-a422-7771f8b72101_74,
Type: azureml.scriptrun,
Status: Completed)


In [10]:
# Inspect the steps of the fitted pipeline (data transformer followed by the final estimator)
fitted_model_automl.steps

[('datatransformer',
  DataTransformer(
      task='classification',
      is_onnx_compatible=False,
      enable_feature_sweeping=True,
      enable_dnn=False,
      force_text_dnn=False,
      feature_sweeping_timeout=86400,
      featurization_config=None,
      is_cross_validation=True,
      feature_sweeping_config={}
  )),
 ('prefittedsoftvotingclassifier',
  PreFittedSoftVotingClassifier(
      estimators=[('46', Pipeline(
          memory=None,
          steps=[('standardscalerwrapper', StandardScalerWrapper(
              copy=True,
              with_mean=False,
              with_std=False
          )), ('randomforestclassifier', RandomForestClassifier(
              bootstrap=True,
              ccp_alpha=0.0,
              class_weight='balanced',
              criterion='gini',
              max_depth=None,
              max_features=0.5,
              max_leaf_nodes=None,
              max_samples=None,
              min_impurity_decrease=0.0,
              min_impurity_

In [11]:
# taken from :
# https://docs.microsoft.com/de-de/azure/machine-learning/how-to-configure-auto-features

from pprint import pprint

def print_model(model, prefix=""):
    """Print every step of a pipeline-like ``model`` with its parameters.

    ``model.steps`` is walked in order and each step name is printed with
    ``prefix`` in front. A step whose object has both ``estimators`` and
    ``weights`` is treated as an ensemble: its member names and weights are
    pretty-printed, then each member is printed recursively with the member's
    own name plus ``' - '`` as the prefix. Any other step has the result of
    ``get_params()`` pretty-printed.
    """
    for step in model.steps:
        step_name, step_obj = step[0], step[1]
        print(prefix + step_name)

        is_ensemble = hasattr(step_obj, 'estimators') and hasattr(step_obj, 'weights')
        if not is_ensemble:
            pprint(step_obj.get_params())
            print()
            continue

        member_names = [member[0] for member in step_obj.estimators]
        pprint({'estimators': member_names, 'weights': step_obj.weights})
        print()
        for member in step_obj.estimators:
            # The nested prefix is built from the member name only
            print_model(member[1], member[0] + ' - ')

# Print each step of the fitted AutoML pipeline together with its parameters
print_model(fitted_model_automl)

datatransformer
{'enable_dnn': False,
 'enable_feature_sweeping': True,
 'feature_sweeping_config': {},
 'feature_sweeping_timeout': 86400,
 'featurization_config': None,
 'force_text_dnn': False,
 'is_cross_validation': True,
 'is_onnx_compatible': False,
 'observer': None,
 'task': 'classification',
 'working_dir': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/notebook170222/code/Users/odl_user_170222/udacity-final/starter_file'}

prefittedsoftvotingclassifier
{'estimators': ['46', '30', '58', '48', '33', '24', '3', '47', '45'],
 'weights': [0.26666666666666666,
             0.06666666666666667,
             0.06666666666666667,
             0.06666666666666667,
             0.13333333333333333,
             0.06666666666666667,
             0.06666666666666667,
             0.06666666666666667,
             0.2]}

46 - standardscalerwrapper
{'class_name': 'StandardScaler',
 'copy': True,
 'module_name': 'sklearn.preprocessing._data',
 'with_mean': False,
 'with_std': False}

46 -

In [12]:
# List the files stored with the best run (model pickle, scoring scripts, logs)
best_run_automl.get_file_names()

['accuracy_table',
 'automl_driver.py',
 'confusion_matrix',
 'logs/azureml/18_azureml.log',
 'logs/azureml/azureml_automl.log',
 'outputs/conda_env_v_1_0_0.yml',
 'outputs/engineered_feature_names.json',
 'outputs/env_dependencies.json',
 'outputs/featurization_summary.json',
 'outputs/internal_cross_validated_models.pkl',
 'outputs/model.pkl',
 'outputs/pipeline_graph.json',
 'outputs/scoring_file_v_1_0_0.py',
 'outputs/scoring_file_v_2_0_0.py',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/lifecycler/vm-bootstrapper.log',
 'user_logs/std_log.txt']

In [13]:
# Metrics recorded for the best run
best_run_automl.get_metrics()

{'precision_score_weighted': 0.8585312359034027,
 'AUC_weighted': 0.9178054571816012,
 'f1_score_micro': 0.842528735632184,
 'norm_macro_recall': 0.6485009320072249,
 'recall_score_micro': 0.842528735632184,
 'precision_score_macro': 0.8219380241455051,
 'recall_score_macro': 0.8242504660036125,
 'accuracy': 0.842528735632184,
 'f1_score_weighted': 0.8419072653280326,
 'AUC_macro': 0.9178054571816012,
 'recall_score_weighted': 0.842528735632184,
 'average_precision_score_weighted': 0.9314271581501158,
 'matthews_correlation': 0.644675168109656,
 'weighted_accuracy': 0.8540780600207262,
 'AUC_micro': 0.9231835116924296,
 'log_loss': 0.3999267558987434,
 'f1_score_macro': 0.8133131655472596,
 'precision_score_micro': 0.842528735632184,
 'average_precision_score_macro': 0.9099130955220188,
 'balanced_accuracy': 0.8242504660036125,
 'average_precision_score_micro': 0.9252395195733742,
 'confusion_matrix': 'aml://artifactId/ExperimentRun/dcid.AutoML_d3d7619a-7438-4a73-a422-7771f8b72101_74/c

In [14]:
# All properties attached to the best run
best_run_automl.properties

{'runTemplate': 'automl_child',
 'pipeline_id': '__AutoML_Ensemble__',
 'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'AUC_weighted\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'heart-failure-experiment\',\'compute_target\':\'cpu-cluster\',\'subscription_id\':\'976ee174-3882-4721-b90a-b5fef6b72f24\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_d3d7619a-7438-4a73-a422-7771f8b72101_74","experiment_name":"heart-failure-experiment","workspace_name":"quick-starts-ws-170222","subscription_id":"976ee174-3882-4721-b90a-b5fef6b72f24","resource_group_name":"aml-quickstarts-170222"}}]}',
 'training_percent': '100',
 'predicted_cost': None,
 'iteration': '74',
 '_aml_system_scenario_identification': 'Remote.Child',
 '

In [15]:
# Algorithm recorded for the best run
best_run_automl.properties['run_algorithm']

'VotingEnsemble'

In [16]:
# String stored under 'run_properties' (a textual summary of the estimator settings)
best_run_automl.properties['run_properties']

"\n    estimators=[('46', Pipeline(\n        memory=None,\n        steps=[('standardscalerwrapper', StandardScalerWrapper(\n            copy=True,\n            with_mean=False,\n            with_std=False\n        "

In [17]:
# Algorithms that were combined in the ensemble
best_run_automl.properties['ensembled_algorithms']

"['RandomForest', 'GradientBoosting', 'GradientBoosting', 'LightGBM', 'RandomForest', 'ExtremeRandomTrees', 'RandomForest', 'RandomForest', 'GradientBoosting']"

In [18]:
# Register the best run's pickled model in the workspace under a fixed name
model_name = 'aml-best-heart-failure'
model = best_run_automl.register_model(
    model_name=model_name,
    model_path='outputs/model.pkl',
)

In [19]:
# Download the generated scoring script and the pickled model into automl_output/
for remote_name, local_path in (
    ('outputs/scoring_file_v_2_0_0.py', 'automl_output/score.py'),
    ('outputs/model.pkl', 'automl_output/model.pkl'),
):
    best_run_automl.download_file(remote_name, local_path)

In [20]:
# Show the registered model (name, id, version)
print(model)

Model(workspace=Workspace.create(name='quick-starts-ws-170222', subscription_id='976ee174-3882-4721-b90a-b5fef6b72f24', resource_group='aml-quickstarts-170222'), name=aml-best-heart-failure, id=aml-best-heart-failure:1, version=1, tags={}, properties={})


## Model Deployment

All that's left to do is deploy the model.
After successful deployment, we can trigger the endpoint using a request.

In [21]:
# Deploy the registered model as a web service, using the scoring script
# that was downloaded from the best run as the entry point.
service_name = 'aml-service'
inference_config = InferenceConfig(entry_script='automl_output/score.py')
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    overwrite=True,
)
# Block until the deployment has finished, printing progress along the way
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-12-29 13:42:17+00:00 Creating Container Registry if not exists..
2021-12-29 13:52:18+00:00 Use the existing image.
2021-12-29 13:52:18+00:00 Submitting deployment to compute..
2021-12-29 13:52:22+00:00 Checking the status of deployment aml-service..
2021-12-29 13:56:39+00:00 Checking the status of inference endpoint aml-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [35]:
# Check the state of the deployed service before calling it
print(service.state)

Healthy


In [34]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable client-side HTTPS certificate verification.

    The default HTTPS context factory of the ``ssl`` module is replaced by
    ``ssl._create_unverified_context`` only when all of the following hold:
    ``allowed`` is truthy, the ``PYTHONHTTPSVERIFY`` environment variable is
    unset or empty, and ``ssl`` exposes ``_create_unverified_context``.
    In every other case nothing is changed.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        # Verification behaviour was set explicitly via the environment; leave it alone
        return
    if not getattr(ssl, '_create_unverified_context', None):
        return
    # Process-wide switch: later HTTPS requests skip server certificate checks
    ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request payload: a single patient record with every feature column of the
# training data except the label (DEATH_EVENT).
data = {
    "Inputs": {
        "data":
        [
            {
                'age': "70",
                'anaemia': "1",
                'creatinine_phosphokinase': "582",
                'diabetes': "0",
                'ejection_fraction': "17",
                'high_blood_pressure': "0",
                'platelets': "265000",
                'serum_creatinine': "3",
                'serum_sodium': "127",
                'sex': "0",
                'smoking': "0",
                'time': "5",
            },
        ],
    },
    "GlobalParameters": {
    }
}

body = str.encode(json.dumps(data))

# Take the endpoint from the deployed service instead of hard-coding it:
# the address changes with every new deployment.
url = service.scoring_uri
api_key = '' # Replace this with the API key for the web service; here: no key
headers = {'Content-Type': 'application/json'}
if api_key:
    # Only send an Authorization header when a key is actually configured
    headers['Authorization'] = 'Bearer ' + api_key

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once it has been read
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(json.loads(error.read().decode("utf8", 'ignore')))

b'{"Results": [1]}'


Delete the service:

In [36]:
# Remove the deployed web service now that the endpoint has been tested
service.delete()

In [52]:
# Check the service state after requesting deletion
print(service.state)

Deleting
