# Automated ML

Import all dependencies

In [11]:
import json
import azureml.core
from azureml.core import Workspace, Experiment, Model
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.widgets import RunDetails
from azureml.train.automl import AutoMLConfig
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.dataset import Dataset


print('SDK version:', azureml.core.VERSION)



SDK version: 1.42.0


## Initialize workspace
Initialize a workspace from configuration

In [5]:
ws = Workspace.from_config()
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# choose a name for experiment
experiment_name = 'automl-heart-experiment'

experiment=Experiment(ws, experiment_name)

run = experiment.start_logging()

main-workspace
mlops
eastus2
167b9ae9-f575-4021-810b-253a2e5be2bd


## Create Compute Cluster

In [6]:
amlcompute_cluster_name = "auto-ml"

# Verify if the cluster does not exist
try:
    compute_target = ComputeTarget(
        workspace = ws,
        name = amlcompute_cluster_name
    )
    print('Found Existing cluster, use it')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size = 'STANDARD_D2_V2',
        max_nodes = 4
    )
    compute_target = ComputeTarget.create(
        ws, 
        amlcompute_cluster_name,
        compute_config
    )
compute_target.wait_for_completion(
    show_output = True,
    min_node_count = 1,
    timeout_in_minutes = 3
)

Found Existing cluster, use it
Succeeded.....................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


## Dataset
I used the Heart Failure Dataset from kaggle.
Cardiovascular diseases (CVDs) are the number 1 cause of death globally, taking an estimated 17.9 million lives each year, which accounts for 31% of all deaths worlwide.
Heart failure is a common event caused by CVDs and this dataset contains 12 features that can be used to predict mortality by heart failure.

Most cardiovascular diseases can be prevented by addressing behavioural risk factors such as tobacco use, unhealthy diet and obesity, physical inactivity and harmful use of alcohol using population-wide strategies.

People with cardiovascular disease or who are at high cardiovascular risk (due to the presence of one or more risk factors such as hypertension, diabetes, hyperlipidaemia or already established disease) need early detection and management wherein a machine learning model can be of great help.

I want to create a model to a model to predict mortality by heart failure.

In [14]:
key = "heart-failure"

dataset = ws.datasets[key]

df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
count,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0,299.0
mean,60.833893,0.431438,581.839465,0.41806,38.083612,0.351171,263358.029264,1.39388,136.625418,0.648829,0.32107,130.26087,0.32107
std,11.894809,0.496107,970.287881,0.494067,11.834841,0.478136,97804.236869,1.03451,4.412477,0.478136,0.46767,77.614208,0.46767
min,40.0,0.0,23.0,0.0,14.0,0.0,25100.0,0.5,113.0,0.0,0.0,4.0,0.0
25%,51.0,0.0,116.5,0.0,30.0,0.0,212500.0,0.9,134.0,0.0,0.0,73.0,0.0
50%,60.0,0.0,250.0,0.0,38.0,0.0,262000.0,1.1,137.0,1.0,0.0,115.0,0.0
75%,70.0,1.0,582.0,1.0,45.0,1.0,303500.0,1.4,140.0,1.0,1.0,203.0,1.0
max,95.0,1.0,7861.0,1.0,80.0,1.0,850000.0,9.4,148.0,1.0,1.0,285.0,1.0


In [15]:
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


## AutoML Configuration

Here are the automl configurations:
* **max_concurrent_iterations:** Represents the maximum number of iterations that would be executed in parallel. The default value is 1.
* **experiment_timeout_minutes:** The minutes before experiment terminates
* **n_cross_validations:** To avoid overfitting, we need to use cross validation
* **primary_metric:** Accuracy
* **task:** Classification

In [16]:
# automl settings
automl_settings = {
    "experiment_timeout_minutes": 15,
    "iterations": 40,
    "max_concurrent_iterations": 4,
    "n_cross_validations": 3,
    "primary_metric": "accuracy"
}

# automl config
automl_config = AutoMLConfig(
    compute_target = compute_target,
    task = "classification",
    training_data = dataset,
    label_column_name = 'DEATH_EVENT',
    enable_early_stopping = True,
    debug_log = 'automl_errors.log',
    **automl_settings
)

In [17]:
# Submit your experiment
remote_run = experiment.submit(automl_config, show_output = True)

Submitting remote run.
No run_configuration provided, running on auto-ml with default configuration
Running on remote compute: auto-ml


Experiment,Id,Type,Status,Details Page,Docs Page
automl-heart-experiment,AutoML_e69b5bd7-1b0c-4db0-b840-db004666ba9d,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Something went wrong while printing the experiment progress but the run is still executing on the compute target. 
Please check portal for updated status: https://ml.azure.com/runs/AutoML_e69b5bd7-1b0c-4db0-b840-db004666ba9d?wsid=/subscriptions/167b9ae9-f575-4021-810b-253a2e5be2bd/resourcegroups/mlops/workspaces/main-workspace&tid=292035c0-a3ff-49d4-9b39-8cab5c9e0b53


## Run Details

In [18]:
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [19]:
remote_run.wait_for_completion(show_output=True)

Experiment,Id,Type,Status,Details Page,Docs Page
automl-heart-experiment,AutoML_e69b5bd7-1b0c-4db0-b840-db004666ba9d,automl,Failed,Link to Azure Machine Learning studio,Link to Documentation



Something went wrong while printing the experiment progress but the run is still executing on the compute target. 
Please check portal for updated status: https://ml.azure.com/runs/AutoML_e69b5bd7-1b0c-4db0-b840-db004666ba9d?wsid=/subscriptions/167b9ae9-f575-4021-810b-253a2e5be2bd/resourcegroups/mlops/workspaces/main-workspace&tid=292035c0-a3ff-49d4-9b39-8cab5c9e0b53


{'runId': 'AutoML_e69b5bd7-1b0c-4db0-b840-db004666ba9d',
 'target': 'auto-ml',
 'status': 'Failed',
 'startTimeUtc': '2022-08-01T11:52:45.713684Z',
 'endTimeUtc': '2022-08-01T13:12:36.048365Z',
 'services': {},
 'error': {'error': {'code': 'UserError',
   'message': 'Run timed out. No model completed training in the specified time. Possible solutions: \n1) Please check if there are enough compute resources to run the experiment. \n2) Increase experiment timeout when creating a run. \n3) Subsample your dataset to decrease featurization/training time. ',
   'messageFormat': 'Run timed out. No model completed training in the specified time. Possible solutions: \n1) Please check if there are enough compute resources to run the experiment. \n2) Increase experiment timeout when creating a run. \n3) Subsample your dataset to decrease featurization/training time. ',
   'messageParameters': {},
   'target': 'ExperimentTimeout',
   'details': [],
   'innerError': {'code': 'ResourceExhausted',
  

## Best Model

In [None]:
best_run, fitted_model = remote_run.get_output()

In [None]:
best_metrics = best_run.get_metrics()
print(f"Best Run Id: {best_run.id}", 
      f"Accuracy: {best_metrics['accuracy']}",
      f"Best Metrics: {best_metrics}",
      f"Best Model: {fitted_model}")


In [None]:
print(fitted_model._final_estimator)

In [20]:
# Save the best model
model = best_run.register_model(
    model_name = 'heart-failure-best-model-automl',
    model_path = './outputs/model.pkl'
)
print(model)

NameError: name 'best_run' is not defined

In [None]:
for model in Model.list(ws):
    print(model.name, 'Version:', model.version)

## Model Deployment

Deploy the model using azure container instance

In [None]:
import os
os.makedirs('./aml-model', exist_ok=True)
best_run.download_file(
    '/outputs/model.pkl',
    os.path.join('./aml-model','heart-failure-best-model-automl.pkl'))

for f in best_run.get_file_names():
    if f.startswith('outputs'):
        output_file_path = os.path.join('./aml-model', f.split('/')[-1])
        print(f"Downloading the {f} to {output_file_path} ")
        best_run.download_file(
            name = f,
            output_file_path = output_file_path
        )

In [None]:
# Define the environment
best_run.download_file('outputs/conda_env_v_1_0_0_.yml', 'conda_env.yml')
environment = Environment.from_conda_specification(
    name = 'heart-failure-env',
    file_path = 'conda_env.yml'
)

In [None]:
# Download the scoring file
model = best_run.register_model('oyputs/scoring_file_v_1_0_0.py', 'score.py')

In [None]:
# Inference configuration
inference_config = InferenceConfig(
    entry_script = 'score.py',
    environment = environment
)

In [None]:
# ACI configuration
aci_config = AciWebservice.deploy_configuration(
    cpu_cores = 1,
    memory_gb = 1,
    auth_enabled = True,
    enable_app_insights = True
)

In [None]:
# Deploy
webservice - Model.deploy(
    workspace = ws,
    name = 'heart-failure-ws',
    models = [model],
    inference_config = inference_config,
    deployment_config = aci_config,
    overwrite=True
)

print('****'*20)

## Consume

In [None]:
from pprint import pprint

# Select samples from the df
test_data = df.sample(5)
test_label = test_data.pop('DEATH_EVENT')

# Convert to JSON
data_json = test_data.to_dict(orient='records')

data = json.dumps({
    'data', data_json
})

pprint(data)

In [None]:
# Predictions
output = webservice.run(data)
print(output)

In [None]:
webservice.get_logs()

In [None]:
webservice.delete()
# compute_target.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
