# Automated ML

### Import libraries

In [1]:
from azureml.core import Workspace, Experiment

### Create workspace and experiment instances

In [2]:
# Name of the experiment created below.
experiment_name = 'capstone-spam-classification-experiment'

# Load the workspace from its saved configuration, then bind the experiment to it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

###  Dataset Overview
The dataset we are using is a spam classification dataset obtained from Kaggle. We are going to perform text classification: predicting whether a message's `Category` is `ham` or `spam` from its text.

### Get data

In [3]:
from azureml.core import Dataset
# Look up the dataset registered in the workspace under the name 'capstone-spam-dataset'.
training_dataset = Dataset.get_by_name(ws, name='capstone-spam-dataset')

In [81]:
# Convert the dataset to a pandas DataFrame; as the last expression in the cell
# it is rendered as the cell output.
training_dataset.to_pandas_dataframe()

Unnamed: 0,Category,Message,Column3
0,ham,"Go until jurong point, crazy.. Available only ...",
1,ham,Ok lar... Joking wif u oni...,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,
3,ham,U dun say so early hor... U c already then say...,
4,ham,"Nah I don't think he goes to usf, he lives aro...",
...,...,...,...
5569,spam,This is the 2nd time we have tried 2 contact u...,
5570,ham,Will ü b going to esplanade fr home?,
5571,ham,"Pity, * was in mood for that. So...any other s...",
5572,ham,The guy did some bitching but I acted like i'd...,


### Connect to compute target

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the compute cluster used by this notebook.
cluster_name = "capstone-compute-cluster"

# Reuse the cluster when it already exists in the workspace; otherwise provision it.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Add a location='<region>' argument to place the compute in a different region.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster is ready, streaming progress to the cell output.
compute_target.wait_for_completion(show_output=True)

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration used below.

In [5]:
from azureml.train.automl import AutoMLConfig

# Settings forwarded to AutoMLConfig as keyword arguments.
automl_settings = {
    # Number of cross-validation folds used to score each candidate model.
    "n_cross_validations": 2,
    # Metric AutoML optimises when ranking candidate models.
    "primary_metric": 'accuracy',
    # Stop the experiment once the score stops improving.
    "enable_early_stopping": True,
    # An AmlCompute cluster runs one iteration per node, so this must not
    # exceed the max_nodes=4 the compute cluster is provisioned with.
    "max_concurrent_iterations": 4,
    # Overall time budget for the experiment: 0.25 h = 15 minutes.
    "experiment_timeout_hours": 0.25,
    # Let AutoML choose the featurization steps automatically.
    "featurization": 'auto',
}

# Classification task on the registered dataset: 'Category' is the label
# column and the remaining columns are used as features.
automl_config = AutoMLConfig(
    task='classification',
    compute_target=compute_target,
    training_data=training_dataset,
    label_column_name='Category',
    **automl_settings
)

In [6]:
# Submit the AutoML configuration to the experiment and keep the returned run handle.
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
capstone-spam-classification-experiment,AutoML_d7b359dc-ccf1-4169-99c2-f8682c16f9cb,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


### Run Details

In [7]:
from azureml.widgets import RunDetails
# Show the run-details widget for the submitted run.
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [8]:
# Block until the remote run finishes; the value returned by the call is
# rendered as the cell output.
remote_run.wait_for_completion()

{'runId': 'AutoML_d7b359dc-ccf1-4169-99c2-f8682c16f9cb',
 'target': 'capstone-compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-09-25T05:30:59.316793Z',
 'endTimeUtc': '2022-09-25T05:50:13.121438Z',
 'services': {},
   'message': 'No scores improved over last 10 iterations, so experiment stopped early. This early stopping behavior can be disabled by setting enable_early_stopping = False in AutoMLConfig for notebook/python SDK runs.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '2',
  'target': 'capstone-compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"8d6ef96b-0e4e-4d5f-b5eb-bda3397b0c95\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versi

### Get the best model and display its properties

In [9]:
# get_output() yields the best child run together with its fitted model.
best_run, fitted_model = remote_run.get_output()

# Print every metric recorded for the best run, one "name: value" pair per line.
best_run_metrics = best_run.get_metrics()
for name in best_run_metrics:
    value = best_run_metrics[name]
    print(f"{name}: {value}")

Package:azureml-automl-runtime, training version:1.45.0, current version:1.44.0
Package:azureml-core, training version:1.45.0, current version:1.44.0
Package:azureml-dataset-runtime, training version:1.45.0, current version:1.44.0
Package:azureml-defaults, training version:1.45.0, current version:1.44.0
Package:azureml-interpret, training version:1.45.0, current version:1.44.0
Package:azureml-mlflow, training version:1.45.0, current version:1.44.0
Package:azureml-pipeline-core, training version:1.45.0, current version:1.44.0
Package:azureml-responsibleai, training version:1.45.0, current version:1.44.0
Package:azureml-telemetry, training version:1.45.0, current version:1.44.0
Package:azureml-train-automl-client, training version:1.45.0, current version:1.44.0
Package:azureml-train-automl-runtime, training version:1.45.0, current version:1.44.0
Package:azureml-train-core, training version:1.45.0, current version:1.44.0
Package:azureml-train-restclients-hyperdrive, training version:1.45.

f1_score_micro: 0.9883387154646573
precision_score_weighted: 0.9880346261102722
log_loss: 0.06150898727805547
average_precision_score_macro: 0.6609334133159324
accuracy: 0.9883387154646573
f1_score_macro: 0.6498394428538288
weighted_accuracy: 0.997003885565013
AUC_micro: 0.9988605107684039
norm_macro_recall: 0.4611080792948893
f1_score_weighted: 0.9879859923328818
matthews_correlation: 0.9491200751191275
recall_score_macro: 0.6407387195299261
recall_score_micro: 0.9883387154646573
average_precision_score_micro: 0.9969792778978809
precision_score_micro: 0.9883387154646573
balanced_accuracy: 0.6407387195299261
AUC_weighted: 0.9927306350988643
average_precision_score_weighted: 0.9962879420638737
AUC_macro: 0.8287535097769344
recall_score_weighted: 0.9883387154646573
precision_score_macro: 0.6598322082264922
confusion_matrix: aml://artifactId/ExperimentRun/dcid.AutoML_d7b359dc-ccf1-4169-99c2-f8682c16f9cb_39/confusion_matrix
accuracy_table: aml://artifactId/ExperimentRun/dcid.AutoML_d7b359d

In [10]:
# List the names of the files stored with the best run.
best_run.get_file_names()

['accuracy_table',
 'automl_driver.py',
 'confusion_matrix',
 'logs/azureml/azureml_automl.log',
 'outputs/conda_env_v_1_0_0.yml',
 'outputs/engineered_feature_names.json',
 'outputs/env_dependencies.json',
 'outputs/featurization_summary.json',
 'outputs/generated_code/conda_environment.yaml',
 'outputs/generated_code/script.py',
 'outputs/generated_code/script_run_notebook.ipynb',
 'outputs/internal_cross_validated_models.pkl',
 'outputs/model.pkl',
 'outputs/pipeline_graph.json',
 'outputs/run_id.txt',
 'outputs/scoring_file_pbi_v_1_0_0.py',
 'outputs/scoring_file_v_1_0_0.py',
 'outputs/scoring_file_v_2_0_0.py',
 'system_logs/cs_capability/cs-capability.log',
 'system_logs/hosttools_capability/hosttools-capability.log',
 'system_logs/lifecycler/execution-wrapper.log',
 'system_logs/lifecycler/lifecycler.log',
 'system_logs/metrics_capability/metrics-capability.log',
 'system_logs/snapshot_capability/snapshot-capability.log',
 'user_logs/std_log.txt']

### Save the model

In [11]:
import joblib
# Serialise the fitted model to 'best-automl-model.pkl' in the current working directory.
joblib.dump(fitted_model, 'best-automl-model.pkl')

['best-automl-model.pkl']

### Register the best model

In [26]:
from azureml.core import Model
# Register the locally saved pickle file in the workspace under the name
# 'best-automl-model'; the returned Model handle is kept in `model`.
model = Model.register(
    workspace=ws, 
    model_name='best-automl-model', 
    model_path='./best-automl-model.pkl'
)

Registering model best-automl-model


# Model Deployment

### Create an inference config

In [70]:
from azureml.core import Environment
from azureml.core.model import InferenceConfig

# Get the environment
from azureml.automl.core.shared import constants

# Local file that receives the conda specification stored with the best run.
conda_file = 'conda_dependencies.yml'
best_run.download_file(constants.CONDA_ENV_FILE_PATH, conda_file)

# Build the deployment environment from the downloaded conda specification.
env = Environment.from_conda_specification(
    name='deployment-env',
    file_path=conda_file,
)

# Pair the scoring script with that environment.
inference_config = InferenceConfig(
    entry_script="./automl_score.py",
    source_directory=".",
    environment=env,
)

### Define the deployment config - we deploy on Azure Container Instance (ACI)

In [71]:
from azureml.core.webservice import AciWebservice

# ACI container sizing plus service options: authentication and
# Application Insights are both switched on.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=2,
    auth_enabled=True,
    enable_app_insights=True,
)

### Deploy the model as a web service

In [72]:
from azureml.core.model import Model
# Deploy the registered model as the web service "automl-service";
# overwrite=True is passed so an existing service with that name is replaced.
service = Model.deploy(
    workspace=ws,
    name="automl-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)

# Block until the deployment has finished, streaming progress to the cell output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-09-25 10:40:02+00:00 Creating Container Registry if not exists.
2022-09-25 10:40:02+00:00 Registering the environment.
2022-09-25 10:40:03+00:00 Use the existing image.
2022-09-25 10:40:03+00:00 Generating deployment configuration.
2022-09-25 10:40:04+00:00 Submitting deployment to compute.
2022-09-25 10:40:05+00:00 Checking the status of deployment automl-service..
2022-09-25 10:41:57+00:00 Checking the status of inference endpoint automl-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


### Send a request to the web service you deployed to test it

In [89]:
import requests
import json
from azureml.core import Webservice

# Look up the deployed service by name to obtain its scoring endpoint.
service = Webservice(workspace=ws, name="automl-service")
scoring_uri = service.scoring_uri

# The service is deployed with authentication enabled; get_keys() returns
# two keys and the first one is used here.
key, _ = service.get_keys()

# JSON body, authenticated with the service key as a bearer token.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {key}",
}

# One sample record carrying the feature columns of the training data.
data = {
    "Inputs": {
        "data": [
            {
                "Column3": "",
                "Message": "oh my god!",
            }
        ]
    },
    "GlobalParameters": {
        "method": "predict"
    }
}

input_data = json.dumps(data)

# A timeout keeps the cell from hanging indefinitely if the endpoint is unresponsive.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)

# Flag failed requests explicitly so an error body is not mistaken for a prediction.
if not resp.ok:
    print(f"Request failed with HTTP status {resp.status_code}")
print(resp.text)

An unexpected internal error occurred. Encountered Exception: Traceback (most recent call last):
  File "/azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/python3.7/site-packages/flask/app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "/azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/python3.7/site-packages/flask/app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "/azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/python3.7/site-packages/azureml_inference_server_http/server/routes.py", line 244, in handle_score
    response = wrap_response(response)
  File "/azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/python3.7/site-packages/azureml_inference_server_http/server/routes.py", line 44, in wrap_response
    return AMLResponse(response_body, response_status_code, response_headers, json_str=True)
  File "/azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/l

### Print the logs of the web service

In [9]:
# Fetch the web service's logs and print them.
print(service.get_logs())

None


### Delete the web service

In [None]:
# Delete the deployed web service.
service.delete()

### Shutdown the computes

In [None]:
# Delete the compute cluster so it stops incurring cost.
try:
    # Look the cluster up by its name string; passing the ComputeTarget object
    # itself as `name` never matches an existing cluster, so nothing would be deleted.
    instance = ComputeTarget(workspace=ws, name=cluster_name)

    instance.delete()
    instance.wait_for_completion(show_output=True)
    print('Deleted compute resource')

except ComputeTargetException:
    # Raised when the cluster cannot be found in the workspace.
    print('Already deleted!')

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
