# Automated ML

### Import libraries

In [1]:
from azureml.core import Workspace, Experiment

### Create workspace and experiment instances

In [2]:
# Name under which every run launched from this notebook is grouped.
experiment_name = 'capstone-spam-classification-experiment'

# Load the workspace from its saved configuration, then open the experiment inside it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

###  Dataset Overview
The dataset we are using is a spam classification dataset obtained from Kaggle. Each message is labelled as either `ham` or `spam`, so we are going to perform binary text classification.

### Get data

In [3]:
from azureml.core import Dataset

# Fetch the registered dataset from the workspace by its name.
training_dataset = Dataset.get_by_name(workspace=ws, name='capstone-spam-dataset')

In [4]:
# Materialize the dataset as a pandas DataFrame so the notebook renders a preview of it.
training_dataset.to_pandas_dataframe()

Unnamed: 0,Category,Message,Column3
0,ham,"Go until jurong point, crazy.. Available only ...",
1,ham,Ok lar... Joking wif u oni...,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,
3,ham,U dun say so early hor... U c already then say...,
4,ham,"Nah I don't think he goes to usf, he lives aro...",
...,...,...,...
5569,spam,This is the 2nd time we have tried 2 contact u...,
5570,ham,Will ü b going to esplanade fr home?,
5571,ham,"Pity, * was in mood for that. So...any other s...",
5572,ham,The guy did some bitching but I acted like i'd...,


### Connect to compute target

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the compute cluster that the AutoML run is submitted to.
cluster_name = "capstone-compute-cluster"

try:
    # Reuse the cluster when the workspace already has one with this name.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The lookup failed, so provision a new cluster with at most 4 STANDARD_D2_V2 nodes.
    # To use a different region for the compute, add a location='<region>' parameter.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until provisioning has finished, echoing progress into the cell output.
compute_target.wait_for_completion(show_output=True)

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

TODO: Explain why you chose the AutoML settings and configuration you used below.

In [6]:
from azureml.train.automl import AutoMLConfig

# Settings shared by every AutoML iteration of this experiment.
automl_settings = {
    # 2-fold cross-validation on the training data (no separate validation set is passed).
    "n_cross_validations": 2,
    # Candidate models are ranked by accuracy.
    "primary_metric": 'accuracy',
    # Let AutoML stop early once the score stops improving.
    "enable_early_stopping": True,
    # An AmlCompute cluster runs one iteration per node, so this must not exceed
    # the cluster's max_nodes (the cluster above is created with max_nodes=4).
    "max_concurrent_iterations": 4,
    # Cap the whole experiment at 15 minutes.
    "experiment_timeout_hours": 0.25,
    # Let AutoML choose the featurization steps automatically.
    "featurization": 'auto',
}

# Classification task predicting the 'Category' column of the training dataset,
# executed remotely on the compute cluster.
automl_config = AutoMLConfig(
    task = 'classification',
    compute_target = compute_target,
    training_data = training_dataset,
    label_column_name = 'Category',
    **automl_settings
)

In [7]:
# Submit the AutoML configuration to the experiment; the returned run handle is used by the following cells.
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
capstone-spam-classification-experiment,AutoML_285b02e6-769e-41c8-9e6b-143c00ed73ff,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


### Run Details

In [8]:
from azureml.widgets import RunDetails

# Render the notebook widget for the submitted AutoML run.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [9]:
# Block until the AutoML run has finished so its results can be retrieved in the next section.
remote_run.wait_for_completion()

### Get the best model and display its properties

In [None]:
# Retrieve the best run of the experiment together with its fitted model.
best_run, fitted_model = remote_run.get_output()

# Print every metric recorded on the best run, one "name: value" pair per line.
best_run_metrics = best_run.get_metrics()
for metric_name in best_run_metrics:
    print(f"{metric_name}: {best_run_metrics[metric_name]}")

In [None]:
# List the files stored with the best run (the deployment section downloads the conda environment file from it).
best_run.get_file_names()

### Save the model

In [None]:
import joblib

# Serialize the fitted AutoML model to a local pickle file.
joblib.dump(value=fitted_model, filename='best-automl-model.pkl')

### Register the best model

In [None]:
from azureml.core import Model

# Upload the locally saved pickle and register it in the workspace under a fixed model name.
model = Model.register(
    ws,
    model_path='./best-automl-model.pkl',
    model_name='best-automl-model',
)

# Model Deployment

### Create an inference config

In [81]:
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.automl.core.shared import constants

# Local file that receives the conda specification stored with the best run.
conda_spec_path = 'conda_dependencies.yml'
best_run.download_file(constants.CONDA_ENV_FILE_PATH, conda_spec_path)

# Build the deployment environment from the downloaded specification.
env = Environment.from_conda_specification(
    name='deployment-env',
    file_path=conda_spec_path,
)

# The scoring entry script is resolved relative to the notebook directory.
inference_config = InferenceConfig(
    entry_script="./automl_score.py",
    source_directory=".",
    environment=env,
)

### Define the deployment config - we deploy on Azure Container Instance (ACI)

In [82]:
from azureml.core.webservice import AciWebservice

# Container sizing plus key-based authentication and Application Insights for the ACI service.
aci_options = {
    "cpu_cores": 1,
    "memory_gb": 2,
    "auth_enabled": True,
    "enable_app_insights": True,
}
deployment_config = AciWebservice.deploy_configuration(**aci_options)

### Deploy the model as a web service

In [83]:
from azureml.core.model import Model

# Deploy the registered model as the "automl-service" web service,
# replacing any existing service with the same name.
service = Model.deploy(
    workspace=ws,
    name="automl-service",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)

# Block until the deployment has finished, streaming its progress into the cell output.
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-09-25 17:54:09+00:00 Creating Container Registry if not exists.
2022-09-25 17:54:09+00:00 Registering the environment.
2022-09-25 17:54:09+00:00 Use the existing image.
2022-09-25 17:54:09+00:00 Generating deployment configuration.
2022-09-25 17:54:10+00:00 Submitting deployment to compute.
2022-09-25 17:54:11+00:00 Checking the status of deployment automl-service..
2022-09-25 17:56:03+00:00 Checking the status of inference endpoint automl-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"


### Send a request to the web service you deployed to test it

In [84]:
import requests
import json
from azureml.core import Webservice

# Look the deployed service up by name so this cell does not depend on the deployment cell's variable.
service = Webservice(workspace=ws, name="automl-service")
scoring_uri = service.scoring_uri

# The service was deployed with auth_enabled=True, so authenticate with its primary key.
key, _ = service.get_keys()

# Set the appropriate headers
headers = {"Content-Type": "application/json"}
headers["Authorization"] = f"Bearer {key}"

# Request payload with the same feature columns as the training data (minus the label).
# The message is one of the 'ham' rows shown in the dataset preview, so the request
# exercises the model with real text instead of an empty string.
data =  {
  "Inputs": {
    "data": [
      {
        "Message": "Ok lar... Joking wif u oni...",
        "Column3": ""
      }
    ]
  }
}

input_data = json.dumps(data)

# A timeout keeps the cell from hanging forever if the endpoint is unresponsive.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=60)

# Surface HTTP errors (bad key, scoring failure, ...) instead of trying to parse an error body.
resp.raise_for_status()
print(resp.json())

{"result": ["ham"]}


### Print the logs of the web service

In [85]:
# Fetch the logs of the deployed web service and print them for inspection.
print(service.get_logs())

/bin/bash: /azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-09-25T17:55:28,224582600+00:00 - rsyslog/run 
bash: /azureml-envs/azureml_944df6c9e2b12a3bdcde13b5b8baccf0/lib/libtinfo.so.6: no version information available (required by bash)
2022-09-25T17:55:28,240078100+00:00 - gunicorn/run 
2022-09-25T17:55:28,241451000+00:00 - iot-server/run 
2022-09-25T17:55:28,241645200+00:00 - nginx/run 
2022-09-25T17:55:28,248657700+00:00 | gunicorn/run | 
2022-09-25T17:55:28,258713100+00:00 | gu

### Delete the web service

In [None]:
# Remove the deployed web service now that it has been tested.
service.delete()

### Shut down the compute resources

In [None]:
try:
    # Look the cluster up by its name: ComputeTarget expects the name string
    # (cluster_name), not the compute target object itself.
    instance = ComputeTarget(workspace=ws, name=cluster_name)

    instance.delete()
    # Tell the wait call that it is polling a delete operation, so the cluster
    # disappearing counts as completion.
    instance.wait_for_completion(show_output=True, is_delete_operation=True)
    print('Deleted compute resource')

except ComputeTargetException:
    # The lookup raises when no cluster with this name exists any more.
    print('Already deleted!')

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shut down all the compute resources that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
