In [7]:
import azureml.core 
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
import logging

In [8]:
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'capstone-automl'
experiment=Experiment(ws, experiment_name)
# Choose a name for the run history container in the workspace.
# NOTE: update these to match your existing experiment name
project_folder = './capstone-project-folder'

In [9]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name =  "compute-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                          # vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)
# For a more detailed view of current AmlCompute status, use get_status().

Found existing cluster, use it.
Succeeded.........................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [10]:
# Try to load the dataset from the Workspace. Otherwise, create it from the file
# NOTE: update the key to match the dataset name
found = False
key = "automl-capstone"
description_text = "bankmarketing train data"

if key in ws.datasets.keys(): 
        found = True
        dataset = ws.datasets[key] 

if not found:
        # Create AML Dataset and register it into Workspace
        example_data =  "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
        dataset = Dataset.Tabular.from_delimited_files(example_data)        
        #Register Dataset in Workspace
        dataset = dataset.register(workspace=ws,
                                   name=key,
                                   description=description_text)


df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,age,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
count,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0,32950.0
mean,40.040212,257.335205,2.56173,962.17478,0.17478,0.076228,93.574243,-40.51868,3.615654,5166.859608
std,10.432313,257.3317,2.763646,187.646785,0.496503,1.572242,0.578636,4.623004,1.735748,72.208448
min,17.0,0.0,1.0,0.0,0.0,-3.4,92.201,-50.8,0.634,4963.6
25%,32.0,102.0,1.0,999.0,0.0,-1.8,93.075,-42.7,1.344,5099.1
50%,38.0,179.0,2.0,999.0,0.0,1.1,93.749,-41.8,4.857,5191.0
75%,47.0,318.0,3.0,999.0,0.0,1.4,93.994,-36.4,4.961,5228.1
max,98.0,4918.0,56.0,999.0,7.0,1.4,94.767,-26.9,5.045,5228.1


In [11]:
from azureml.automl.core.forecasting_parameters import ForecastingParameters

automl_settings = {
    "experiment_timeout_minutes": 60,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'accuracy'
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=dataset,
                             label_column_name="y",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )    

In [12]:
# Submit your experiment
remote_run = experiment.submit(automl_config, show_output=True)

Submitting remote run.
No run_configuration provided, running on compute-cluster with default configuration
Running on remote compute: compute-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
capstone-automl,AutoML_f7741924-507a-4922-89f4-f28de06f64ee,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: ModelSelection. Beginning model selection.

********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Train-Test data split
STATUS:       DONE
DESCRIPTION:  In order to accurately evaluate the model(s) trained by AutoML, we leverage a dataset that the model is not trained on. Hence, if the user doesn't provide an explicit validation dataset, a part of the training dataset is used to achieve this. For smaller datasets (fewer than 20,000 samples), cross-validation is leveraged, else a single hold-out set is split from the training data to serve as the validation dataset. Hence, your input data has been split into a training dataset and a holdout validation dataset.
      

In [13]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()
remote_run.wait_for_completion()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_f7741924-507a-4922-89f4-f28de06f64ee',
 'target': 'compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2022-11-30T17:29:36.257797Z',
 'endTimeUtc': '2022-11-30T18:22:44.856564Z',
 'services': {},
   'message': 'No scores improved over last 10 iterations, so experiment stopped early. This early stopping behavior can be disabled by setting enable_early_stopping = False in AutoMLConfig for notebook/python SDK runs.'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"e23ce264-cd45-41d2-a01f-f9751185e0ba\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-

In [14]:
# Retrieve and save your best automl model.
best_run, fitted_model = remote_run.get_output()
best_run_metrics = best_run.get_metrics()

In [15]:
import joblib
joblib.dump(fitted_model, 'model.pkl')

['model.pkl']

In [16]:
from azureml.core import Model
model = Model.register(
    workspace=ws, 
    model_name='automl_model', 
    model_path='./model.pkl'
)

Registering model automl_model


In [17]:
script_file_name = 'score_class.py'
best_run.download_file("outputs/scoring_file_v_1_0_0.py",script_file_name)

In [18]:

from azureml.core import Environment
# Get the environment
from azureml.automl.core.shared import constants

best_run.download_file(constants.CONDA_ENV_FILE_PATH, 'conda_dependencies.yml')
service_env = Environment.from_conda_specification(name='deployment-env', file_path='conda_dependencies.yml')

In [19]:
from azureml.core.model import InferenceConfig
inference_config = InferenceConfig(entry_script='score_class.py',
                                              source_directory= ".",
                                              environment=service_env)

In [20]:
from azureml.core.webservice import AciWebservice

deploy_config = AciWebservice.deploy_configuration(
    cpu_cores=1, memory_gb=2, auth_enabled=True, enable_app_insights=True
)

In [21]:
from azureml.core.model import Model
aci_service_name = "automl-classification"
service = Model.deploy(
    ws,
    aci_service_name,
    [model],
    inference_config,
    deploy_config,
    overwrite=True
   
)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
.
2022-11-30 18:28:18+00:00 Checking the status of inference endpoint automl-classification.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [24]:
import requests
import json
from azureml.core import Webservice
import pandas as pd
service = Webservice(workspace=ws, name= "automl-classification")
scoring_uri = service.scoring_uri
print(scoring_uri)
# If the service is authenticated, set the key or token
key, _ = service.get_keys()

# Set the appropriate headers
headers = {"Content-Type": "application/json"}
headers["Authorization"] = f"Bearer {key}"
# Make the request and display the response and logs
#data = pd.DataFrame({"DATE": pd.Series(["2/1/2018"])})
data = {
  "data": [
    {
      "age": 50,
      "job": "technician",
      "marital": "married",
      "education": "high.school",
      "default": "yes",
      "housing": "no",
      "loan": "yes",
      "contact": "cellular",
      "month": "apr",
      "day_of_week": "tue",
      "duration": 340,
      "campaign": 1,
      "pdays": 999,
      "previous": 0,
      "poutcome": "failure",
      "emp.var.rate": -1.8,
      "cons.price.idx": 	92.893,
      "cons.conf.idx":-46.2,
      "euribor3m": 	1.299,
      "nr.employed": 	5099.1
    }
  ],
  "method": "predict"
}


input_data = json.dumps(data)
# print(type(input_data))
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.json())

http://d944ae02-1c4f-4e83-8aeb-61cc34eb36e8.eastus2.azurecontainer.io/score
{"result": ["no"]}


In [None]:
print(service.get_logs())