# Machine Learning Lab - Automated ML in Azure ML Studio

## Get Workspace Info

In [1]:
# Import required libraries
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import automl
from azure.ai.ml import Input

import pandas as pd

In [2]:
# Build the Azure credential, then create the workspace client from the
# local workspace config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [3]:
# Look up the workspace entity for the workspace the client is bound to.
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)

## Preview Your Flat File Data

In [4]:
# Location of the raw mtcars CSV in the workspace datastore.
uri = "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/epsilon/mtcars.csv"

# Load the file and discard the `model` column, then preview the first rows.
df = pd.read_csv(uri).drop(columns="model")
df.head()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


## Create an MLTable Object

In [5]:
import mltable

# Build an MLTable from the comma-delimited CSV at `uri` (the header row is
# shared across files) and drop the `model` column in the same expression.
tbl = mltable.from_delimited_files(
    paths=[{"pattern": uri}],
    header="all_files_same_headers",
    delimiter=",",
).drop_columns(columns="model")

# Sanity check: display the first 5 records of the table.
tbl.show(5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,False,True,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,False,True,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,True,True,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,True,False,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,False,False,3,2


In [6]:
# Datastore folder that receives the saved MLTable definition.
mltable_uri = "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/epsilon/mtcars_mltable"

# Write the table to that folder, replacing any earlier save. With
# colocated=True the cell output shows the CSV being copied alongside it.
tbl.save(
    path=mltable_uri,
    colocated=True,
    show_progress=True,
    overwrite=True,
)

Copying 1 files with concurrency set to 1
Copied stodsba6190class/epsilon/mtcars.csv, file 1 out of 1. Destination path: https://stodsba6190class.blob.core.windows.net/datalake/epsilon/mtcars_mltable/stodsba6190class/epsilon/mtcars.csv
Files copied=1, skipped=0, failed=0
Copying 1 files with concurrency set to 1
Copied /tmp/tmpze4xjqix/MLTable, file 1 out of 1. Destination path: https://stodsba6190class.blob.core.windows.net/datalake/epsilon/mtcars_mltable/MLTable
Files copied=1, skipped=0, failed=0


paths:
- pattern: azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/epsilon/mtcars.csv
transformations:
- read_delimited:
    delimiter: ','
    empty_as_string: false
    encoding: utf8
    header: all_files_same_headers
    include_path_column: false
    infer_column_types: true
    partition_size: 20971520
    path_column: Path
    support_multi_line: false
- drop_columns: model
type: mltable

## Create Experiment

In [7]:
# Wrap the saved MLTable folder as the training-data input for the AutoML job.
my_training_data_input = Input(
    path=mltable_uri,
    type=AssetTypes.MLTABLE,
)

In [8]:
# Settings shared by the AutoML job configuration below.
exp_name = "jake-mtcars-experiment"  # experiment name passed to the job
max_trials = 5  # value passed to the job's max_trials limit

In [9]:
# Configure an AutoML regression job with `mpg` as the target and R2 as the
# primary metric. n_cross_validations is intentionally not set here.
regression_job = automl.regression(
    training_data=my_training_data_input,
    target_column_name="mpg",
    primary_metric="R2Score",
    enable_model_explainability=True,
    experiment_name=exp_name,
)

# Limits are all optional. max_concurrent_trials and max_cores_per_trial are
# deliberately left unset in this cell.
regression_job.set_limits(
    max_trials=max_trials,
    trial_timeout_minutes=20,
    timeout_minutes=600,
    enable_early_termination=True,
)

## Submit Job

In [10]:
# Send the configured AutoML job to the backend and keep the returned job object.
returned_job = ml_client.jobs.create_or_update(regression_job)

# Echo the submitted job definition.
print(f"Created job: {returned_job}")

Created job: compute: azureml:/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/computes/
creation_context:
  created_at: '2024-10-30T22:15:09.001799+00:00'
  created_by: Jake Brulato
  created_by_type: User
display_name: salmon_bone_r11wzzfty1
experiment_name: jake-mtcars-experiment
id: azureml:/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/jobs/salmon_bone_r11wzzfty1
limits:
  enable_early_termination: true
  max_concurrent_trials: 1
  max_cores_per_trial: -1
  max_nodes: 1
  max_trials: 5
  timeout_minutes: 600
  trial_timeout_minutes: 20
log_verbosity: info
name: salmon_bone_r11wzzfty1
outputs: {}
primary_metric: r2_score
properties: {}
queue_settings:
  job_tier: 'null'
resources:
  instance_count: 1
  sh

In [11]:
# Wait for job to complete and stream updates.
# The job is identified by the name of the job object returned at submission.
ml_client.jobs.stream(returned_job.name)

RunId: salmon_bone_r11wzzfty1
Web View: https://ml.azure.com/runs/salmon_bone_r11wzzfty1?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001

Execution Summary
RunId: salmon_bone_r11wzzfty1
Web View: https://ml.azure.com/runs/salmon_bone_r11wzzfty1?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001



In [13]:
# Get a URL for the status of the job.
# The "Studio" entry of the job's services holds the web link shown as this cell's output.
returned_job.services["Studio"].endpoint

'https://ml.azure.com/runs/salmon_bone_r11wzzfty1?wsid=/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001&tid=88d59d7d-aecb-41b2-90c5-55595de02536'

In [14]:
# Show the job name; the MLflow parent run is later looked up by this same name.
print(returned_job.name)

salmon_bone_r11wzzfty1


## Get Best Model

In [15]:
## Obtain the tracking URI for MLFlow
import mlflow

# Read the MLflow tracking URI from the `workspace` entity fetched earlier in
# this notebook, instead of issuing a second identical workspaces.get() call.
MLFLOW_TRACKING_URI = workspace.mlflow_tracking_uri

print(MLFLOW_TRACKING_URI)

azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001


In [16]:
# Point MLflow at the workspace tracking URI, then read the setting back
# and print it as confirmation.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

active_tracking_uri = mlflow.get_tracking_uri()
print("\nCurrent tracking uri: {}".format(active_tracking_uri))


Current tracking uri: azureml://eastus.api.azureml.ms/mlflow/v1.0/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001


In [17]:
from mlflow.tracking.client import MlflowClient
from mlflow.artifacts import download_artifacts

# Initialize MLFlow client.
# No tracking URI is passed here; presumably the client picks up the URI set
# via mlflow.set_tracking_uri in the previous cell (TODO confirm).
mlflow_client = MlflowClient()

  from google.protobuf import service as _service


In [18]:
# The AutoML parent run is looked up by the name of the submitted job.
# To inspect a different job, assign its name/ID instead, for example:
# job_name = "b4e95546-0aa1-448e-9ad6-002e3207b4fc"
job_name = returned_job.name

# Fetch the parent run from MLflow.
mlflow_parent_run = mlflow_client.get_run(job_name)

# Label and run are printed on separate lines.
print("Parent Run: ", mlflow_parent_run, sep="\n")

Parent Run: 
<Run: data=<RunData: metrics={'explained_variance': 0.9873667429871951,
 'mean_absolute_error': 0.2533623619074189,
 'mean_absolute_percentage_error': 1.1222824207482647,
 'median_absolute_error': 0.12242908859067284,
 'normalized_mean_absolute_error': 0.010781377102443358,
 'normalized_median_absolute_error': 0.005209748450666929,
 'normalized_root_mean_squared_error': 0.015474058328575128,
 'normalized_root_mean_squared_log_error': 0.012953570611092447,
 'r2_score': 0.9810285049021514,
 'root_mean_squared_error': 0.3636403707215155,
 'root_mean_squared_log_error': 0.014493406547849336,
 'spearman_correlation': 0.9976641969574278}, params={}, tags={'_azureml.ComputeTargetType': '',
 'automl_best_child_run_id': 'salmon_bone_r11wzzfty1_4',
 'fit_time_000': '0.0106233;0.3071656;0.30686749999999996;2;3',
 'iteration_000': '0;1;2;3;4',
 'mlflow.rootRunId': 'salmon_bone_r11wzzfty1',
 'mlflow.runName': 'salmon_bone_r11wzzfty1',
 'mlflow.user': 'Jake Brulato',
 'model_explain_bes

In [19]:
# Print parent run tags. 'automl_best_child_run_id' tag should be there;
# it is the tag read afterwards to locate the best child run.
print(mlflow_parent_run.data.tags)

{'model_explain_run': 'best_run', '_azureml.ComputeTargetType': '', 'pipeline_id_000': 'faf12f74cf9bbd358ca5525682c5030d36f7be7c;4bc4ec47eb8df2d5d68b361cd60120e65196f757;5cc37daec73ea64276ef956449645cdb519fdfc6;__AutoML_Ensemble__;__AutoML_Stack_Ensemble__', 'score_000': '0.6907880558394194;0.9809442455316771;-1;0.9809442455316771;0.9810285049021514', 'predicted_cost_000': '0;0;0.5;0;0', 'fit_time_000': '0.0106233;0.3071656;0.30686749999999996;2;3', 'training_percent_000': '100;100;100;100;100', 'iteration_000': '0;1;2;3;4', 'run_preprocessor_000': 'MaxAbsScaler;MaxAbsScaler;StandardScalerWrapper;;', 'run_algorithm_000': 'LightGBM;XGBoostRegressor;XGBoostRegressor;VotingEnsemble;StackEnsemble', 'automl_best_child_run_id': 'salmon_bone_r11wzzfty1_4', 'model_explain_best_run_child_id': 'salmon_bone_r11wzzfty1_4', 'mlflow.rootRunId': 'salmon_bone_r11wzzfty1', 'mlflow.runName': 'salmon_bone_r11wzzfty1', 'mlflow.user': 'Jake Brulato'}


In [20]:
# Resolve the best child run's id from the tag stored on the parent run.
best_child_run_id = mlflow_parent_run.data.tags["automl_best_child_run_id"]
print("Found best child run id: ", best_child_run_id)

# Fetch that run and print it under its label.
best_run = mlflow_client.get_run(best_child_run_id)
print("Best child run: ", best_run, sep="\n")

Found best child run id:  salmon_bone_r11wzzfty1_4
Best child run: 
<Run: data=<RunData: metrics={'explained_variance': 0.9873667429871951,
 'mean_absolute_error': 0.2533623619074189,
 'mean_absolute_percentage_error': 1.1222824207482647,
 'median_absolute_error': 0.12242908859067284,
 'normalized_mean_absolute_error': 0.010781377102443358,
 'normalized_median_absolute_error': 0.005209748450666929,
 'normalized_root_mean_squared_error': 0.015474058328575128,
 'normalized_root_mean_squared_log_error': 0.012953570611092447,
 'r2_score': 0.9810285049021514,
 'root_mean_squared_error': 0.3636403707215155,
 'root_mean_squared_log_error': 0.014493406547849336,
 'spearman_correlation': 0.9976641969574278}, params={}, tags={'mlflow.parentRunId': 'salmon_bone_r11wzzfty1',
 'mlflow.rootRunId': 'salmon_bone_r11wzzfty1',
 'mlflow.runName': 'upbeat_feast_83dqybvq',
 'mlflow.source.name': 'automl_driver.py',
 'mlflow.source.type': 'JOB',
 'mlflow.user': 'Jake Brulato',
 'model_explain_run_id': 'salm

In [21]:
# Get best model run's metrics (displayed as a dict of metric name -> value).
best_run.data.metrics

{'r2_score': 0.9810285049021514,
 'median_absolute_error': 0.12242908859067284,
 'explained_variance': 0.9873667429871951,
 'root_mean_squared_error': 0.3636403707215155,
 'root_mean_squared_log_error': 0.014493406547849336,
 'normalized_mean_absolute_error': 0.010781377102443358,
 'normalized_root_mean_squared_log_error': 0.012953570611092447,
 'normalized_median_absolute_error': 0.005209748450666929,
 'mean_absolute_error': 0.2533623619074189,
 'mean_absolute_percentage_error': 1.1222824207482647,
 'spearman_correlation': 0.9976641969574278,
 'normalized_root_mean_squared_error': 0.015474058328575128}

## Deploying the Best Model as an API Endpoint

### Create Managed Endpoint

In [22]:
# import required libraries
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
    ProbeSettings,
)
from azure.ai.ml.constants import ModelType

In [23]:
import datetime  # not referenced anywhere in this cell

# The endpoint name is a fixed string (no timestamp suffix is appended);
# pick a different name if it conflicts with an existing endpoint.
online_endpoint_name = "jake-mtcars-endpoint-01"

# Describe the managed online endpoint; auth_mode is set to key-based.
endpoint = ManagedOnlineEndpoint(
    auth_mode="key",
    description="online endpoint for the jake's mtcars AutoML model",
    name=online_endpoint_name,
)

In [24]:
# Start creating the endpoint, then wait on the returned poller; the resulting
# endpoint object is displayed as the cell output.
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://jake-mtcars-endpoint-01.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://jake-mtcars-endpoint-01.eastus.inference.ml.azure.com/swagger.json', 'name': 'jake-mtcars-endpoint-01', 'description': "online endpoint for the jake's mtcars AutoML model", 'tags': {}, 'properties': {'createdBy': 'Jake Brulato', 'createdAt': '2024-10-30T22:33:42.540566+0000', 'lastModifiedAt': '2024-10-30T22:33:42.540566+0000', 'azureml.onlineendpointid': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/providers/microsoft.machinelearningservices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineendpoints/jake-mtcars-endpoint-01', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oeidp:25d84a7a-c07

### Register Best Model

In [25]:
# Register the best run's MLflow model artifacts as a named model in the workspace.
model_name = "jake-mtcars-model"
model = Model(
    # Artifacts folder written by the best AutoML child run.
    path=f"azureml://jobs/{best_run.info.run_id}/outputs/artifacts/outputs/mlflow-model/",
    name=model_name,
    # The description must match what was trained: a regression on mtcars `mpg`.
    description="mtcars mpg regression AutoML model",
    type=AssetTypes.MLFLOW_MODEL,
)

registered_model = ml_client.models.create_or_update(model)

In [26]:
# Show the registered model's resource id; the deployment below references the model by this id.
registered_model.id

'/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/models/jake-mtcars-model/versions/1'

### Deploy

In [27]:
# Probe settings are built first so the deployment definition stays short.
# The two probes differ only in failure_threshold and timeout.
liveness_probe = ProbeSettings(
    failure_threshold=30,
    success_threshold=1,
    timeout=2,
    period=10,
    initial_delay=2000,
)
readiness_probe = ProbeSettings(
    failure_threshold=10,
    success_threshold=1,
    timeout=10,
    period=10,
    initial_delay=2000,
)

# Deployment of the registered model onto the endpoint defined earlier:
# a single Standard_DS3_V2 instance.
deployment = ManagedOnlineDeployment(
    name="jake-mtcars-deploy",
    endpoint_name=online_endpoint_name,
    model=registered_model.id,
    instance_type="Standard_DS3_V2",
    instance_count=1,
    liveness_probe=liveness_probe,
    readiness_probe=readiness_probe,
)

In [28]:
# Start creating the deployment, then wait on the returned poller; the resulting
# deployment object is displayed as the cell output.
deployment_poller = ml_client.online_deployments.begin_create_or_update(deployment)
deployment_poller.result()

Check: endpoint jake-mtcars-endpoint-01 exists


......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

ManagedOnlineDeployment({'private_network_connection': None, 'package_model': False, 'provisioning_state': 'Succeeded', 'endpoint_name': 'jake-mtcars-endpoint-01', 'type': 'Managed', 'name': 'jake-mtcars-deploy', 'description': None, 'tags': {}, 'properties': {'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/odidp:25d84a7a-c070-46c4-b283-0de91b6aa4e9:d9a02828-36e9-43c8-acdd-4b3cc051d81e?api-version=2023-04-01-preview'}, 'print_as_yaml': False, 'id': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourceGroups/rg-dsba6190-class-dev-eastus-001/providers/Microsoft.MachineLearningServices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineEndpoints/jake-mtcars-endpoint-01/deployments/jake-mtcars-deploy', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/jakebrulato/code/Users/jbrulato', 'creation_context':

In [29]:
# Route 100% of traffic to the deployment, then push the updated endpoint
# and wait for the update to finish.
endpoint.traffic = {"jake-mtcars-deploy": 100}
traffic_poller = ml_client.begin_create_or_update(endpoint)
traffic_poller.result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://jake-mtcars-endpoint-01.eastus.inference.ml.azure.com/score', 'openapi_uri': 'https://jake-mtcars-endpoint-01.eastus.inference.ml.azure.com/swagger.json', 'name': 'jake-mtcars-endpoint-01', 'description': "online endpoint for the jake's mtcars AutoML model", 'tags': {}, 'properties': {'createdBy': 'Jake Brulato', 'createdAt': '2024-10-30T22:33:42.540566+0000', 'lastModifiedAt': '2024-10-30T22:33:42.540566+0000', 'azureml.onlineendpointid': '/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/providers/microsoft.machinelearningservices/workspaces/mls-dsba6190-class-dev-eastus-001/onlineendpoints/jake-mtcars-endpoint-01', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/providers/Microsoft.MachineLearningServices/locations/eastus/mfeOperationsStatus/oeidp:25d84a7a-c07

### Test the API

In [30]:
import json

import pandas as pd

# Same CSV that was used to build the training table.
uri = "azureml://subscriptions/e9bc187a-e9a1-46be-822e-e955a2563601/resourcegroups/rg-dsba6190-class-dev-eastus-001/workspaces/mls-dsba6190-class-dev-eastus-001/datastores/stodsba6190class/paths/epsilon/mtcars.csv"

# Use the first two rows as a smoke test and send feature columns only:
# `model` was dropped before training and `mpg` is the target being predicted,
# so neither belongs in a scoring request.
test_df = pd.read_csv(uri).head(2).drop(columns=["model", "mpg"])

# Serialize the rows with pandas, then build the request body with the json
# module rather than concatenating string fragments, so the payload is
# guaranteed to be well-formed JSON.
test_data_json = test_df.to_json(orient="records", indent=4)
data = json.dumps({"input_data": {"data": json.loads(test_data_json)}}, indent=4)

In [31]:
request_file_name = "jake-request-mtcars.json"

# Persist the request body so it can be sent from a file.
with open(request_file_name, "w", encoding="utf-8") as request_file:
    request_file.write(data)

# Score the sample rows against the "jake-mtcars-deploy" deployment. The path is
# taken from request_file_name so the file written and the file sent cannot
# drift apart.
ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    deployment_name="jake-mtcars-deploy",
    request_file=request_file_name,
)

'[20.8613029709733, 20.86187258885223]'

### Get Endpoint Details

In [32]:
# Re-fetch the endpoint by name to read its current state.
endpoint = ml_client.online_endpoints.get(name=online_endpoint_name)

# Print the traffic split and the scoring URI, one per line.
print(endpoint.traffic, endpoint.scoring_uri, sep="\n")

{'jake-mtcars-deploy': 100}
https://jake-mtcars-endpoint-01.eastus.inference.ml.azure.com/score


In [33]:
# Delete the deployment and endpoint. Wait on the returned poller, as the other
# long-running calls in this notebook do, so a failed deletion is reported here
# instead of going unnoticed.
ml_client.online_endpoints.begin_delete(name=online_endpoint_name).result()

<azure.core.polling._poller.LROPoller at 0x7f233d2833d0>

.................................................................