In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import (
    KubernetesOnlineEndpoint,
    KubernetesOnlineDeployment,
    Model, 
    Environment,
    CodeConfiguration,
) 
from azure.ai.ml.entities._deployment.resource_requirements_settings import (
    ResourceRequirementsSettings,
)
from azure.ai.ml.entities._deployment.container_resource_settings import (
    ResourceSettings,
)

from constants import SUBSCRIPTION_ID, RESOURCE_GROUP, WORKSPACE

In [4]:
# Build the credential and request a management-plane token right away, so an
# authentication problem shows up in this cell rather than in a later one.
credential = DefaultAzureCredential()
credential.get_token("https://management.azure.com/.default")

# Workspace-scoped client; the identifiers come from the local `constants` module.
ml_client = MLClient(
    credential=credential,
    subscription_id=SUBSCRIPTION_ID,
    resource_group_name=RESOURCE_GROUP,
    workspace_name=WORKSPACE,
)

In [6]:
import datetime

# Suffix made of month, day, hour, minute and microseconds of the current time,
# so each run of this cell produces a different endpoint name.
name_suffix = datetime.datetime.now().strftime("%m%d%H%M%f")
loacl_endpoint_name = "loacl" + name_suffix

# Endpoint definition only; nothing is created until it is passed to the client.
endpoint = KubernetesOnlineEndpoint(
    description="this is a sample local endpoint",
    name=loacl_endpoint_name,
)

In [7]:
import sys 

!{sys.executable} -m pip install docker



In [8]:
# Create (or update) the endpoint locally (`local=True`) instead of in the workspace.
ml_client.online_endpoints.begin_create_or_update(
    endpoint,
    local=True,
)

Creating local endpoint (loacl04021040089783) Done (0m 0s)
Field 'mirror_traffic': This is an experimental field, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


ManagedOnlineEndpoint({'public_network_access': None, 'provisioning_state': None, 'scoring_uri': None, 'openapi_uri': None, 'name': 'loacl04021040089783', 'description': 'this is a sample local endpoint', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': WindowsPath('C:/Users/leopu/.azureml/inferencing/loacl04021040089783'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x000001786FECD610>, 'auth_mode': 'key', 'location': None, 'identity': None, 'traffic': {}, 'mirror_traffic': {}, 'kind': None})

In [55]:
# Model file and inference environment (base image plus conda dependencies),
# both referenced by paths relative to the notebook's working directory.
model = Model(path=r".\model\model\sklearn_regression_model.pkl")
env = Environment(
    conda_file=r".\model\environment\conda.yml",
    image="mcr.microsoft.com/azureml/minimal-ubuntu18.04-py37-cpu-inference:latest",
)

# "blue" deployment for the local endpoint named by `loacl_endpoint_name`.
blue_deployment = KubernetesOnlineDeployment(
    name="blue",
    endpoint_name=loacl_endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(
        code=r".\model\onlinescoring", scoring_script="score.py"
    ),
    instance_count=1,
    resources=ResourceRequirementsSettings(
        requests=ResourceSettings(
            cpu="0.5",
            # Memory is given with an explicit unit. A bare "0.5" is a Kubernetes
            # quantity of half a byte, which is below the 1Mi minimum the service
            # reports for cluster deployments, so the same settings would not be
            # reusable there. "0.5Gi" matches the cluster deployment cell.
            memory="0.5Gi",
        )
    ),
)

In [56]:
# Build and start the "blue" deployment locally; VS Code debugging stays off.
ml_client.online_deployments.begin_create_or_update(
    blue_deployment,
    local=True,
    vscode_debug=False,
)

Updating local deployment (loacl04021040089783 / blue) .
Building Docker image from Dockerfile
Step 1/6 : FROM mcr.microsoft.com/azureml/minimal-ubuntu18.04-py37-cpu-inference:latest
 ---> a0c1ac3e0de3
Step 2/6 : RUN mkdir -p /var/azureml-app/
 ---> Using cache
 ---> 6933457dfc1d
Step 3/6 : WORKDIR /var/azureml-app/
 ---> Using cache
 ---> ca2874ed4bad
Step 4/6 : COPY conda.yml /var/azureml-app/
 ---> Using cache
 ---> 93effca0054d
Step 5/6 : RUN conda env create -n inf-conda-env --file conda.yml
 ---> Using cache
 ---> 6be8b3970398
Step 6/6 : CMD ["conda", "run", "--no-capture-output", "-n", "inf-conda-env", "runsvdir", "/var/runit"]
 ---> Using cache
 ---> 41e036971a20
Successfully built 41e036971a20
Successfully tagged loacl04021040089783:blue

Starting up endpoint.....Done (0m 30s)


KubernetesOnlineDeployment({'provisioning_state': 'Succeeded', 'endpoint_name': 'loacl04021040089783', 'type': 'Kubernetes', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': WindowsPath('c:/Users/leopu/OneDrive/Programming/Python/azure/kubernetes-deployment'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000017870B35DC0>, 'model': Model({'job_name': None, 'is_anonymous': False, 'auto_increment_version': False, 'name': '7713d7a5680d37a33a7ac52530aec294', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': WindowsPath('c:/Users/leopu/OneDrive/Programming/Python/azure/kubernetes-deployment'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000017870B536A0>, 'version': '1', 'latest_version': None, 'path': 'C:\\Users\\leopu\\OneDrive\\Programm

In [57]:
# Fetch up to 1000 log lines from the local "blue" deployment and print them;
# this is the first place to look when the deployment misbehaves.
debug_logs = ml_client.online_deployments.get_logs(
    endpoint_name=loacl_endpoint_name,
    name="blue",
    local=True,
    lines=1000,
)
print(debug_logs)

2023-04-02T11:38:41,166859396+00:00 - rsyslog/run 
2023-04-02T11:38:41,167476389+00:00 - nginx/run 
2023-04-02T11:38:41,168097479+00:00 - gunicorn/run 
2023-04-02T11:38:41,168747975+00:00 | gunicorn/run | 
nginx: [warn] the "user" directive makes sense only if the master process runs with super-user privileges, ignored in /etc/nginx/nginx.conf:1
2023-04-02T11:38:41,169563187+00:00 | gunicorn/run | ###############################################
2023-04-02T11:38:41,170672628+00:00 | gunicorn/run | AzureML Container Runtime Information
2023-04-02T11:38:41,171335608+00:00 | gunicorn/run | ###############################################
2023-04-02T11:38:41,171984155+00:00 | gunicorn/run | 
2023-04-02T11:38:41,172709239+00:00 | gunicorn/run | 
2023-04-02T11:38:41,173892941+00:00 | gunicorn/run | AzureML image information: minimal-ubuntu18.04-py37-cpu-inference:20230227.v13
2023-04-02T11:38:41,174454576+00:00 | gunicorn/run | 
2023-04-02T11:38:41,175008112+00:00 | gunicorn/run | 
2023-04-02T

In [58]:
# Look up the local "blue" deployment and display its current state.
ml_client.online_deployments.get(
    endpoint_name=loacl_endpoint_name,
    name="blue",
    local=True,
)

KubernetesOnlineDeployment({'provisioning_state': 'Succeeded', 'endpoint_name': 'loacl04021040089783', 'type': 'Kubernetes', 'name': 'blue', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': WindowsPath('c:/Users/leopu/OneDrive/Programming/Python/azure/kubernetes-deployment'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000017870B5D4C0>, 'model': Model({'job_name': None, 'is_anonymous': False, 'auto_increment_version': False, 'name': '7713d7a5680d37a33a7ac52530aec294', 'description': None, 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': None, 'Resource__source_path': None, 'base_path': WindowsPath('c:/Users/leopu/OneDrive/Programming/Python/azure/kubernetes-deployment'), 'creation_context': None, 'serialize': <msrest.serialization.Serializer object at 0x0000017870B35E80>, 'version': '1', 'latest_version': None, 'path': 'C:\\Users\\leopu\\OneDrive\\Programm

In [59]:
# Send the sample request file to the local endpoint and display the response.
scoring_response = ml_client.online_endpoints.invoke(
    local=True,
    request_file=r".\model\sample-request.json",
    endpoint_name=loacl_endpoint_name,
)
scoring_response

'[11055.977245525679, 4503.079536107787]'

In [3]:
# Deploy the model to AKS: start with the endpoint definition.
import datetime

# Suffix made of month, day, hour, minute and microseconds of the current time,
# so each run of this cell produces a different endpoint name.
k8s_name_suffix = datetime.datetime.now().strftime("%m%d%H%M%f")
online_endpoint_name = "k8s-endpoint" + k8s_name_suffix

# Key-authenticated endpoint on the compute target named "moby".
# NOTE(review): the tag value below is spelled "deplyoment"; it is kept as-is
# here because the value is recorded on the resource.
endpoint = KubernetesOnlineEndpoint(
    name=online_endpoint_name,
    description="this is a sample k8s endpoint",
    compute="moby",
    auth_mode="key",
    tags={"key": "test_deplyoment"},
)

In [5]:
# Submit the endpoint to the workspace, then wait for the operation's result
# and display it.
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

KubernetesOnlineEndpoint({'provisioning_state': 'Succeeded', 'scoring_uri': 'http://20.123.111.254/api/v1/endpoint/k8s-endpoint04071846339402/score', 'openapi_uri': 'http://20.123.111.254/api/v1/endpoint/k8s-endpoint04071846339402/swagger.json', 'name': 'k8s-endpoint04071846339402', 'description': 'this is a sample k8s endpoint', 'tags': {'key': 'test_deplyoment'}, 'properties': {'azureml.onlineendpointid': '/subscriptions/5a361d37-b562-4eee-981b-0936493063e9/resourcegroups/mlgroup/providers/microsoft.machinelearningservices/workspaces/mlworkspace/onlineendpoints/k8s-endpoint04071846339402', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/5a361d37-b562-4eee-981b-0936493063e9/providers/Microsoft.MachineLearningServices/locations/northeurope/mfeOperationsStatus/oe:6888f348-7a7b-4570-a119-b500be349475:4847a014-7ebb-459d-a454-15caf4697f6a?api-version=2022-02-01-preview'}, 'print_as_yaml': True, 'id': '/subscriptions/5a361d37-b562-4eee-981b-0936493063e9/resourceGroups/

In [6]:
# Configure the cluster deployment: model, environment, scoring code, resources.
model = Model(path=r".\model\model\sklearn_regression_model.pkl")
env = Environment(
    image="mcr.microsoft.com/azureml/minimal-ubuntu18.04-py37-cpu-inference:latest",
    conda_file=r".\model\environment\conda.yml",
)

# Scoring entry point and the per-container resource requests.
scoring_code = CodeConfiguration(
    code=r".\model\onlinescoring",
    scoring_script="score.py",
)
resource_requests = ResourceSettings(cpu="100m", memory="0.5Gi")

# NOTE(review): the recorded run of the create call for this deployment was
# rejected with "Resource requests for CPU and memory must not be null" even
# though both are set here. The cause is not visible in this notebook;
# TODO confirm against the installed SDK and cluster extension versions.
blue_deployment = KubernetesOnlineDeployment(
    name="blue",
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    code_configuration=scoring_code,
    instance_count=1,
    resources=ResourceRequirementsSettings(requests=resource_requests),
)

In [7]:
# Submit the "blue" deployment to the workspace, then wait for the operation's
# result and display it.
deployment_poller = ml_client.begin_create_or_update(blue_deployment)
deployment_poller.result()

Check: endpoint k8s-endpoint04071846339402 exists
[32mUploading onlinescoring (0.0 MBs): 100%|##########| 2473/2473 [00:00<00:00, 11706.61it/s]
[39m

[32mUploading sklearn_regression_model.pkl[32m (< 1 MB): 100%|##########| 756/756 [00:00<00:00, 15.5kB/s]
[39m



..................................................................................................................

HttpResponseError: (DeploymentFailed) InferencingClient HttpRequest error, error detail: {"errors":{"":["Resource requests for CPU and memory must not be null. If GPU is specified, resource limits for CPU and memory must also not be null. For both requests and limits, if specified, cpu must be greater than 1m and memory must be greater than 1Mi."]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-517c3b80ee5b3cb905d866a94e45f22d-7776c6142e68b4f0-00"}
Code: DeploymentFailed
Message: InferencingClient HttpRequest error, error detail: {"errors":{"":["Resource requests for CPU and memory must not be null. If GPU is specified, resource limits for CPU and memory must also not be null. For both requests and limits, if specified, cpu must be greater than 1m and memory must be greater than 1Mi."]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-517c3b80ee5b3cb905d866a94e45f22d-7776c6142e68b4f0-00"}
Exception Details:	(DeploymentFailed) InferencingClient HttpRequest error, error detail: {"errors":{"":["Resource requests for CPU and memory must not be null. If GPU is specified, resource limits for CPU and memory must also not be null. For both requests and limits, if specified, cpu must be greater than 1m and memory must be greater than 1Mi."]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-517c3b80ee5b3cb905d866a94e45f22d-7776c6142e68b4f0-00"}
	The build log is available in the workspace blob store "mlstorageleo" under the path "/azureml/ImageLogs/477892dc-7748-49ed-aa0b-9612a1729a81/build.log"
	Code: DeploymentFailed
	Message: InferencingClient HttpRequest error, error detail: {"errors":{"":["Resource requests for CPU and memory must not be null. If GPU is specified, resource limits for CPU and memory must also not be null. For both requests and limits, if specified, cpu must be greater than 1m and memory must be greater than 1Mi."]},"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-517c3b80ee5b3cb905d866a94e45f22d-7776c6142e68b4f0-00"}
	The build log is available in the workspace blob store "mlstorageleo" under the path "/azureml/ImageLogs/477892dc-7748-49ed-aa0b-9612a1729a81/build.log"