# Create a single model deployment using a custom container

## 1. Configure parameters, assets, and clients

### 1.1 Set endpoint details

In [1]:
import random

# Build the endpoint name from a fixed prefix plus a random integer suffix
# (0..10000 inclusive), so each run of the notebook gets its own name.
endpoint_name = "singlemodfastep-{}".format(random.randint(0, 10000))

### 1.2 Set asset paths
Define the directories containing the model file, the scoring script, and the test data.

In [2]:
import os

# Root folder of the sample assets; the model, the scoring code and the test
# data each live in their own subdirectory beneath it.
base_path = "../model-1"
models_path, code_path, test_data_path = (
    os.path.join(base_path, subdir)
    for subdir in ("model", "onlinescoring", "test-data")
)

### 1.3 Examine the models folder
The models folder contains the single model file that will be loaded by the scoring script.

In [3]:
import os

# Collect the file names found in the model directory and display them.
model_files = os.listdir(models_path)
model_files

['sklearn_regression_model.pkl']

### 1.4 Examine the score script

- Score script is located at `code_path/score.py`.
- `score.py` is only where `init()` is called during container initialization; `run()` is a no-op (`pass`) here.
- Because the endpoint has to serve other URIs in addition to the scoring route, the FastAPI-based app is hosted on uvicorn.
- The FastAPI app in `code_path/engine/api_engine.py` defines an individual `app.get()` or `app.post()` handler for each route, so all routing is managed in that one place.

### 1.5 Examine the Dockerfile
The Dockerfile is located at `../cli/custom-container/fast-in-dockerfile/minimal-single-model-conda-in-dockerfile.dockerfile`. It uses the AzureML Inference Minimal CPU image as a base and adds the dependencies required by the scoring script.

### 1.6 Create an MLClient instance

In [4]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    CodeConfiguration,
    Environment,
    ProbeSettings,
)
from azure.identity import DefaultAzureCredential
import subprocess

In [5]:
# Authenticate with DefaultAzureCredential and build the workspace client
# from the workspace config file.
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [6]:
# Workspace coordinates taken from the client; they are passed to the helper
# shell scripts later in the notebook.
_workspace_name = ml_client.workspace_name
_resource_group = ml_client.resource_group_name
_location = ml_client.workspaces.get(_workspace_name).location

## 2. Create an endpoint

### 2.1 Define and create the endpoint

In [7]:
# Define the endpoint using only the generated name.
endpoint = ManagedOnlineEndpoint(name=endpoint_name)
# Start the create-or-update operation and block until the poller finishes.
poller = ml_client.online_endpoints.begin_create_or_update(endpoint)
poller.wait()

### 2.2 Confirm that creation was successful

In [8]:
from azure.ai.ml.exceptions import DeploymentException

# Final state reported by the endpoint creation poller.
status = poller.status()
if status != "Succeeded":
    # DeploymentException requires both `message` and
    # `no_personal_data_message`; passing only the status would raise a
    # TypeError instead of reporting the provisioning failure.
    failure_message = f"Endpoint creation finished with status: {status}"
    raise DeploymentException(
        message=failure_message,
        no_personal_data_message=failure_message,
    )

print("Endpoint creation succeeded")
# Replace the local definition with the endpoint object returned by the poller.
endpoint = poller.result()
print("endpoint.provisioning_state: ", endpoint.provisioning_state)
print("endpoint.scoring_uri: ", endpoint.scoring_uri)

Endpoint creation succeeded
endpoint.provisioning_state:  Succeeded
endpoint.scoring_uri:  https://singlemodfastep-1092.eastus2.inference.ml.azure.com/score


## 3. Create the deployment

In [9]:
# Display the generated endpoint name used by the deployment cells below.
endpoint_name

'singlemodfastep-1092'

In [10]:
## Build the BYOC image and store on ACR

# The helper script is sourced with its stdout discarded; afterwards the value
# of $BYOC_IMAGE_NAME_PATH is printed, so that value is the only text that
# reaches the captured stdout.
cmd = (
    # keep the variable echo, but do not hide stderr
    "source util-scripts/build_byoc_image.sh 1>/dev/null "
    "&& printf '%s' \"$BYOC_IMAGE_NAME_PATH\""
)

# NOTE(review): with `bash -c <cmd> a b c d`, bash assigns the first extra
# argument to $0, so the sourced script sees $1=_resource_group,
# $2=_workspace_name, $3=_location and endpoint_name lands in $0 — confirm
# this matches what build_byoc_image.sh expects.
byoc_generate_process = subprocess.run(
    [
        "bash", "-c", cmd, 
        endpoint_name, _resource_group, _workspace_name, _location
    ],
    text=True,
    capture_output=True,   # get both stdout and stderr
    check=False            # we will inspect the return code ourselves
)

In [11]:
# The build command prints only the image path on stdout.
byoc_image_name_path = byoc_generate_process.stdout.strip()

print("Image path :", byoc_image_name_path)
print("Exit code  :", byoc_generate_process.returncode)
if byoc_generate_process.stderr:
    print("--------- build_byoc_image.sh stderr ---------")
    print(byoc_generate_process.stderr)

# The subprocess was started with check=False, so a failed build has to be
# detected here. Stop before the deployment is defined with a missing or
# empty image reference.
if byoc_generate_process.returncode != 0 or not byoc_image_name_path:
    raise RuntimeError(
        "build_byoc_image.sh did not produce an image path "
        f"(exit code {byoc_generate_process.returncode}); see stderr above."
    )

Image path : mlws01contreg.azurecr.io/azureml-examples/minimal-single-model-fast2-in-dockerfile:1
Exit code  : 0
--------- build_byoc_image.sh stderr ---------

**************************************************************************************************************
* Extension "azure-cli-ml" cannot be used along with extension "ml". This may result in unexpected behaviour.*
* Please remove azure-cli-ml extension by running  "az extension remove -n azure-cli-ml                      *
**************************************************************************************************************
                

**************************************************************************************************************
* Extension "azure-cli-ml" cannot be used along with extension "ml". This may result in unexpected behaviour.*
* Please remove azure-cli-ml extension by running  "az extension remove -n azure-cli-ml                      *
********************************************

In [12]:
# NOTE(review): commented-out direct invocation of the build script; it looks
# superseded by the subprocess-based build cell earlier in this section — confirm.
# !./util-scripts/build_byoc_image.sh

### 3.1 Define the deployment

In [13]:
# Name of the deployment created under the endpoint.
deployment_name = "custom-container-singlemodel"

In [14]:
from azure.ai.ml.entities import OnlineRequestSettings

# Model files taken from the local model directory.
byoc_model = Model(name="minimal-singlemodel", path=models_path)

# Scoring code directory and its entry script.
byoc_code_config = CodeConfiguration(code=code_path, scoring_script="score.py")

# Custom container image; the liveness, readiness and scoring routes all use
# port 8003.
byoc_environment = Environment(
    name="minimal-singlemodel",
    image=byoc_image_name_path,
    inference_config={
        route_name: {"path": route_path, "port": 8003}
        for route_name, route_path in (
            ("liveness_route", "/"),
            ("readiness_route", "/"),
            ("scoring_route", "/score"),
        )
    },
)

# Assemble the deployment from the pieces above.
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=byoc_model,
    code_configuration=byoc_code_config,
    environment=byoc_environment,
    instance_type="Standard_DS3_v2",
    instance_count=1,
    liveness_probe=ProbeSettings(initial_delay=120),
    request_settings=OnlineRequestSettings(request_timeout_ms=120000),
)

### 3.2 Create the deployment

In [15]:
# Start the deployment create-or-update operation and block until the poller
# finishes; `poller` is inspected in the next cell.
poller = ml_client.online_deployments.begin_create_or_update(deployment)
poller.wait()

Check: endpoint singlemodfastep-1092 exists


..................................................................................

### 3.3 Confirm that creation was successful

In [16]:
# Final state reported by the deployment creation poller.
status = poller.status()
if status != "Succeeded":
    # DeploymentException requires both `message` and
    # `no_personal_data_message`; passing only the status would raise a
    # TypeError instead of reporting the provisioning failure.
    failure_message = f"Deployment creation finished with status: {status}"
    raise DeploymentException(
        message=failure_message,
        no_personal_data_message=failure_message,
    )

print("Deployment creation succeeded")
# Replace the local definition with the deployment object returned by the poller.
deployment = poller.result()
print("deployment.name: ", deployment.name)
print("deployment.provisioning_state: ", deployment.provisioning_state)

Deployment creation succeeded
deployment.name:  custom-container-singlemodel
deployment.provisioning_state:  Succeeded


### 3.4 Set traffic to 100% 

In [17]:
# Route 100% of the endpoint's traffic to the new deployment, then push the
# updated endpoint definition and block until the update finishes.
endpoint.traffic = {deployment_name: 100}
poller = ml_client.begin_create_or_update(endpoint)
poller.wait()

Readonly attribute principal_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>
Readonly attribute tenant_id will be ignored in class <class 'azure.ai.ml._restclient.v2022_05_01.models._models_py3.ManagedServiceIdentity'>


## 4. Test the endpoint
The request payload is read from `request.json` in the test data directory and sent to the endpoint.

### 4.1 Test the model

In [18]:
import json

# Send the sample request file to the endpoint and print the parsed JSON reply.
request_path = os.path.join(test_data_path, "request.json")
res = ml_client.online_endpoints.invoke(endpoint_name, request_file=request_path)
print(json.loads(res))

{'prediction': 'your_result'}


### 4.2 Test the model with all its APIs

In [19]:

# Run the helper script that calls the endpoint, capturing both output
# streams as text.
invoke_command = [
    "bash",
    "util-scripts/invoke_endpoint.sh",
    endpoint_name,
    _resource_group,
    _workspace_name,
    _location,
    test_data_path,
]
result = subprocess.run(
    invoke_command,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
)

# Normal output of the script
print("Script Output:\n", result.stdout)

# Error stream, shown only when the script wrote something to it
if result.stderr:
    print("\nScript Errors:\n", result.stderr)


Script Output:
 Getting access key and scoring URL...
Base URL is https://singlemodfastep-1092.eastus2.inference.ml.azure.com
Scoring URL is https://singlemodfastep-1092.eastus2.inference.ml.azure.com/score
 
https://singlemodfastep-1092.eastus2.inference.ml.azure.com/
"healthy" 
https://singlemodfastep-1092.eastus2.inference.ml.azure.com/score1
"{\"message\": \"This is a custom GET endpoint\"}" 
https://singlemodfastep-1092.eastus2.inference.ml.azure.com/score
{"prediction":"your_result"} 
https://singlemodfastep-1092.eastus2.inference.ml.azure.com/predict
{"result":"your_prediction_result"} 


Script Errors:
 
**************************************************************************************************************
* Extension "azure-cli-ml" cannot be used along with extension "ml". This may result in unexpected behaviour.*
* Please remove azure-cli-ml extension by running  "az extension remove -n azure-cli-ml                      *
***********************************************

## 5. Delete assets

### 5.1 Delete the endpoint

In [20]:
# Left commented out so the endpoint is not removed when the notebook is run
# end to end; uncomment to delete the endpoint once you are finished with it.
# poller = ml_client.online_endpoints.begin_delete(name=endpoint_name)