## Import required packages

In [None]:
from azure.ai.ml import MLClient
from azure.identity import (
    DefaultAzureCredential,
)
from azure.ai.ml.entities import (
    CodeConfiguration, 
    ManagedOnlineEndpoint, ManagedOnlineDeployment, OnlineRequestSettings
)

## Create handle for azureml workspace

In [2]:
# Authenticate using the default Azure credential chain
credential = DefaultAzureCredential()

# Build the workspace client from the discovered config file
ml_client = MLClient.from_config(credential)

Found the config file in: /config.json
Class DeploymentTemplateOperations: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


## Read Azure ML environment and model asset details

In [3]:
# Model asset reference (already registered); "@latest" selects the latest version
ts_model = "cisco-ts@latest"

# Environment asset reference (already built and registered), also pinned to "@latest"
ts_env = "cisco-ts-env@latest"

## Create online endpoint

In [11]:
# Name of the online endpoint. Endpoint names need to be unique in a region;
# this notebook uses a fixed name (no timestamp suffix), so change it here if
# it collides with an existing endpoint.
online_endpoint_name: str = "cisco-ts-ep2-gpu"

In [16]:
# Define the managed online endpoint (key-based authentication).
# This only builds the local entity; nothing is created until it is submitted.
endpoint = ManagedOnlineEndpoint(
    auth_mode="key",
    description="Online endpoint for Cisco TS model",
    name=online_endpoint_name,
)

In [17]:
# Submit the endpoint definition, then block until the long-running operation finishes
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.wait()

## Create deployment for the online endpoint

In [5]:
# Name of the deployment created under the online endpoint
deployment_name: str = "deploy02"

In [19]:
# Scoring code: the "score.py" script from the "onlinescoring" code path
scoring_code = CodeConfiguration(code="onlinescoring", scoring_script="score.py")

# Per-request timeout, in milliseconds
request_settings = OnlineRequestSettings(request_timeout_ms=120000)

# Managed deployment: registered model + environment on a single
# Standard_NC24ads_A100_v4 instance
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=ts_model,
    environment=ts_env,
    code_configuration=scoring_code,
    instance_type="Standard_NC24ads_A100_v4",
    instance_count=1,
    request_settings=request_settings,
)

In [None]:
# Submit the deployment and block until creation completes;
# the resulting deployment is shown as the cell output
deployment_poller = ml_client.online_deployments.begin_create_or_update(demo_deployment)
deployment_poller.result()

In [None]:
# Route 100% of the endpoint's traffic to the deployment created above.
# The deployment object in this notebook is `demo_deployment`; the previous
# reference to an undefined `deployment` raised NameError on a fresh kernel.
endpoint.traffic = {demo_deployment.name: 100}

# Push the updated traffic split and block until the update completes
ml_client.begin_create_or_update(endpoint).result()

## Test the online endpoint

In [22]:
# Smoke-test the endpoint with a single-series request payload
sample_data = "./invoke/sample-ts.json"
ml_client.online_endpoints.invoke(
    request_file=sample_data,
    endpoint_name=online_endpoint_name,
)

'"{\\"mean\\": [102.63409423828125, 102.31231689453125, 102.18730163574219, 102.04451751708984, 101.8825454711914], \\"quantiles\\": {\\"0.1\\": [101.95132446289062, 101.62068176269531, 101.44732666015625, 101.32473754882812, 101.17073059082031], \\"0.2\\": [102.1462173461914, 101.85722351074219, 101.71337127685547, 101.59812927246094, 101.4380111694336], \\"0.3\\": [102.28924560546875, 102.01976776123047, 101.91670227050781, 101.79621887207031, 101.6313705444336], \\"0.4\\": [102.42024993896484, 102.15623474121094, 102.07482147216797, 101.9502182006836, 101.80138397216797], \\"0.5\\": [102.56583404541016, 102.28491973876953, 102.21682739257812, 102.07856750488281, 101.93266296386719], \\"0.6\\": [102.65829467773438, 102.42024993896484, 102.32421112060547, 102.22034454345703, 102.0597152709961], \\"0.7\\": [102.79363250732422, 102.57013702392578, 102.4725570678711, 102.35195922851562, 102.17548370361328], \\"0.8\\": [102.97952270507812, 102.76671600341797, 102.67338562011719, 102.50457

In [23]:
# Smoke-test the endpoint with a multi-series (batch) request payload
sample_data = "./invoke/multiple-ts.json"
ml_client.online_endpoints.invoke(
    request_file=sample_data,
    endpoint_name=online_endpoint_name,
)

'"{\\"results\\": [{\\"mean\\": [102.63409423828125, 102.31231689453125, 102.18730163574219, 102.04450988769531, 101.8825454711914], \\"quantiles\\": {\\"0.1\\": [101.95132446289062, 101.62068176269531, 101.44732666015625, 101.32473754882812, 101.17073059082031], \\"0.2\\": [102.1462173461914, 101.85722351074219, 101.71337127685547, 101.59812927246094, 101.4380111694336], \\"0.3\\": [102.28924560546875, 102.01976776123047, 101.91670227050781, 101.79621887207031, 101.6313705444336], \\"0.4\\": [102.42024993896484, 102.15623474121094, 102.07482147216797, 101.9502182006836, 101.80138397216797], \\"0.5\\": [102.56583404541016, 102.28491973876953, 102.21682739257812, 102.07856750488281, 101.93266296386719], \\"0.6\\": [102.65829467773438, 102.42024993896484, 102.32421112060547, 102.22034454345703, 102.0597152709961], \\"0.7\\": [102.79363250732422, 102.57013702392578, 102.4725570678711, 102.35195922851562, 102.17548370361328], \\"0.8\\": [102.97952270507812, 102.76671600341797, 102.67338562