In [12]:
from azure.ai.ml import MLClient
from azure.identity import (
    DefaultAzureCredential,
)
from azure.ai.ml.entities import (
    AmlCompute, Model, Environment, BuildContext, CodeConfiguration, 
    ManagedOnlineEndpoint, ManagedOnlineDeployment, OnlineRequestSettings, ProbeSettings
)
from azure.ai.ml.constants import AssetTypes

import time, sys, json, os
from IPython.display import display, JSON

In [13]:
# Authenticate with DefaultAzureCredential and build the workspace client from
# the workspace config file that MLClient.from_config discovers.
credential = DefaultAzureCredential()
workspace_ml_client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


### Register the model asset in the workspace

In [14]:
model_name = "phi3mini4kgguf"

# Look the model up by scanning the workspace's model listing, so that a
# missing model yields None instead of raising.
models = workspace_ml_client.models.list()
model = next(filter(lambda m: m.name == model_name, models), None)

if model is None:
    # Not registered yet: register the contents of the local "models" folder
    # as a custom model asset in the ML workspace.
    model = Model(
        name=model_name,
        path="models",
        type=AssetTypes.CUSTOM_MODEL,
        description="phi3 - gguf format model",
    )
    workspace_ml_client.create_or_update(model)
else:
    print(f"Model {model.name} already exists.")

Model phi3mini4kgguf already exists.


### Register the environment asset in the workspace

In [15]:
env_name = "phi3-gguf-env-gpu"

# Look the environment up by scanning the workspace's environment listing, so
# that a missing environment yields None instead of raising.
envs = workspace_ml_client.environments.list()
env = next(filter(lambda e: e.name == env_name, envs), None)

# Manual switch:
#   True  -> keep using the environment that is already registered (if any).
#   False -> build and register the environment again from ../env/gpu, e.g.
#            after the build context has been modified.
is_env_earlierone = True

if env is None or not is_env_earlierone:
    # Define the custom environment from the Docker build context on disk.
    custom_env = Environment(
        name=env_name,
        description="Custom environment with additional dependencies",
        build=BuildContext(path="../env/gpu"),
    )

    # Register the custom environment in the workspace.
    workspace_ml_client.environments.create_or_update(custom_env)
else:
    print(f"Environment {env_name} already exists.")

Environment phi3-gguf-env-gpu already exists.


### Create managed online endpoint

In [16]:
# Name of the managed online endpoint. Endpoint names need to be unique within a
# region; this notebook uses a fixed name (no timestamp suffix is appended), so
# change it here if the name is already taken.
online_endpoint_name = "phi3-gguf-ep-gpu"

In [17]:
# Describe the managed online endpoint with key-based authentication.
# The create/update request itself is issued in a separate cell.
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="Online endpoint for phi3 gguf",
    auth_mode="key",
)

In [18]:
# Submit the endpoint for creation (or update) and block until the
# long-running operation has finished.
workspace_ml_client.begin_create_or_update(endpoint).wait()

### Create managed online deployment for the endpoint

In [19]:
# Name of the deployment created under the endpoint.
deployment_name = "deploy01"
# Reference the registered model and environment by name using the "@latest"
# label instead of a pinned version number.
# NOTE: this rebinds `model` and `env` from the asset objects looked up in the
# registration cells to plain reference strings.
model = f"{model_name}@latest"
env = f"{env_name}@latest"

In [20]:
# Scoring code: the entry script score.py inside the ../onlinescoring folder.
deployment_code = CodeConfiguration(
    code="../onlinescoring",
    scoring_script="score.py",
)

# Allow each scoring request up to 120000 ms (120 s).
deployment_request_settings = OnlineRequestSettings(request_timeout_ms=120000)

# Delay the first liveness probe (presumably 600 seconds -- confirm the unit
# against the ProbeSettings documentation).
deployment_liveness_probe = ProbeSettings(initial_delay=600)

# Assemble the deployment: one Standard_NC24ads_A100_v4 instance serving
# `model` in `env` behind the endpoint named by online_endpoint_name.
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    code_configuration=deployment_code,
    instance_type="Standard_NC24ads_A100_v4",
    instance_count=1,
    request_settings=deployment_request_settings,
    liveness_probe=deployment_liveness_probe,
)

In [21]:
# Submit the managed online deployment for creation and block until the
# long-running operation has finished.
workspace_ml_client.online_deployments.begin_create_or_update(deployment=demo_deployment).wait()

Check: endpoint phi3-gguf-ep-gpu exists


.....................................................................................................

In [None]:
# Send 100% of the endpoint's traffic to this deployment, then push the updated
# endpoint definition and wait for the operation's result.
endpoint.traffic = {deployment_name: 100}
traffic_update_poller = workspace_ml_client.begin_create_or_update(endpoint)
traffic_update_poller.result()

### Test endpoint with sample data

In [23]:
# Score the sample payload in request1.json by calling the online endpoint
# through the SDK's invoke method, then show the raw response.
request_file = "../payload/request1.json"
response = workspace_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=request_file,
)

print(response)

{"output": " Absolutely! Paris is a city filled with history, art, and charm. Here's an itinerary for 10 must-visit places in the City of Light:\n\n1. Eiffel Tower - Visit this iconic symbol of France to enjoy panoramic views from its three levels. Don't forget to climb up or take a lift!\n2. Louvre Museum - Explore one of the largest and most famous museums in the world, housing thousands of works including Da Vinci's Mona Lisa.\n3. Notre-Dame Cathedral - Visit this masterpiece of French Gothic architecture to admire its stunning exterior and explore its interior during or after visiting other attractions nearby (note: as of 2021, the cathedral is undergoing restoration due to fire damage).\n4. Arc de Triomphe - Walk up the steps at the very top for a unique perspective on the city's Champs-\u00c9lys\u00e9es and views across Paris in all directions.\n5. Montmartre & Sacr\u00e9-C\u0153ur Basilica - Visit this hilltop neighborhood, famous for its bohemian past and colorful buildings; cl

In [24]:
# Score the sample payload in request3.json by calling the online endpoint
# through the SDK's invoke method, then show the raw response.
request_file = "../payload/request3.json"
response = workspace_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file=request_file,
)

print(response)

{"output": " The capital of India is New Delhi. It was officially declared as the country's capital in 1946, before which it shared that status with other cities like Calcutta (now Kolkata) and Bombay (now Mumbai). However, for administrative purposes, New Delhi became the seat of government in 1912. As a city, it is not only the political capital but also an important cultural center hosting numerous historical monuments such as the Parliament House, Rashtrapati Bhavan (President's Residence), and India Gate among others."}
