In [3]:
from azure.ai.ml import MLClient
from azure.identity import (
    DefaultAzureCredential,
)
from azure.ai.ml.entities import (
    AmlCompute, Model, Environment, CodeConfiguration, 
    ManagedOnlineEndpoint, ManagedOnlineDeployment, OnlineRequestSettings, ProbeSettings
)
from azure.ai.ml.constants import AssetTypes

import time, sys, json, os
from IPython.display import display, JSON

In [4]:
credential = DefaultAzureCredential()

workspace_ml_client = MLClient.from_config(
    credential
)

Found the config file in: /config.json


In [3]:
# register the foundation model from local as Model asset in ml workspace
model = Model(
    path="models",
    type=AssetTypes.CUSTOM_MODEL,
    name="phi3mini4kgguf",
    description="phi3 - gguf format model",
)

workspace_ml_client.create_or_update(model)

Your file exceeds 100 MB. If you experience low speeds, latency, or broken connections, we recommend using the AzCopyv10 tool for this file transfer.

Example: azcopy copy '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cpuds11001/code/Users/pupanda/gpt-use-case/foundation-models/quantized-model-inference/inference-phi3q-gguf/models' 'https://mlws012181044126.blob.core.windows.net/azureml-blobstore-b81b1c5e-0151-42cf-9c96-ad079150a5ee/LocalUpload/1ac8a3402ed1d31b0df813da3d01c34c/models' 

See https://docs.microsoft.com/azure/storage/common/storage-use-azcopy-v10 for more information.


Model({'job_name': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'phi3mini4kgguf', 'description': 'phi3 - gguf format model', 'tags': {}, 'properties': {}, 'print_as_yaml': False, 'id': '/subscriptions/6977e295-0d7c-4557-8e0b-26e2f6532103/resourceGroups/rg-mlws/providers/Microsoft.MachineLearningServices/workspaces/mlws01/models/phi3mini4kgguf/versions/4', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cpuds11001/code/Users/pupanda/gpt-use-case/foundation-models/quantized-model-inference/inference-phi3q-gguf', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f21ce6863e0>, 'serialize': <msrest.serialization.Serializer object at 0x7f21ce684610>, 'version': '4', 'latest_version': None, 'path': 'azureml://subscriptions/6977e295-0d7c-4557-8e0b-26e2f6532103/resourceGroups/rg-mlws/workspaces/mlws01/datastores/workspaceblobstore/paths/Loc

In [4]:
from azure.ai.ml.entities import Environment, BuildContext

# Create a new environment based on the curated one
custom_env = Environment(
    name="phi3-gguf-env",
    description="Custom environment with additional dependencies",
    build=BuildContext(path="../env")
)

# Register the custom environment
workspace_ml_client.environments.create_or_update(custom_env)

Environment({'arm_type': 'environment_version', 'latest_version': None, 'image': None, 'intellectual_property': None, 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'phi3-gguf-env', 'description': 'Custom environment with additional dependencies', 'tags': {}, 'properties': {'azureml.labels': 'latest'}, 'print_as_yaml': False, 'id': '/subscriptions/6977e295-0d7c-4557-8e0b-26e2f6532103/resourceGroups/rg-mlws/providers/Microsoft.MachineLearningServices/workspaces/mlws01/environments/phi3-gguf-env/versions/1', 'Resource__source_path': '', 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/cpuds11001/code/Users/pupanda/gpt-use-case/foundation-models/quantized-model-inference/inference-phi3q-gguf', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f21ce686950>, 'serialize': <msrest.serialization.Serializer object at 0x7f21ce69d600>, 'version': '1', 'conda_file': None, 'build': <azure.ai.ml.entities._assets.

In [5]:
# Create online endpoint - endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name
online_endpoint_name = "phi3-gguf-ep"

In [6]:
# managed endpoint
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="Online endpoint for phi3 gguf",
    auth_mode="key",
)

In [7]:
# managed endpoint create async call
workspace_ml_client.begin_create_or_update(endpoint).wait()

In [7]:
deployment_name = "deploy01"
model = "phi3mini4kgguf@latest"
env = "phi3-gguf-env@latest"

In [12]:
# managed endpoint deployment
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=model,
    environment=env,
    code_configuration=CodeConfiguration(
        code="../onlinescoring",
        scoring_script="score.py",
    ),
    instance_type="Standard_F16s_v2",
    instance_count=1,
    request_settings=OnlineRequestSettings(
        request_timeout_ms=120000,
    ),
    liveness_probe=ProbeSettings(
        initial_delay=600
    ),
)

In [13]:
# managed endpoint deplyment create async call
workspace_ml_client.online_deployments.begin_create_or_update(deployment=demo_deployment).wait()

Check: endpoint phi3-gguf-ep exists


..................................................

In [14]:
# update traffic to the deployment for 100%
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.begin_create_or_update(endpoint).result()

ManagedOnlineEndpoint({'public_network_access': 'Enabled', 'provisioning_state': 'Succeeded', 'scoring_uri': 'https://phi3-gguf-ep.eastus2.inference.ml.azure.com/score', 'openapi_uri': 'https://phi3-gguf-ep.eastus2.inference.ml.azure.com/swagger.json', 'name': 'phi3-gguf-ep', 'description': 'Online endpoint for phi3 gguf', 'tags': {}, 'properties': {'createdBy': 'Purna Chandra Panda', 'createdAt': '2024-12-09T06:02:42.346318+0000', 'lastModifiedAt': '2024-12-09T06:02:42.346318+0000', 'azureml.onlineendpointid': '/subscriptions/6977e295-0d7c-4557-8e0b-26e2f6532103/resourcegroups/rg-mlws/providers/microsoft.machinelearningservices/workspaces/mlws01/onlineendpoints/phi3-gguf-ep', 'AzureAsyncOperationUri': 'https://management.azure.com/subscriptions/6977e295-0d7c-4557-8e0b-26e2f6532103/providers/Microsoft.MachineLearningServices/locations/eastus2/mfeOperationsStatus/oeidp:b81b1c5e-0151-42cf-9c96-ad079150a5ee:013454f2-d1db-4bb6-9a32-d52705db1b4c?api-version=2022-02-01-preview'}, 'print_as_y

### Test endpoint with sample data

In [12]:
# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method
response = workspace_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file="../payload/request1.json",
)

print(response)

{"output": " 1. Eiffel Tower: Visit the iconic symbol of Paris and enjoy panoramic views from its observation decks.\n2. Notre Dame Cathedral: Explore this masterpiece of French Gothic architecture, though be aware that it is currently undergoing restoration after a fire in 2019.\n3. Louvre Museum: Visit the world's largest and most visited museum to see famous works like Leonardo da Vinci's Mona Lisa and ancient artifacts from Egyptian pharaohs.\n4. Arc de Triomphe: Stand atop this monumental arch, which offers a grand perspective of Paris and is best seen by climbing its steps or taking the elevator.\n5. Montmartre district: Visit the charming neighborhood known for its bohemian past, artistic history, and the famous Sacr\u00e9-C\u0153ur Basilica located atop the hill.\n6. Champs \u00c9lys\u00e9es: Experience one of Europe's most magnificent avenues by taking a stroll or enjoying Parisian cafes along this popular tourist destination.\n7. Mus\u00e9e d'Orsay: Visit this museum housed i

In [15]:
# score the sample_score.json file using the online endpoint with the azureml endpoint invoke method
response = workspace_ml_client.online_endpoints.invoke(
    endpoint_name=online_endpoint_name,
    request_file="../payload/request3.json",
)

print(response)

{"output": " The capital of India is New Delhi. It serves as the seat of government and hosts the offices of all three branches of government, including the President's residence, the Parliament buildings, and various ministries and departments. While the city itself was officially declared the capital in 1912, replacing Calcutta (now Kolkata), New Delhi has been central to Indian administration since British colonial times when it was established as the imperial city for administrative purposes during the British Raj."}
