In [None]:
%pip install -U azure-ai-ml

In [None]:
from azure.ai.ml import MLClient
from azure.identity import (
    DefaultAzureCredential,
    InteractiveBrowserCredential,
)
from azure.ai.ml.entities import MarketplaceSubscription, ServerlessEndpoint

# Authenticate: try DefaultAzureCredential first and request a token right away
# so that an unusable credential is detected here; on any failure fall back to
# an interactive browser login.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    credential = InteractiveBrowserCredential()

# Build the workspace client from the local workspace configuration file.
try:
    client = MLClient.from_config(credential=credential)
except Exception:
    print("Please create a workspace configuration file in the current directory.")
    # Re-raise: without `client` every later cell would fail with a confusing
    # NameError that hides the real cause.
    raise

# Get AzureML workspace object.
workspace = client.workspaces.get(client.workspace_name)
# NOTE(review): `_workspace_id` is a private attribute of the workspace entity;
# it is kept because later cells need this value to build the model asset ID.
workspace_id = workspace._workspace_id

Let's load the name of the fine-tuned model from the shared state written by the previous notebooks.

In [None]:
import os
from dotenv import load_dotenv

# Variables passed by previous notebooks
load_dotenv('.env.state')

# Name of the fine-tuned model, read from the environment after loading `.env.state`.
FINETUNED_MODEL_NAME = os.getenv("FINETUNED_MODEL_NAME")

# Fail fast with an actionable message instead of passing None on to later cells.
if not FINETUNED_MODEL_NAME:
    raise RuntimeError(
        "FINETUNED_MODEL_NAME is not set. Run the previous notebooks first so that "
        "it is written to '.env.state'."
    )

In [None]:
# `wait_for_model` comes from the project-local `utils` module; presumably it
# polls the workspace until the named model is available and returns the model
# entity — TODO confirm against utils.py.
from utils import wait_for_model
print(f"Waiting for fine tuned model {FINETUNED_MODEL_NAME} to complete training...")
model = wait_for_model(client, FINETUNED_MODEL_NAME)
# NOTE(review): nothing is deployed in this cell; this message only announces
# the deployment steps performed by the following cells.
print(f"Deploying fine tuned model {FINETUNED_MODEL_NAME}")
# Bare last expression so the notebook displays the returned model object.
model

In [None]:
# NOTE: run this only if needed; the author marked it as not currently working.
# The Llama 3 example from the documentation below does work:
# https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-serverless?tabs=python
#
# Example of a registry model ID:
#   "azureml://registries/azureml-meta/models/Llama-2-7b"

# ID of the base model, as recorded in the fine-tuned model's properties.
base_model_id = model.properties['baseModelId']
model_id = model.id

# The subscription is named after the last path segment of the base model ID.
subscription_name = base_model_id.rsplit('/', 1)[-1]
print(f"Subscribing to {subscription_name} for model ID {base_model_id}")

The Asset ID required to deploy the model is not currently exposed through the Python SDK so we're constructing it using the information we have on hand.

In [None]:
# Keep the tail of the model's ID starting at path segment 9.
# Assumes `model.id` is a workspace-scoped ARM ID of the form
# /subscriptions/<sub>/resourceGroups/<rg>/providers/<provider>/workspaces/<ws>/models/<name>/versions/<version>
# so that segment 9 is "models" — verify if the ID format differs.
model_relative_path = "/".join(model.id.split('/')[9:])

# Use the workspace's own region rather than a hard-coded one, and build the
# string without nesting double quotes inside the f-string (a SyntaxError
# before Python 3.12).
model_asset_id = (
    f"azureml://locations/{workspace.location}"
    f"/workspaces/{workspace_id}/{model_relative_path}"
)
print(f"Deploying model {model_asset_id}")

In [None]:
# Describe the marketplace subscription for the base model.
subscription_request = MarketplaceSubscription(
    name=subscription_name,
    model_id=base_model_id,
)

# Submit the create-or-update operation, then take its final result.
subscription_poller = client.marketplace_subscriptions.begin_create_or_update(
    subscription_request
)
marketplace_subscription = subscription_poller.result()


In [None]:
# Choose a name for the serverless endpoint that will host the model
import uuid

# Name must be unique, so append the first four hex characters of a random UUID
guid = uuid.uuid4()
short_guid = guid.hex[:4]
endpoint_name = f"{model.name}-{short_guid}"
print(f"Deploying model {model.name} as endpoint {endpoint_name}")

In [None]:
print("Waiting for deployment to complete...")

# Describe the serverless endpoint that serves the fine-tuned model asset.
serverless_endpoint = ServerlessEndpoint(model_id=model_asset_id, name=endpoint_name)

# Submit the create-or-update operation, then take its final result.
deployment_poller = client.serverless_endpoints.begin_create_or_update(
    serverless_endpoint
)
created_endpoint = deployment_poller.result()

Let's extract the endpoint URL, name and keys and store them in the shared state to pass on to the next notebook

In [None]:
# Look up the deployed endpoint and its access keys by name.
endpoint = client.serverless_endpoints.get(endpoint_name)
endpoint_keys = client.serverless_endpoints.get_keys(endpoint_name)

from utils import update_state

# Update the shared `.env.state` env file with the newly deployed finetuned
# model endpoint: its URL, primary key and name, written in this order.
shared_state = {
    "FINETUNED_OPENAI_BASE_URL": endpoint.scoring_uri,
    "FINETUNED_OPENAI_API_KEY": endpoint_keys.primary_key,
    "FINETUNED_OPENAI_DEPLOYMENT": endpoint.name,
}
for state_key, state_value in shared_state.items():
    update_state(state_key, state_value)

Test that the finetuned model is deployed and available

In [None]:
import requests

# Upper bound, in seconds, on how long to wait for the endpoint; without a
# timeout `requests` can block forever if the service never answers.
REQUEST_TIMEOUT_SECONDS = 300

# Completions route on the deployed endpoint.
url = f"{endpoint.scoring_uri}/v1/completions"

payload = {
    "max_tokens": 1024,
    "prompt": [ "What do you know?" ]
}
headers = {
    "Content-Type": "application/json",
    "Authorization": endpoint_keys.primary_key
}

response = requests.post(
    url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
)

# Surface HTTP failures explicitly (with the response body for diagnosis)
# instead of trying to decode an error response as a successful completion.
if not response.ok:
    print(f"Request failed with HTTP {response.status_code}: {response.text}")
response.raise_for_status()

# Bare last expression so the notebook displays the decoded JSON response.
response.json()