In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential, ClientSecretCredential
from azure.ai.ml.entities import AmlCompute
import time

# Authenticate: try the default credential chain first and request a
# management-plane token to confirm it works; on any failure fall back to an
# interactive browser login.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    credential = InteractiveBrowserCredential()

# Ask MLClient.from_config for the workspace coordinates; if that fails, print
# the error and use the placeholders below (replace them with real values).
try:
    workspace_ml_client = MLClient.from_config(credential)
    subscription_id, resource_group, workspace_name = (
        workspace_ml_client.subscription_id,
        workspace_ml_client.resource_group_name,
        workspace_ml_client.workspace_name,
    )
except Exception as config_error:
    print(config_error)
    # Enter details of your AML workspace
    subscription_id, resource_group, workspace_name = (
        "<SUBSCRIPTION_ID>",
        "<RESOURCE_GROUP>",
        "<AML_WORKSPACE_NAME>",
    )
# Always (re)build the client from the explicit coordinates resolved above.
workspace_ml_client = MLClient(
    credential, subscription_id, resource_group, workspace_name
)

# Epoch seconds rendered as a string, for names and versions that must be unique
timestamp = f"{int(time.time())}"

In [None]:
model_name = "od_simulate_sd"


def _model_version_key(model):
    """Return a sort key that orders purely numeric versions by value.

    Comparing version strings directly is lexicographic ("9" > "10"), which
    would select a stale model once ten or more versions exist. Versions that
    are not plain integers rank below numeric ones and are compared as text
    among themselves.
    """
    version = str(model.version)
    if version.isdigit():
        return (1, int(version), "")
    return (0, 0, version)


# Materialize the listing so an empty result can be reported clearly instead of
# surfacing as an opaque "max() arg is an empty sequence" error.
foundation_models = list(workspace_ml_client.models.list(name=model_name))
if not foundation_models:
    raise ValueError(f"No model named '{model_name}' was found in the workspace.")

# Pick the latest registered version of the model.
foundation_model = max(foundation_models, key=_model_version_key)
print(
    f"\n\nUsing model name: {foundation_model.name}, version: {foundation_model.version}, id: {foundation_model.id} for inferencing"
)

In [None]:
import time
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment

# Endpoint names need to be unique in a region, hence using timestamp to create unique endpoint name
timestamp = int(time.time())
online_endpoint_name = f"sd-testing-acs-{timestamp}"

# Describe the online endpoint (key-based authentication).
endpoint = ManagedOnlineEndpoint(
    name=online_endpoint_name,
    description="Online endpoint",
    auth_mode="key",
)
# Create the endpoint now and block until provisioning finishes: the deployment
# created later in this notebook targets this endpoint by name, so it has to
# exist first for a fresh Restart & Run All to succeed.
workspace_ml_client.begin_create_or_update(endpoint).wait()

### Create or reuse an Azure AI Content Safety (AACS) resource

In [None]:
from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient

# Management-plane client used to look up / create the Content Safety account.
acs_client = CognitiveServicesManagementClient(credential, subscription_id)


# settings for the Azure AI Content Safety resource
# we will choose existing AACS resource if it exists, otherwise create a new one
# name of azure ai content safety resource, has to be unique

import time

endpoint_name = "acs-test-ep"  # Replace with your endpoint name
# The current time with the decimal point removed makes the name unique per run.
aacs_name = f"{endpoint_name}-aacs-{str(time.time()).replace('.','')}"
available_aacs_locations = ["east us", "west europe"]

# create a new Cognitive Services Account
kind = "ContentSafety"
aacs_sku_name = "S0"
aacs_location = available_aacs_locations[0]


print("Available SKUs:")
aacs_skus = acs_client.resource_skus.list()
print("SKU Name\tSKU Tier\tLocations")
for sku in aacs_skus:
    if sku.kind == "ContentSafety":
        # tier/locations may be unset on a SKU record; formatting via an
        # f-string and defaulting locations avoids a TypeError in that case.
        locations = ",".join(sku.locations or [])
        print(f"{sku.name}\t{sku.tier}\t{locations}")

print(
    f"Choose a new Azure AI Content Safety resource in {aacs_location} with SKU {aacs_sku_name}"
)

In [None]:
from azure.mgmt.cognitiveservices.models import Account, Sku, AccountProperties

# Request body for creating the Content Safety account: publicly reachable,
# with the account name doubling as its custom sub-domain.
aacs_properties = AccountProperties(
    custom_sub_domain_name=aacs_name,
    public_network_access="Enabled",
)
parameters = Account(
    kind=kind,
    location=aacs_location,
    sku=Sku(name=aacs_sku_name),
    properties=aacs_properties,
)

# How many seconds to wait between checking the status of an async operation.
wait_time = 10


def find_acs(accounts, location=None, sku_name=None):
    """Return the first Content Safety account matching a location and SKU.

    Args:
        accounts: Iterable of account objects exposing ``kind``, ``location``
            and ``sku`` (with a ``name``) attributes.
        location: Region to match. Defaults to the notebook-level
            ``aacs_location`` when omitted.
        sku_name: SKU name to match. Defaults to the notebook-level
            ``aacs_sku_name`` when omitted.

    Returns:
        The first matching account.

    Raises:
        StopIteration: If no account matches (same as the original behavior).
    """

    def _canonical_region(region):
        # Compare regions ignoring case and spaces so a configured "east us"
        # also matches a location reported as "eastus".
        return (region or "").replace(" ", "").lower()

    if location is None:
        location = aacs_location
    if sku_name is None:
        sku_name = aacs_sku_name

    wanted_region = _canonical_region(location)
    return next(
        account
        for account in accounts
        if account.kind == "ContentSafety"
        and _canonical_region(account.location) == wanted_region
        # Skip accounts without SKU information instead of raising AttributeError.
        and account.sku is not None
        and account.sku.name == sku_name
    )


# Resolve the Content Safety account in three steps:
#   1. look it up by its exact name,
#   2. otherwise reuse a matching account in the same resource group,
#   3. otherwise create a new one.
# `except Exception` (rather than a bare `except`) keeps the best-effort
# fallback while letting KeyboardInterrupt / SystemExit stop the cell.
try:
    # check if AACS exists
    aacs = acs_client.accounts.get(resource_group, aacs_name)
    print(f"Found existing Azure AI content safety Account {aacs.name}.")
except Exception:
    try:
        # check if there is an existing AACS resource within same resource group
        aacs = find_acs(acs_client.accounts.list_by_resource_group(resource_group))
        print(
            f"Found existing Azure AI content safety Account {aacs.name} in resource group {resource_group}."
        )
    except Exception:
        # Covers both "no match" (StopIteration from find_acs) and listing errors.
        print(f"Creating Azure AI content safety Account {aacs_name}.")
        acs_client.accounts.begin_create(resource_group, aacs_name, parameters).wait()
        print("Resource created.")
        aacs = acs_client.accounts.get(resource_group, aacs_name)


# Record the identifiers of the resolved account for the cells that follow.
# aacs_name is rebound to the account actually in use, which may be a reused
# account rather than the freshly generated name.
aacs_name = aacs.name
aacs_resource_id = aacs.id
aacs_endpoint = aacs.properties.endpoint

print(
    f"AACS name is {aacs.name}, use this name in UAI preparation notebook to create UAI."
)
print(f"AACS endpoint is {aacs_endpoint}")
print(f"AACS ResourceId is {aacs_resource_id}")

# Fetch the account keys and keep the primary one (a secret: do not print it).
aacs_keys = acs_client.accounts.list_keys(
    resource_group_name=resource_group,
    account_name=aacs.name,
)
aacs_access_key = aacs_keys.key1

In [None]:
# Optional user-assigned identity (UAI). Leave uai_name empty to skip the
# lookup; uai_id and uai_client_id then stay as empty strings.
uai_id = ""
uai_client_id = ""
uai_name = ""
if uai_name:
    from azure.mgmt.msi import ManagedServiceIdentityClient
    from azure.mgmt.msi.models import Identity

    msi_client = ManagedServiceIdentityClient(
        credential=credential,
        subscription_id=subscription_id,
    )
    # Look the identity up by name in the workspace's resource group.
    uai_resource = msi_client.user_assigned_identities.get(resource_group, uai_name)
    uai_id, uai_client_id = uai_resource.id, uai_resource.client_id

In [None]:
# Request timeout for the online deployment, in milliseconds.
REQUEST_TIMEOUT_MS = 90000

# Environment variables handed to the deployment.
deployment_env_vars = {
    "CONTENT_SAFETY_ACCOUNT_NAME": aacs_name,
    "CONTENT_SAFETY_ENDPOINT": aacs_endpoint,
    # The access key is only supplied when no user-assigned identity is configured.
    "CONTENT_SAFETY_KEY": aacs_access_key if uai_client_id == "" else None,
    "CONTENT_SAFETY_THRESHOLD": "2",
    "SUBSCRIPTION_ID": subscription_id,
    "RESOURCE_GROUP_NAME": resource_group,
    "UAI_CLIENT_ID": uai_client_id,
    # NOTE(review): this is a bool while the other values are strings; confirm
    # how the deployment serializes it.
    "ENABLE_AZURE_CONTENT_SAFETY": True
}

# Show the settings for inspection with the access key masked, so the secret is
# never written into the notebook's saved output.
{
    name: ("<redacted>" if name == "CONTENT_SAFETY_KEY" and value else value)
    for name, value in deployment_env_vars.items()
}

In [None]:
from azure.ai.ml.entities import OnlineRequestSettings, ProbeSettings

deployment_name = "sd-deploy-env-var-7"

# Echo what is about to be deployed, and where.
print(foundation_model.id)
print(online_endpoint_name)
print(deployment_name)

# Create a deployment
demo_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=foundation_model.id,
    instance_type="Standard_DS3_v2", # Use GPU instance type like Standard_NC6s_v3 for faster inference
    instance_count=1,
    environment_variables=deployment_env_vars,
    request_settings=OnlineRequestSettings(
        max_concurrent_requests_per_instance=1,
        # Use the shared constant instead of repeating the literal 90000.
        request_timeout_ms=REQUEST_TIMEOUT_MS,
        max_queue_wait_ms=500
    ),
    liveness_probe=ProbeSettings(
        failure_threshold=49,
        success_threshold=1,
        timeout=299,
        period=180,
        initial_delay=180,
    ),
    readiness_probe=ProbeSettings(
        failure_threshold=10,
        success_threshold=1,
        timeout=10,
        period=10,
        initial_delay=10,
    ),
)
# Block until the deployment has been provisioned.
workspace_ml_client.online_deployments.begin_create_or_update(demo_deployment).wait()

# Route all endpoint traffic to the new deployment and push the endpoint update.
endpoint.traffic = {deployment_name: 100}
workspace_ml_client.begin_create_or_update(endpoint).result()