# Sample notebook to deploy a Whisper MLflow Hugging Face model to an Azure Machine Learning (AML) workspace using the Python SDK

### Define required parameters

Update these parameters to test deployments in your own workspace

In [None]:
# Notebook parameters -- fill in the empty values before running the remaining cells

# Azure scope
subscription_id = ""  # Replace with your subscription ID
resource_group = ""  # Replace with your resource group name
workspace_name = ""  # Replace with your workspace name
registry_name = ""  # Replace with your registry name

# Online endpoint and deployment
endpoint_name = ""  # Replace with your endpoint name
deployment_name = ""  # Replace with your deployment name

# Model and compute
model_name = "openai-whisper-large"  # Name of the whisper model in the registry
# Instance type (SKU) for the deployment. Check the model-list in the parent
# folder (inference) for the most suitable SKU for your model.
# Default: Standard_DS4_v2
sku_name = "Standard_DS4_v2"

### Install required libraries

In [None]:
# Optional: skip this cell if all libraries are already installed.
# `%pip` is an IPython magic command; the line below installs the pinned
# azure-ai-ml release (1.2.0) together with azure-identity.
%pip install azure-ai-ml==1.2.0 azure-identity

### Import required libraries

In [None]:
import os
import base64
import json
from azure.ai.ml import MLClient
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import CodeConfiguration, Environment, Model, ManagedOnlineDeployment, ManagedOnlineEndpoint, OnlineRequestSettings
from azure.identity import DefaultAzureCredential

### Setup the ML Clients

In [None]:
# Workspace-scoped client: used by the later cells to register the model and
# to manage the online endpoint and its deployment
ws_client = MLClient(
    DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Registry-scoped client: used by the later cells to look up and download the model
reg_client = MLClient(
    DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    registry_name=registry_name,
)

### Validate the model in the registry

In [None]:
# Validate that the model exists in the registry and fetch the entity that the
# following cells rely on (`version` and `model`).
#
# A failed lookup is re-raised instead of only being printed: the later cells
# need `version` and `model`, so continuing after a failure would merely
# resurface as a NameError further down the notebook.
try:
    registered_versions = list(reg_client.models.list(name=model_name))
    if not registered_versions:
        # Give an explicit reason instead of an opaque IndexError on [0]
        raise LookupError(f"No versions of model '{model_name}' were returned by registry '{registry_name}'")
    # Use the first version returned by the listing
    version = registered_versions[0].version
    model = reg_client.models.get(model_name, version)
    # Record the source registry in the tags of the fetched model entity
    model.tags.update({"registry": registry_name})
except Exception as err:
    raise RuntimeError(
        "Model not found in the registry. Check the registry for the list of supported models. "
        f"Detailed Response:\n{err}"
    ) from err
else:
    print(f"Model validated successfully. Using model {model_name}, version {version}")

### Download the model

In [None]:
# Fetch the validated model version from the registry to the local disk
reg_client.models.download(
    name=model_name,
    version=version,
)

### Register the model to the workspace

In [None]:
# Describe the locally downloaded MLflow model as a workspace model asset,
# carrying over the tags of the registry model
mlflow_model = Model(
    name=model_name,
    type=AssetTypes.MLFLOW_MODEL,
    path=f"./{model_name}/mlflow_model_folder",  # replace with pointer to local download
    description="MLflow model created from local path",
    tags=model.tags,
)

# Create (or update) the model in the workspace and show the resulting entity
ws_model = ws_client.create_or_update(mlflow_model)
print(ws_model)

### Create Environment to support the whisper model

In [None]:
# Environment definition pointing at the container image used for the whisper deployment
whisper_environment = Environment(
    image="docker.io/whisperlarge/mlflow-huggingface:pyfunc",
    name="whisper-env",
)

### Create an online endpoint

In [None]:
# Reuse the online endpoint if it already exists in the workspace; otherwise create it.
try:
    endpoint = ws_client.online_endpoints.get(endpoint_name)
    print("---Endpoint already exists---")
except Exception:
    # `except Exception` rather than a bare `except`, so that KeyboardInterrupt
    # and SystemExit are not mistaken for "endpoint does not exist".
    # The lookup failed, so treat the endpoint as missing and create it.

    # Define the endpoint
    endpoint = ManagedOnlineEndpoint(name=endpoint_name, description="Test endpoint for model")

    # Trigger the endpoint creation and block until the operation finishes
    try:
        ws_client.begin_create_or_update(endpoint).wait()
        print("\n---Endpoint created successfully---\n")
    except Exception as err:
        raise RuntimeError(f"Endpoint creation failed. Detailed Response:\n{err}") from err

### Create an online endpoint-deployment

In [None]:
# Define the deployment.
# The deployment is named after `deployment_name` so that the inference and
# clean-up cells, which both address the deployment by `deployment_name`,
# target the deployment created here.
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    # Reference the model registered in the workspace by name and version
    model=f"azureml:{ws_model.name}:{ws_model.version}",
    environment=whisper_environment,
    code_configuration=CodeConfiguration(code="./whisper_deployment_resources/", scoring_script="whisper_score.py"),
    instance_type=sku_name,
    instance_count=1,
    request_settings=OnlineRequestSettings(request_timeout_ms=60000),  # extended request_timeout to 60sec
)

# Trigger the deployment creation and block until the operation finishes
try:
    ws_client.begin_create_or_update(deployment).wait()
    print("\n---Deployment created successfully---\n")
except Exception as err:
    raise RuntimeError(f"Deployment creation failed. Detailed Response:\n{err}") from err

### Inference Testing

#### Convert Audio File to Base64 encoded string

In [None]:
# languages supported are:
# "en","zh","de","es","ru","ko","fr","ja","pt","tr","pl","ca","nl","ar","sv","it",
# "id","hi","fi","vi","he","uk","el","ms","cs","ro","da","hu","ta","no","th","ur","hr",
# "bg","lt","la","mi","ml","cy","sk","te","fa","lv","bn","sr","az","sl","kn","et","mk",
# "br","eu","is","hy","ne","mn","bs","kk","sq","sw","gl","mr","pa","si","km","sn","yo",
# "so","af","oc","ka","be","tg","sd","gu","am","yi","lo","uz","fo","ht","ps","tk",
# "nn","mt","sa","lb","my","bo","tl","mg","as","tt","haw","ln","ha","ba","jw","su"

In [35]:
# Read a local audio file and encode its bytes as a base64 ASCII string

audio_file = "<LOCAL_AUDIO_FILE_PATH>"  # Provide the audio file for inference
with open(audio_file, "rb") as audio_handle:
    audio = audio_handle.read()
base64encodedstr = base64.b64encode(audio).decode("ascii")

# Build the request payload and store it as sample-request.json in the current working directory

sample_request = {
    "audio": [base64encodedstr],
    "language": ["en"],  # Language to be transcribed to.
}
sample_file = os.path.join(os.getcwd(), "sample-request.json")

with open(sample_file, "w") as request_handle:
    json.dump(sample_request, request_handle)

#### Remote Audio File Input

In [38]:
# To run inference on a remote audio file, pass its URI as the "audio" entry of the request
audio_file_uri = "<REMOTE_AUDIO_FILE_URI>"  # Provide the audio file URI

sample_request = {
    "audio": [audio_file_uri],
    "language": ["en"],  # Language to be transcribed to.
}

# Store the payload as sample-request.json in the current working directory
sample_file = os.path.join(os.getcwd(), "sample-request.json")
with open(sample_file, "w") as request_handle:
    json.dump(sample_request, request_handle)

In [None]:
# Inference test: expects a *sample-request.json* file in the same folder as the notebook.
# Sample inputs for the automatic-speech-recognition task can be found in the *sample-inputs* folder under *inference*.

# Echo the request payload, then send it to the deployment and print the response
try:
    with open("./sample-request.json", "r") as request_handle:
        request_body = request_handle.read()
    print(f"Input: \n{request_body}")
    output = ws_client.online_endpoints.invoke(
        request_file="./sample-request.json",
        deployment_name=deployment_name,
        endpoint_name=endpoint_name,
    )
    print(f"Output: \n{output}\n")
except Exception as invoke_error:
    raise RuntimeError(f"Inference to endpoint: {endpoint_name} and deployment: {deployment_name} failed. Detailed Response:\n{invoke_error}") from invoke_error

### Delete all resources (Optional)

In [None]:
# Run this cell to trigger deletion of the online deployment.
# Only the deployment is addressed here; this cell issues no delete call for
# the endpoint or the registered model, and it does not wait on the result of
# begin_delete.
try:
    ws_client.online_deployments.begin_delete(
        endpoint_name=endpoint_name,
        name=deployment_name,
    )
    print("\n---Deployment deletion triggered successfully---\n")
except Exception as delete_error:
    raise RuntimeError(f"Deployment {deployment_name} deletion failed. Detailed Response:\n{delete_error}") from delete_error