# Schedule an Online Evaluation with Tracing and AI Inference SDK
- You can configure the RecurrenceTrigger based on the class definition here. The code below demonstrates how to configure the RecurrenceTrigger to run the evaluation every 24 hours. You can also configure the trigger to run at a different interval, or at a specific time of day. Check the Trace menu in the Azure AI Foundry to see the results of the evaluation.
- reference: https://learn.microsoft.com/en-us/azure/ai-studio/how-to/online-evaluation

> ✨ ***Note*** <br>
> You application insight need to be created in Azure AI Foundry to trace your generative AI application. <br>
> Prior to setting up online evaluation, ensure you have first set up [tracing for your generative AI application using Azure AI Inference SDK](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/trace-local-sdk?tabs=python).

In [1]:
import pandas as pd
import os
import json

from pprint import pprint
from azure.ai.evaluation import evaluate
from azure.ai.evaluation import RelevanceEvaluator
from azure.ai.evaluation import GroundednessEvaluator, GroundednessProEvaluator
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    Evaluation,
    Dataset,
    EvaluatorConfiguration,
    ConnectionType,
    EvaluationSchedule,
    RecurrenceTrigger,
    ApplicationInsightsConfiguration
)
import pathlib

from azure.ai.evaluation import evaluate
from azure.ai.evaluation import (
    ContentSafetyEvaluator,
    RelevanceEvaluator,
    CoherenceEvaluator,
    GroundednessEvaluator,
    FluencyEvaluator,
    SimilarityEvaluator,
    F1ScoreEvaluator,
    RetrievalEvaluator
)

from azure.ai.ml import MLClient



load_dotenv("../.env")

True

In [2]:
credential = DefaultAzureCredential()

azure_ai_project_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=os.environ.get("AZURE_AI_PROJECT_CONN_STR"),  # At the moment, it should be in the format "<Region>.api.azureml.ms;<AzureSubscriptionId>;<ResourceGroup>;<HubName>" Ex: eastus2.api.azureml.ms;xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxxxxx;rg-sample;sample-project-eastus2
)

model_config = {
    "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"),
    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
    "azure_deployment": os.environ.get("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"),
    "api_version": os.environ.get("AZURE_OPENAI_API_VERSION"),
    "type": "azure_openai",
}

In [3]:
application_insights_connection_string = azure_ai_project_client.telemetry.get_connection_string()
if not application_insights_connection_string:
    print("Application Insights was not enabled for this project.")
    print("Enable it via the 'Tracing' tab in your Azure AI Foundry project page.")
    exit()

from azure.core.settings import settings 
from azure.monitor.opentelemetry import configure_azure_monitor

# https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/trace-local-sdk?tabs=python
os.environ['AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED'] = 'true'
settings.tracing_implementation = "opentelemetry" 

configure_azure_monitor(connection_string=application_insights_connection_string)

print(f"Application Insights connection string: {application_insights_connection_string}")

Application Insights connection string: InstrumentationKey=be5a6225-5366-4ac5-948c-0e7679968305;IngestionEndpoint=https://swedencentral-0.in.applicationinsights.azure.com/;LiveEndpoint=https://swedencentral.livediagnostics.monitor.azure.com/;ApplicationId=d6e7dbde-b332-4538-a623-73a26b12676e


In [4]:
from azure.ai.inference.tracing import AIInferenceInstrumentor 

# Instrument AI Inference API to enable trace instrumentation for AI Inference
AIInferenceInstrumentor().instrument() 

In [5]:
from openai import AzureOpenAI
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage
from azure.ai.inference.models import UserMessage
from azure.core.credentials import AzureKeyCredential

aoai_inference_endpoint = os.getenv("AZURE_AI_INFERENCE_ENDPOINT")
aoai_api_key = os.getenv("AZURE_OPENAI_API_KEY")
aoai_deployment_name = os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT_NAME")

try:
    azure_ai_inference_client = ChatCompletionsClient(
        endpoint = aoai_inference_endpoint,
        credential = AzureKeyCredential(aoai_api_key),
    )

    print("=== Initialized azure_ai_inference_client ===")
    print(f"AZURE_AI_INFERENCE_ENDPOINT={aoai_inference_endpoint}")
    print(f"AZURE_OPENAI_DEPLOYMENT_NAME={aoai_deployment_name}")
        
except (ValueError, TypeError) as e:
    print(e)

=== Initialized azure_ai_inference_client ===
AZURE_AI_INFERENCE_ENDPOINT=https://aoai-services1.services.ai.azure.com/models
AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini


In [None]:
import os
from azure.ai.inference.models import SystemMessage
from azure.ai.inference.models import UserMessage

NUM_SAMPLES = 20

LANGUAGE = "English"

topic = f"""
Contoso Electronics call center QnA related expected spoken utterances for {LANGUAGE} languages. create {NUM_SAMPLES} lines of jsonl of the topic related text.
"""

example = """
Here is examples of the expected format:
{"query":"How do you create a run?","context":"AML API only","response":"To create a run using the Azure Machine Learning API, you first need to create an Experiment. Once you have an experiment, you can create a Run object that is associated with that experiment. Here is some Python code that demonstrates this process:\\n\\n```\\nfrom azureml.core import Experiment, Run\\nfrom azureml.core.workspace import Workspace\\n\\n# Define workspace and experiment\\nws = Workspace.from_config()\\nexp = Experiment(workspace=ws, name='my_experiment')\\n\\n# Create a new run\\nrun = exp.start_logging()\\n```\\n\\nIn this code, the `from_config()` method reads the configuration file that you created when you set up your Azure Machine Learning workspace. The `Experiment` constructor creates an Experiment object that is associated with your workspace, and the `start_logging()` method creates a new Run object that is associated with the Experiment. Now you can use the `run` object to log metrics, upload files, and track other information related to your machine learning experiment.","ground_truth":"Paris is the capital of France."}
{"query":"How do you log a model?","context":"Logging can be done using any OSS Sdk","response":"There are a few ways to log models in Azure Machine Learning. \\n\\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\\n\\n```python\\nfrom azureml.core import Model\\n\\nmodel_path = './outputs/my_model.pkl'\\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\\n```\\n\\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \\n\\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\\n\\n```python\\nfrom sklearn.linear_model import LogisticRegression\\nfrom azureml.core.run import Run\\n\\n# Initialize a run object\\nrun = Run.get_context()\\n\\n# Train your model\\nX_train, y_train = ...\\nlog_reg = LogisticRegression().fit(X_train, y_train)\\n\\n# Save the model to the Run object's outputs directory\\nmodel_path = 'outputs/model.pkl'\\njoblib.dump(value=log_reg, filename=model_path)\\n\\n# Log the model as a run artifact\\nrun.upload_file(name=model_path, path_or_stream=model_path)\\n```\\n\\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.","ground_truth":"Paris is the capital of France."}
{"query":"What is the capital of France?","context":"France is in Europe","response":"Paris is the capital of France.","ground_truth":"Paris is the capital of France."}
"""

system_message = """
Generate plain text sentences of #topic# related text to improve the recognition of domain-specific words and phrases.
Domain-specific words can be uncommon or made-up words, but their pronunciation must be straightforward to be recognized. 
Use text data that's close to the expected spoken utterances. The nummber of utterances per line should be 1. 
jsonl format is required. use 'no' as number, 'query' as string, 'context' as string, 'response' as string, and 'ground_truth' as string.
only include the lines as the result. Do not include ```jsonl, ``` and blank line in the result. 

"""

user_message = f"""
#topic#: {topic}
Example: {example}
"""

# Simple API Call
response = azure_ai_inference_client.complete(
    model=aoai_deployment_name,
    messages=[
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ],
    temperature=0.8,
    top_p=0.1
)

content = response.choices[0].message.content
print(content)
print("Usage Information:")
#print(f"Cached Tokens: {response.usage.prompt_tokens_details.cached_tokens}") #only o1 models support this
print(f"Completion Tokens: {response.usage.completion_tokens}")
print(f"Prompt Tokens: {response.usage.prompt_tokens}")
print(f"Total Tokens: {response.usage.total_tokens}")

{"no": 1, "query": "What are the warranty options for Contoso Electronics products?", "response": "Contoso Electronics offers a one-year standard warranty on all products."}
{"no": 2, "query": "How can I track my order from Contoso Electronics?", "response": "You can track your order using the tracking link sent to your email."}
{"no": 3, "query": "What is the return policy for Contoso Electronics?", "response": "You can return products within 30 days for a full refund."}
{"no": 4, "query": "Do you offer technical support for your products?", "response": "Yes, we provide technical support via phone and online chat."}
{"no": 5, "query": "How do I reset my Contoso Electronics device?", "response": "To reset your device, press and hold the reset button for 10 seconds."}
{"no": 6, "query": "What payment methods are accepted at Contoso Electronics?", "response": "We accept credit cards, PayPal, and bank transfers."}
{"no": 7, "query": "Can I change my order after it has been placed?", "resp

### Set up online evaluation schedule

Evaluations are only supported in the same regions as AI-assisted risk and safety metrics.

### Prerequisites

- A new User-assigned Managed Identity in the same resource group and region. Make a note of the clientId; you'll need it later.
- An Azure AI Hub in the same resource group and region.
- An Azure AI project in this hub, see Create a project in Azure AI Foundry portal.
- An Azure Monitor Application Insights resource created in Azure AI Foundry portal.
- Navigate to the hub page in Azure portal and add Application Insights resource, see Update Azure Application Insights
- Azure OpenAI Deployment with GPT model supporting chat completion, for example gpt-4.
- Navigate to your Application Insights resource in the Azure portal and use the Access control (IAM) tab to add the Log Analytics Contributor role to the User-assigned Managed Identity you created previously.
? Attach the User-assigned Managed Identity to your project.?
- Navigate to your Azure AI Services in the Azure portal and use the Access control (IAM) tab to add the Cognitive Services OpenAI Contributor role to the User-assigned Managed Identity you created previously.

In [7]:
# id for each evaluator can be found in your AI Studio registry - please see documentation for more information
# init_params is the configuration for the model to use to perform the evaluation
# data_mapping is used to map the output columns of your query to the names required by the evaluator
# Evaluator parameter format - https://learn.microsoft.com/en-us/azure/ai-studio/how-to/develop/evaluate-sdk#evaluator-parameter-format
evaluators_cloud = {
    "f1_score": EvaluatorConfiguration(
        id=F1ScoreEvaluator.id,
    ),
    "relevance": EvaluatorConfiguration(
        id=RelevanceEvaluator.id,
        init_params={"model_config": model_config},
        data_mapping={"query": "${data.query}", "context": "${data.context}", "response": "${data.response}"},
    ),
    "groundedness": EvaluatorConfiguration(
        id=GroundednessEvaluator.id,
        init_params={"model_config": model_config},
        data_mapping={"query": "${data.query}", "context": "${data.context}", "response": "${data.response}"},
    ),
    # "retrieval": EvaluatorConfiguration(
    #     #from azure.ai.evaluation._evaluators._common.math import list_mean_nan_safe\nModuleNotFoundError: No module named 'azure.ai.evaluation._evaluators._common.math'
    #     #id=RetrievalEvaluator.id,
    #     id="azureml://registries/azureml/models/Retrieval-Evaluator/versions/2",
    #     init_params={"model_config": model_config},
    #     data_mapping={"query": "${data.query}", "context": "${data.context}", "response": "${data.response}"},
    # ),
    "coherence": EvaluatorConfiguration(
        id=CoherenceEvaluator.id,
        init_params={"model_config": model_config},
        data_mapping={"query": "${data.query}", "response": "${data.response}"},
    ),
    "fluency": EvaluatorConfiguration(
        id=FluencyEvaluator.id,
        init_params={"model_config": model_config},
        data_mapping={"query": "${data.query}", "context": "${data.context}", "response": "${data.response}"},
    ),
     "similarity": EvaluatorConfiguration(
        # currently bug in the SDK, please use the id below
        #id=SimilarityEvaluator.id,
        id="azureml://registries/azureml/models/Similarity-Evaluator/versions/3",
        init_params={"model_config": model_config},
        data_mapping={"query": "${data.query}", "response": "${data.response}"},
    ),
}


In [9]:
kusto_query = 'let gen_ai_spans=(dependencies | where isnotnull(customDimensions["gen_ai.system"]) | extend response_id = tostring(customDimensions["gen_ai.response.id"]) | project id, operation_Id, operation_ParentId, timestamp, response_id); let gen_ai_events=(traces | where message in ("gen_ai.choice", "gen_ai.user.message", "gen_ai.system.message") or tostring(customDimensions["event.name"]) in ("gen_ai.choice", "gen_ai.user.message", "gen_ai.system.message") | project id= operation_ParentId, operation_Id, operation_ParentId, user_input = iff(message == "gen_ai.user.message" or tostring(customDimensions["event.name"]) == "gen_ai.user.message", parse_json(iff(message == "gen_ai.user.message", tostring(customDimensions["gen_ai.event.content"]), message)).content, ""), system = iff(message == "gen_ai.system.message" or tostring(customDimensions["event.name"]) == "gen_ai.system.message", parse_json(iff(message == "gen_ai.system.message", tostring(customDimensions["gen_ai.event.content"]), message)).content, ""), llm_response = iff(message == "gen_ai.choice", parse_json(tostring(parse_json(tostring(customDimensions["gen_ai.event.content"])).message)).content, iff(tostring(customDimensions["event.name"]) == "gen_ai.choice", parse_json(parse_json(message).message).content, "")) | summarize operation_ParentId = any(operation_ParentId), Input = maxif(user_input, user_input != ""), System = maxif(system, system != ""), Output = maxif(llm_response, llm_response != "") by operation_Id, id); gen_ai_spans | join kind=inner (gen_ai_events) on id, operation_Id | project Input, System, Output, operation_Id, operation_ParentId, gen_ai_response_id = response_id'

# AzureMSIClientId is the clientID of the User-assigned managed identity created during set-up - see documentation for how to find it
properties = {"AzureMSIClientId": os.environ.get("AZURE_MSI_CLIENT_ID")}

service_name = "evaluation_sdk_schedule"

# Your Application Insights resource ID
# At the moment, it should be something in the format "/subscriptions/<AzureSubscriptionId>/resourceGroups/<ResourceGroup>/providers/Microsoft.Insights/components/<ApplicationInsights>""
#app_insights_resource_id = os.environ.get("APP_INSIGHTS_RESOURCE_ID")
app_insights_resource_id = "/subscriptions/3d4d3dd0-79d4-40cf-a94e-b4154812c6ca/resourceGroups/AOAI-group3/providers/microsoft.insights/components/my-app-insight"

# Connect to your Application Insights resource
app_insights_config = ApplicationInsightsConfiguration(
    resource_id=app_insights_resource_id, query=kusto_query
)

In [10]:
# Frequency to run the schedule
recurrence_trigger = RecurrenceTrigger(frequency="hour", interval=1)

# Configure the online evaluation schedule
evaluation_schedule = EvaluationSchedule(
    data=app_insights_config,
    evaluators=evaluators_cloud,
    trigger=recurrence_trigger,
    description=f"scheduled evaluation",
    properties=properties
)

# Create the online evaluation schedule
created_evaluation_schedule = azure_ai_project_client.evaluations.create_or_replace_schedule(service_name, evaluation_schedule)
print(
    f"Successfully submitted the online evaluation schedule creation request - {created_evaluation_schedule.name}, currently in {created_evaluation_schedule.provisioning_state} state."
)

Successfully submitted the online evaluation schedule creation request - evaluation_sdk_schedule, currently in Creating state.


In [11]:


evaluation_schedule = azure_ai_project_client.evaluations.get_schedule(service_name)
print(evaluation_schedule)

# Sample for list evaluation schedules
for evaluation_schedule in azure_ai_project_client.evaluations.list_schedule():
    print(evaluation_schedule)

# Sample for disable an evaluation schedule with name
# project_client.evaluations.disable_schedule(service_name)

{'name': 'evaluation_sdk_schedule', 'description': 'scheduled evaluation', 'isEnabled': True, 'provisioningState': 'Succeeded', 'data': {'type': 'AppInsights', 'resourceId': '/subscriptions/3d4d3dd0-79d4-40cf-a94e-b4154812c6ca/resourceGroups/AOAI-group3/providers/microsoft.insights/components/application_insight01', 'query': 'let gen_ai_spans=(dependencies | where isnotnull(customDimensions["gen_ai.system"]) | extend response_id = tostring(customDimensions["gen_ai.response.id"]) | project id, operation_Id, operation_ParentId, timestamp, response_id); let gen_ai_events=(traces | where message in ("gen_ai.choice", "gen_ai.user.message", "gen_ai.system.message") or tostring(customDimensions["event.name"]) in ("gen_ai.choice", "gen_ai.user.message", "gen_ai.system.message") | project id= operation_ParentId, operation_Id, operation_ParentId, user_input = iff(message == "gen_ai.user.message" or tostring(customDimensions["event.name"]) == "gen_ai.user.message", parse_json(iff(message == "gen_

In [12]:
count = 0
for evaluation_schedule in azure_ai_project_client.evaluations.list_schedule():
    count += 1
    print(f"{count}.{evaluation_schedule.name} "
    f"[IsEnabled: {evaluation_schedule.is_enabled}]")
    print(f"Total evaluation schedules: {count}")

1.evaluation_sdk_schedule [IsEnabled: True]
Total evaluation schedules: 1
2.daily-evaluation [IsEnabled: False]
Total evaluation schedules: 2


Disable (soft-delete) online evaluation schedule:

In [None]:
# target_service_name = service_name
# azure_ai_project_client.evaluations.disable_schedule(target_service_name)