In [1]:
import os, json
import pandas as pd
import time
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from critic_functions import user_functions
from dotenv import load_dotenv

# Populate os.environ from the .env file next to the notebook; this must run before
# any of the os.environ[...] lookups further down.
load_dotenv("./eval_ws.env")  # Load environment variables from .env file

from azure.ai.projects import __version__ as projects_version
from packaging.version import Version
# The agents surface of azure-ai-projects changed incompatibly after 1.0.0b10.
# `updated_agents` records which flavour is installed so later cells can pick the
# matching API. 1.0.0 alpha builds are matched by prefix: as pre-releases they sort
# below 1.0.0b10, yet they are treated as carrying the updated API.
_installed_version = Version(projects_version)
updated_agents = _installed_version > Version("1.0.0b10") or projects_version.startswith("1.0.0a")

if not updated_agents:
    # Older SDK: tool models come from azure.ai.projects and the client is built
    # from a connection string.
    from azure.ai.projects.models import FunctionTool, ToolSet

    project_client = AIProjectClient.from_connection_string(
        credential=DefaultAzureCredential(),
        conn_str=os.environ["PROJECT_CONNECTION_STRING"],
    )
else:
    # Updated SDK: tool models come from azure.ai.agents and the client is built
    # from the project endpoint.
    from azure.ai.agents.models import FunctionTool, ToolSet

    project_client = AIProjectClient(
        endpoint=os.environ["PROJECT_ENDPOINT"],
        credential=DefaultAzureCredential(),
    )

AGENT_NAME = "Critic Agent"

# Expose the critic helper functions to the agent: wrap them in a FunctionTool
# and register that tool in a ToolSet.
toolset = ToolSet()
functions = FunctionTool(user_functions)
toolset.add(functions)

[INFO] Could not import SKAgentConverter. Please install the dependency with `pip install semantic-kernel`.


In [2]:
# Sanity check: display the project endpoint picked up from the environment.
# Raises KeyError if PROJECT_ENDPOINT is not set.
os.environ["PROJECT_ENDPOINT"]

'https://ghyadav-critic-resource.services.ai.azure.com/api/projects/ghyadav-critic'

In [3]:
# Register the critic agent with the service, advertising the local functions as tools.
CRITIC_AGENT_NAME = "Critic Evaluation Assistant Test"

# System prompt: lists the functions the critic may call and how to choose between them.
critic_instructions = """You are a Critic Evaluation Assistant specialized in evaluating AI agents. 
    You have access to the following main evaluation functions:
    
    1. 'evaluate' - For targeted evaluation with specific parameters:
       - Can evaluate by agent_id (for comprehensive agent evaluation) 
       - Can evaluate by thread_id (for single conversation evaluation)
       - Allows specifying which evaluators to use
       
    2. 'auto_evaluate' - For automatic evaluation using all available evaluators:
       - Automatically runs all default evaluators (IntentResolution, ToolCallAccuracy, TaskAdherence)
       - Can evaluate by agent_id or thread_id
       - Best for quick, comprehensive assessments
   3. 'get_user_info' - For retrieving user information:
         - Can retrieve user information by user_id
         - Returns user details like name, email, and preferences

    When asked to evaluate an agent or conversation, determine which function is most appropriate 
    and gather the required parameters (Azure AI project endpoint, model configuration, etc.).
    Always provide clear, structured results and explain the evaluation findings."""

critic_agent = project_client.agents.create_agent(
    model=os.environ["MODEL_DEPLOYMENT_NAME"],
    name=CRITIC_AGENT_NAME,
    instructions=critic_instructions,
    tools=functions.definitions,
)

print(f"Created Critic Evaluation Agent, ID: {critic_agent.id}")

Created Critic Evaluation Agent, ID: asst_03O5D6Q9XSdfvDa8OiYbmxOo


In [4]:
# Ask the critic agent to auto-evaluate an existing conversation thread.
# `thread_id` is the conversation under evaluation; the request itself is posted
# to a separate, freshly created critic thread.
thread_id = "thread_YQpM4sgUQte7ZSwTQlBYiMH8"

# NOTE(review): the model API key is interpolated into the message text, so it is
# stored in the critic thread and is visible to the model. Consider having the
# tool functions read the key from the environment instead.
evaluation_request = f"""Please auto-evaluate the conversation thread with ID '{thread_id}' using the following configuration:

Azure AI Project: {{
    "azure_endpoint": "{os.environ.get('PROJECT_ENDPOINT', 'YOUR_ENDPOINT')}"
}}

Model Configuration: {{
    "azure_endpoint": "{os.environ['AZURE_OPENAI_ENDPOINT']}",
    "api_key": "{os.environ['AZURE_OPENAI_API_KEY']}",
    "api_version": "{os.environ['AZURE_OPENAI_API_VERSION']}",
    "azure_deployment": "{os.environ['MODEL_DEPLOYMENT_NAME']}"
}}

This should automatically run all appropriate evaluators for this conversation."""

# Create the critic thread and post the request using whichever API flavour is
# installed. Thread creation is branched together with message creation so the
# legacy path does not fail on the `agents.threads` attribute before reaching
# its own branch.
if updated_agents:
    critic_thread = project_client.agents.threads.create()
    eval_message = project_client.agents.messages.create(
        thread_id=critic_thread.id,
        role="user",
        content=evaluation_request,
    )
else:
    critic_thread = project_client.agents.create_thread()
    eval_message = project_client.agents.create_message(
        thread_id=critic_thread.id,
        role="user",
        content=evaluation_request,
    )

print(f"Created evaluation request message, ID: {eval_message.id}")
print("Evaluation request submitted to critic agent...")


Created evaluation request message, ID: msg_NfVJ2FYwY1PoZmTSN5oF76oQ
Evaluation request submitted to critic agent...


In [None]:
# Execute the critic agent
from azure.ai.agents.models import (
    RequiredFunctionToolCall,
    SubmitToolOutputsAction,
    ToolOutput,
)
# Cancel any previous runs if they exist
# try:
#     existing_runs = project_client.agents.runs.list(thread_id=critic_thread.id)
#     for run in existing_runs:
#         if run.status not in ["completed", "failed"]:
#             print(f"Cancelling existing run: {run.id} with status {run.status}")
#             project_client.agents.runs.cancel(thread_id=critic_thread.id, run_id=run.id)
# except Exception as e:
#     print(f"Error checking existing runs: {e}")
# Start a run of the critic agent on the critic thread, then poll it once per
# second, executing any function tool calls it requests, until it leaves the
# active states.
# NOTE: this cell uses the updated `agents.runs` API only.
critic_run = project_client.agents.runs.create(thread_id=critic_thread.id, agent_id=critic_agent.id)

while critic_run.status in ["queued", "in_progress", "requires_action"]:
    time.sleep(1)
    critic_run = project_client.agents.runs.get(thread_id=critic_thread.id, run_id=critic_run.id)

    if critic_run.status == "requires_action" and isinstance(critic_run.required_action, SubmitToolOutputsAction):
        tool_calls = critic_run.required_action.submit_tool_outputs.tool_calls
        if not tool_calls:
            print("No tool calls provided - cancelling run")
            project_client.agents.runs.cancel(thread_id=critic_thread.id, run_id=critic_run.id)
            break

        tool_outputs = []
        for tool_call in tool_calls:
            if not isinstance(tool_call, RequiredFunctionToolCall):
                continue
            # Log only the call id and function name: the arguments are likely to
            # carry the model configuration (including the API key) from the request.
            print(f"Executing critic tool call: {tool_call.id} ({tool_call.function.name})")
            try:
                output = functions.execute(tool_call)
            except Exception as e:
                print(f"Error executing tool_call {tool_call.id}: {e}")
                # Report the failure back to the agent as this call's output, so every
                # requested call is answered and the run is not left waiting on
                # outputs that will never arrive.
                output = json.dumps({"error": str(e)})
            tool_outputs.append(
                ToolOutput(
                    tool_call_id=tool_call.id,
                    output=output,
                )
            )

        print(f"Submitting {len(tool_outputs)} critic tool output(s)")
        if tool_outputs:
            project_client.agents.runs.submit_tool_outputs(thread_id=critic_thread.id, run_id=critic_run.id, tool_outputs=tool_outputs)
        else:
            # None of the requested calls were function calls we can serve; cancel
            # rather than keep polling a run that cannot make progress.
            print("No function tool outputs to submit - cancelling run")
            project_client.agents.runs.cancel(thread_id=critic_thread.id, run_id=critic_run.id)
            break

# Surface the outcome so a failed run is not mistaken for a successful one.
print(f"Critic run finished with status: {critic_run.status}")
if critic_run.status == "failed":
    print(f"Run error: {critic_run.last_error}")



In [6]:
# Show the id of the critic thread that holds the evaluation request.
print(critic_thread.id)

thread_cEQbLICUpA71yR9iTzPAmadH


In [None]:
# Print the critic thread's messages, oldest first.
print("=== Critic Agent Conversation ===")
for message in project_client.agents.messages.list(critic_thread.id, order="asc"):
    print(f"Role: {message.role}")
    # A message may have no content parts, or parts that are not text; collect
    # only the parts that expose a text payload instead of assuming content[0] is text.
    text_parts = [
        part.text.value
        for part in (message.content or [])
        if getattr(part, "text", None) is not None
    ]
    content_text = "\n".join(text_parts) if text_parts else "<no text content>"
    print(f"Content: {content_text}")
    print("-" * 40)