In [1]:
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter
from autogen_core.models import ModelFamily, UserMessage
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings
from semantic_kernel.memory.null_memory import NullMemory

import re
import os
from dotenv import load_dotenv

load_dotenv('../../.env')

True

In [2]:
import sys
import importlib
sys.path.append('../..')
from server.custom_types import MCT_Node
import criteria

* 'orm_mode' has been renamed to 'from_attributes'


In [3]:
importlib.reload(criteria)

<module 'criteria' from '/Users/shichengwen/Documents/Projects/TaskDecomposition/TaskDecomposition/server/evaluator/criteria.py'>

In [11]:
complexity_definition = (
    "A text is considered complex if it requires advanced knowledge "
    "or expertise, contains multiple layered or specialized concepts, or "
    "requires multi-step reasoning to understand or accomplish the described goal. "
    "Otherwise, it's considered not complex."
)

complexity_test_node = MCT_Node(
    id="complexity_test_node",
    label="comlexity_test_node",
    MCT_id="1",
    print_label="Entity Extraction",
    description="Entity Extraction",
    explanation="This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.",
    parentIds=[],
    MCT_parent_id=None,
)

complexity = await criteria.run_complexity_evaluation_agent(
    goal = "Understanding Document Relationships",
    node = complexity_test_node,
    model = "",
    api_key = "",
    complexity_definition=complexity_definition,
    few_shot_examples=[
        {
            "node": MCT_Node(
                id = "example_node_1",
                label = "Example Node",
                MCT_id = "2",
                description = "Named Entity Recognition",
                explanation = "NER is a key part of understanding how documents relate because it helps identify and categorize things like people, places, organizations, and dates. By spotting these entities, NER makes it easier to see connections between documents. For example, if two documents mention \"Apple Inc.,\" they might be talking about the same topic. It also adds context, like figuring out if \"Paris\" refers to the city or a person, which helps in understanding the bigger picture. Plus, NER pulls out important details that let you compare or group documents, making it easier to analyze trends or build connections. In short, NER lays the groundwork for figuring out how documents are linked.",
                parentIds = [],
                print_label = "NER",
                MCT_parent_id = None,),
            "user_evaluation": 1,
            
        }
    ]
)

complexity

{'gpt-4o-mini-2024-07-18': {'value': 0,
  'reason': 'Entity extraction is a process that involves identifying and categorizing key information or entities from unstructured text. The complexity of this task depends on various factors, including the specific requirements of the project, the variety of entities to be extracted, and the context in which this task is being performed.\n\n1. **Definition of Entity Extraction**: The task is somewhat akin to Named Entity Recognition (NER), which typically involves recognizing names of people, organizations, locations, etc. This implies a clear area of focus.\n\n2. **Potential for Layered Concepts**: Depending on the application, entity extraction can involve multiple layers of complexity. For example, if it requires extracting not just names but also relationships, context, or performing sentiment analysis, it could cross into a more complex territory.\n\n3. **Multi-Step Reasoning**: If entity extraction involves developing custom models, inco

In [2]:
# openai_client = OpenAI(api_key=open("../api_key").read().strip())

In [2]:
def task_def_toString(task, goal):
    return task

In [None]:
# from portkey_ai import createHeaders, PORTKEY_GATEWAY_URL

In [18]:
def get_model_clients(model_enabled: list):
    openai_api_key = os.getenv("OPENAI_API_KEY")
    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
    client_list = []
    if openai_api_key and 'openai' in model_enabled:
        model_client = OpenAIChatCompletionClient(
            model='gpt-4o-mini',
            api_key=openai_api_key,
        )
        client_list.append(model_client)
    if anthropic_api_key and 'anthropic' in model_enabled:
        sk_client = AnthropicChatCompletion(
        ai_model_id="claude-3-5-sonnet-20241022",
        api_key=anthropic_api_key,service_id="my-service-id",  # Optional; for targeting specific services within Semantic Kernel
        )
        settings = AnthropicChatPromptExecutionSettings(
            temperature=0.2,
        )

        model_client = SKChatCompletionAdapter(
            sk_client,
            kernel=Kernel(memory=NullMemory()),
            prompt_settings=settings,
            model_info={
                "function_calling": True,
                "json_output": True,
                "vision": True,
                "family": ModelFamily.CLAUDE_3_5_SONNET,
            },
        )
        client_list.append(model_client)
    return client_list


In [20]:
client = get_model_clients(['anthropic'])[0]
agent = AssistantAgent(
    name="test",
    model_client=client,
    system_message="You are a helpful agent. You answer question with three exclamation marks at the end."
)
response = await agent.on_messages(
    [TextMessage(content="Who are you and what is the capital of France?", source="user")],
    cancellation_token=CancellationToken(),
)
print(response.chat_message.content.strip())

I am an AI assistant created by Anthropic, and the capital of France is Paris!!!


In [50]:
async def run_complexity_evaluation_agent(
    goal: str,
    node: "MCT_Node",
    model: str,
    api_key: str,
    complexity_definition: str,
    few_shot_examples: list[dict],
):
    """
    Run the complexity evaluation agent to evaluate whether the node is complex.
    Args:
        goal: The final task goal. (not actually used in the evaluation)
        node: The node to evaluate.
        model: The model to use for evaluation.
        api_key: The API key for the model.
        complexity_definition: The definition of complexity.
        few_shot_examples: Few-shot examples for the evaluation. (optional)
    """

    model_client = OpenAIChatCompletionClient(
        model=model,
        api_key=api_key,
    )

    complexity_evaluation_agent = AssistantAgent(
        name="complexity_evaluation_agent",
        model_client=model_client,
        # temperature=0.5,
        system_message=f"""You are a text complexity evaluator. The user will provide some text that describes a task.
Your job is to evaluate whether the text is 'complex' or 'not complex' based on the following definition:
{complexity_definition}

You must output your reasoning in a <REASONING>...</REASONING> block, then provide your final decision 
in a <RESULT>...</RESULT> block. The <RESULT> block must contain EXACTLY "Yes" or "No" (nothing else).

Example format:
<REASONING>This is my reasoning about complexity.</REASONING>
<RESULT>Yes</RESULT>
""",
    )

    few_shot_messages = []
    if len(few_shot_examples) > 0:
        for example in few_shot_examples:
            example_reasoning = example['user_reasoning']
            example_evaluation = example['user_evaluation']
            few_shot_messages.append(
                TextMessage(
                    content=task_def_toString(
                        # MCT_Node.model_validate(example["node"]), goal
                        example["node"], goal
                    ),
                    source="user",
                )
            )
            few_shot_messages.append(
                TextMessage(
                    content=(
                        f"<REASONING>{example_reasoning}</REASONING>\n"
                        f"<RESULT>{'Yes' if example_evaluation else 'No'}</RESULT>"
                    ),
                    source="assistant",
                )
            )

    user_message = task_def_toString(node, goal)
    messages = few_shot_messages + [TextMessage(content=user_message, source="user")]

    response = await complexity_evaluation_agent.on_messages(
        messages,
        cancellation_token=CancellationToken(),
    )

    result_text = response.chat_message.content.strip()

    reasoning_match = re.search(r"<REASONING>(.*?)</REASONING>", result_text, re.DOTALL)
    reasoning = reasoning_match.group(1).strip() if reasoning_match else ""

    result_match = re.search(r"<RESULT>(.*?)</RESULT>", result_text, re.DOTALL)
    final_result = result_match.group(1).strip() if result_match else "No"

    complexity_value = 1 if final_result == "Yes" else 0 if final_result == "No" else -1

    # save_json(
    #     {
    #         "system_message": complexity_evaluation_agent._system_messages[0].content,
    #         "few_shot_messages": list(
    #             map(lambda m: m.source + ": " + m.content, few_shot_messages)
    #         ),
    #         "response": result_text,
    #     },
    #     "complexity_evaluation.json",
    # )

    print(reasoning)

    return complexity_value

In [43]:
async def run_coherence_evaluation_agent(
    goal: str,
    parent_node: "MCT_Node",
    child_node: "MCT_Node",
    model: str,
    api_key: str,
    coherence_definition: str,
    few_shot_examples: list[dict],
):
    """
    Run the coherence evaluation agent to evaluate whether the child node is coherent with the parent node.
    Args:
        goal: The final task goal. (not actually used in the evaluation)
        parent_node: The parent node.
        child_node: The child node. (the node to evaluate)
        model: The model to use for evaluation.
        api_key: The API key for the model.
        coherence_definition: The definition of coherence.
        few_shot_examples: Few-shot examples for the evaluation. (optional)
    """

    model_client = OpenAIChatCompletionClient(
        model=model,
        api_key=api_key,
    )

    coherence_evaluation_agent = AssistantAgent(
        name="coherence_evaluation_agent",
        model_client=model_client,
        # temperature=0.5,
        system_message=f"""You are a coherence evaluator. 
You will be given two parts of a sequence: a parent part and a child part.
Evaluate whether the child part logically or thematically follows from the parent part 
according to the following definition of coherence:
{coherence_definition}

You must output your reasoning in a <REASONING>...</REASONING> block, 
then provide your final decision in a <RESULT>...</RESULT> block. 
The <RESULT> block must contain EXACTLY "Yes" or "No" (nothing else).

Example format:
<REASONING>This is my reasoning about coherence.</REASONING>
<RESULT>Yes</RESULT>
""",
    )

    def user_message_generator(_parent_node, _child_node):
        return """
        - Parent Part: {parent_part}
        - Child Part: {child_part}
        """.format(
            parent_part=task_def_toString(_parent_node, goal),
            child_part=task_def_toString(_child_node, goal),
        )

    few_shot_messages = []
    if len(few_shot_examples) > 0:
        for example in few_shot_examples:
            # example_parent = MCT_Node.model_validate(example["parent_node"])
            # example_child = MCT_Node.model_validate(example["node"])
            example_parent = example["node"]
            example_child = example["node"]
            example_reasoning = example["user_reasoning"]
            example_eval = example["user_evaluation"]

            few_shot_messages.append(
                TextMessage(
                    content=user_message_generator(example_parent, example_child),
                    source="user",
                )
            )
            few_shot_messages.append(
                TextMessage(
                    content=(
                        f"<REASONING>{example_reasoning}</REASONING>\n"
                        f"<RESULT>{'Yes' if example_eval else 'No'}</RESULT>"
                    ),
                    source="assistant",
                )
            )

    user_message = user_message_generator(parent_node, child_node)
    messages = few_shot_messages + [TextMessage(content=user_message, source="user")]

    response = await coherence_evaluation_agent.on_messages(
        messages,
        cancellation_token=CancellationToken(),
    )

    result_text = response.chat_message.content.strip()

    reasoning_match = re.search(r"<REASONING>(.*?)</REASONING>", result_text, re.DOTALL)
    reasoning = reasoning_match.group(1).strip() if reasoning_match else ""

    result_match = re.search(r"<RESULT>(.*?)</RESULT>", result_text, re.DOTALL)
    final_result = result_match.group(1).strip() if result_match else "No"

    coherence_value = 1 if final_result == "Yes" else 0 if final_result == "No" else -1

    # save_json(
    #     {
    #         "system_message": coherence_evaluation_agent._system_messages[0].content,
    #         "few_shot_messages": list(
    #             map(lambda m: m.source + ": " + m.content, few_shot_messages)
    #         ),
    #         "response": result,
    #     },
    #     "coherence_evaluation.json",
    # )

    print(reasoning)

    return coherence_value

In [34]:
async def run_importance_evaluation_agent(
    goal: str,
    node: "MCT_Node", # MCT_Node
    model: str,
    api_key: str,
    importance_definition: str,
    few_shot_examples: list[dict],
):
    """
    Run the importance evaluation agent to evaluate whether the node is important.
    Args:
        goal: The final task goal. (necessary for the evaluation)
        node: The node to evaluate.
        model: The model to use for evaluation.
        api_key: The API key for the model.
        importance_definition: The definition of importance.
        few_shot_examples: Few-shot examples for the evaluation. (optional)
    """

    model_client = OpenAIChatCompletionClient(
        model=model,
        api_key=api_key,
    )

    importance_evaluation_agent = AssistantAgent(
        name="importance_evaluation_agent",
        model_client=model_client,
        # temperature=0.5,
        system_message=f"""You are an importance evaluator. 
You will be given a final task goal and a subtask description.
Evaluate whether the subtask is important using the following definition:
{importance_definition}

You must output your reasoning in a <REASONING>...</REASONING> block, then provide your final decision 
in a <RESULT>...</RESULT> block. The <RESULT> block must contain EXACTLY "Yes" or "No" (nothing else).

Example format:
<REASONING>This is my reasoning about importance.</REASONING>
<RESULT>Yes/No</RESULT>
""",
    )

    def user_message_generator(_goal, _node):
        return (
            f"- A final task goal: {_goal}\n"
            f"- A subtask description: {task_def_toString(_node, _goal)}"
        )

    few_shot_messages = []
    if len(few_shot_examples) > 0:
        for example in few_shot_examples:
            example_reasoning = example["user_reasoning"]
            example_evaluation = example["user_evaluation"]
            few_shot_messages.append(
                TextMessage(
                    content=user_message_generator(goal, example["node"]),
                    source="user",
                )
            )

            few_shot_messages.append(
                TextMessage(
                    content=(
                        f"<REASONING>{example_reasoning}</REASONING>\n"
                        f"<RESULT>{'Yes' if example_evaluation else 'No'}</RESULT>"
                    ),
                    source="assistant",
                )
            )

    user_message = user_message_generator(goal, node)
    messages = few_shot_messages + [TextMessage(content=user_message, source="user")]

    response = await importance_evaluation_agent.on_messages(
        messages,
        cancellation_token=CancellationToken(),
    )

    result_text = response.chat_message.content.strip()

    reasoning_match = re.search(r"<REASONING>(.*?)</REASONING>", result_text, re.DOTALL)
    reasoning = reasoning_match.group(1).strip() if reasoning_match else ""

    result_match = re.search(r"<RESULT>(.*?)</RESULT>", result_text, re.DOTALL)
    final_result = result_match.group(1).strip() if result_match else "No"

    importance_value = 1 if final_result == "Yes" else 0 if final_result == "No" else -1

    # save_json(
    #     {
    #         "system_message": complexity_evaluation_agent._system_messages[0].content,
    #         "few_shot_messages": list(
    #             map(lambda m: m.source + ": " + m.content, few_shot_messages)
    #         ),
    #         "response": result,
    #     },
    #     "complexity_evaluation.json",
    # )

    print(reasoning)

    return importance_value

In [6]:
complexity = await run_complexity_evaluation_agent(
    text = 'This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.',
    model = 'gpt-4o-mini',
    api_key=open("../api_key").read().strip()
)
complexity

1

In [7]:
coherence = await run_coherence_evaluation_agent(
    parent_part = 'This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.',
    child_part = 'This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.',
    model = 'gpt-4o-mini',
    api_key=open("../api_key").read().strip()
)
coherence

0

In [8]:
importance = await run_importance_evaluation_agent(
    final_goal = "Entity Extraction",
    subtask_description = "This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.",
    model = 'gpt-4o-mini',
    api_key=open("../api_key").read().strip()
)
importance

0

In [25]:
from tqdm import tqdm
import asyncio
async def run_all_evaluations(goal: str, node_str: str, parent_node_str: str, model: str, api_key: str, evaluations: list=[]):
    tasks = [
        run_complexity_evaluation_agent(text=node_str, model=model, api_key=api_key),
        run_coherence_evaluation_agent(parent_part=parent_node_str, child_part=node_str, model=model, api_key=api_key),
        run_importance_evaluation_agent(final_goal=goal, subtask_description=node_str, model=model, api_key=api_key),
    ]

    results = await asyncio.gather(*tasks)
    return results
goal = "I want to construct a knowledge graph from a dataset of Wikipedia articles."
node_str = """Task: Extract Entities
            Description: Identify and extract relevant entities from the collection of documents."""
parent_node_str =  """Task: Root Node
            Description: Start of the process."""
model = "gpt-4o-mini"
api_key = open("../api_key").read().strip()
[a, b, c] = await run_all_evaluations(goal, node_str, parent_node_str, model, api_key)
print(a, b, c, (a+b+c)/3)



0 0 0 0.0


In [19]:
complexity_definition: str = (
        "A text is considered complex if it requires advanced knowledge "
        "or expertise, contains multiple layered or specialized concepts, or "
        "requires multi-step reasoning to understand or accomplish the described goal. "
        "Otherwise, it's considered not complex."
    ),
model_client = OpenAIChatCompletionClient(
        model=model,
        api_key=api_key,
    )
complexity_evaluation_agent = AssistantAgent(
        name="complexity_evaluation_agent",
        model_client=model_client,
        # temperature=0.5,
        system_message=f"""You are a text complexity evaluator. The user will provide some text that describes a task.
Your job is to evaluate whether the text is 'complex' or 'not complex' based on the following definition:
{complexity_definition}

If the text meets the complexity definition, respond with:
"Yes"

Otherwise, respond with:
"No"

Output must be EXACTLY one of these words, with no additional formatting, punctuation, or explanation.
""",
    )
complexity_evaluation_agent._system_messages[0].content

'You are a text complexity evaluator. The user will provide some text that describes a task.\nYour job is to evaluate whether the text is \'complex\' or \'not complex\' based on the following definition:\n("A text is considered complex if it requires advanced knowledge or expertise, contains multiple layered or specialized concepts, or requires multi-step reasoning to understand or accomplish the described goal. Otherwise, it\'s considered not complex.",)\n\nIf the text meets the complexity definition, respond with:\n"Yes"\n\nOtherwise, respond with:\n"No"\n\nOutput must be EXACTLY one of these words, with no additional formatting, punctuation, or explanation.\n'

In [51]:
complexity_definition = (
    "A text is considered complex if it requires advanced knowledge "
    "or expertise, contains multiple layered or specialized concepts, or "
    "requires multi-step reasoning to understand or accomplish the described goal. "
    "Otherwise, it's considered not complex."
)

complexity = await run_complexity_evaluation_agent(
    goal = "Understanding Document Relationships",
    node = "This step is crucial as it allows you to identify the key components (entities) and how they are related to each other, which is foundational for constructing a knowledge graph.",
    model = "gpt-4o-mini",
    api_key = open("../../api_key").read().strip(),
    complexity_definition=complexity_definition,
    few_shot_examples=[]
)

complexity

The text requires some understanding of knowledge graphs and the relationship between entities within that context, suggesting a need for specialized knowledge in data representation or information science. Identifying key components and their relationships also implies a level of analysis that entails multi-step reasoning. Therefore, this text involves layered concepts and necessitates advanced understanding, making it complex.


1

In [44]:
coherence_definition = (
    "Two text pieces are considered coherent in a sequence if the second "
    "logically or thematically follows from the first, maintains consistency with it, "
    "and does not present a contradictory or unrelated concept."
)

coherence = await run_coherence_evaluation_agent(
    goal = "Understanding Document Relationships",
    parent_node = "Entity Extraction",
    child_node = "Entity Relationship Tree Construction",
    model = "gpt-4o-mini",
    api_key=open("../../api_key").read().strip(),
    coherence_definition=coherence_definition,
    few_shot_examples=[]
)
coherence

The Parent Part discusses "Entity Extraction," which typically involves identifying and categorizing key entities from a given text. The Child Part, "Entity Relationship Tree Construction," logically follows as it suggests a subsequent step where the entities extracted are organized into a structured format that illustrates the relationships between them. This shows a progression in data processing, where the foundational work of entity extraction leads naturally to the construction of relationships between those entities. There is no contradiction and the concepts are related within the same domain of information processing.


1

In [35]:
importance_definition: str = (
    (
        "A subtask is considered important if it is critical, essential, "
        "or significantly beneficial to achieving the final goal. If it is tangential, "
        "optional, or has minimal impact, then it is not important."
    ),
)

importance = await run_importance_evaluation_agent(
    goal = "Understanding Document Relationships",
    node = "Entity Extraction",
    model = "gpt-4o-mini",
    api_key=open("../../api_key").read().strip(),
    importance_definition=importance_definition,
    few_shot_examples=[]
)
importance

Entity extraction is critical for understanding document relationships as it involves identifying and classifying key components and entities within the documents. This information is essential for establishing connections and understanding how different documents relate to each other. Without entity extraction, it would be challenging to analyze the relationships accurately, making it a fundamental part of achieving the overall goal. Therefore, this subtask is important.


1