# Basic Settings

## API Keys

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv("API_KEY")
os.environ["OPENAI_API_KEY"] = api_key

## Define Tools

In [2]:
from tools import Tools

tools = Tools()
tool_dict = tools.tool_dict

print("This app is using the following tool:")
for tool in tool_dict:
    print(tool)

This app is using the following tool:
none
website_info_retriever
website_links_crawler
website_reader
pdf_reader
read_user_input_and_plan
read_execution_chat_log
read_execution_team_agents_prompt


## Read Agent Parameter (yaml)

In [3]:
import yaml

# ! 注意yaml檔案版本
with open('agents_parameter.yaml', 'r', encoding="utf-8") as file:
    agents_parameter = yaml.safe_load(file)

# Execution Team

## Define Agents

### Planner

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List


class Plan(BaseModel):
    """Plan to follow in future"""

    steps: List[str] = Field(
        description="different steps to follow, should be in sorted order"
    )

planner_llm_config = agents_parameter["Planner"]["llm_config"]
planner_system_prompt = agents_parameter["Planner"]["prompt"]

planner_llm = ChatOpenAI(model=planner_llm_config["model"], temperature=planner_llm_config["temperature"])
planner_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", planner_system_prompt),
        ("placeholder", "{user_input}"), # placeholer 用來動態嵌入使用者輸入的訊息
    ]
)

planner = planner_prompt | planner_llm.with_structured_output(Plan) # 限制使用特定模板回答問題

print("planner_llm_config:")
for key, value in planner_llm_config.items():
    print(f"{key}: {value}")
print("planner_system_prompt: \n" + planner_system_prompt)

planner_llm_config:
model: gpt-4o
temperature: 0
planner_system_prompt: 
You are a Planner Agent in an LLM-based multi-agent system designed to make plans for Executor Agents to follow in order to fulfill user requests by gathering information or operating system of National Central University.

Your job is to generate clear, logical, and actionable step-by-step plans that guide other agents to fulfill the user's request. Each plan step should include:
  - A brief explanation of what the step aims to accomplish
  - A clear description of what needs to be found or processed
  - An output placeholder (e.g., #E1, #E2, etc.) for use in later steps

You must use variables like #E1, #E2, etc., to represent intermediate results that can be referenced in later steps. Make sure each step builds clearly upon previous steps.
At the end of the plan, the final step should return the content that most accurately and completely fulfills the user's request.

Please strictly follow this reasoning frame

In [None]:
response = planner.invoke({"user_input": [("user", "Please provide a brief introduction to the content of the 111 Academic Affairs Regulations.")]})
for step in response.steps:
    print(step)

Identify which website(s) from a pre-constructed internal database are most relevant to the user's input or intent. #E1 = [The most relevant website url based on user input]
---
Analyze the content of the selected website to determine whether it contains enough information to address the user's need. #E2 = [Page content of #E1]
---
Evaluate whether the content in #E2 is sufficient to satisfy the user's request. #E3 = [Assessment of sufficiency]
---
If #E3 suggests the information is insufficient, locate hyperlinks or references within the page that are most likely to lead to more useful or relevant information. #E4 = [List of relevant hyperlinks from #E2]
---
If the content is insufficient, choose the most promising hyperlink and repeat the content reading step. #E5 = [Content of newly navigated page]
---
If the information is sufficient, extract the relevant details that directly address the user's request. #E6 = [Final answer based on #E2 or #E5]
---


In [6]:
response = planner.invoke({"user_input": [("user", "Please help me fill out the leave application on the school website.")]})
for step in response.steps:
    print(step)

Identify the specific system or platform that the user needs to interact with, based on internal service mappings. #E1 = [Target system or module to operate, e.g., "NCU Leave Application System"]
Determine the login or authentication requirements to access the system. #E2 = [Authentication method, required credentials, or SSO process for #E1]
Identify the exact operation the user wants to perform within the system (e.g., fill out leave application form). #E3 = [Description of intended system operation]
Locate the UI entry point or API endpoint required to initiate the operation. #E4 = [URL, button label, or function name related to #E3 in #E1]
Specify the input parameters or interaction steps needed to complete the operation (e.g., which fields to fill, which buttons to click). #E5 = [Step-by-step interaction procedure with required parameters]
Execute the system operation and capture the result or response from the system. #E6 = [Operation outcome, confirmation message, or data return

### Executor

In [None]:
# from langchain_openai import ChatOpenAI
# from langgraph.prebuilt import create_react_agent

# executor_model = agents_parameter["Executor"]["model"]
# executor_prompt = agents_parameter["Executor"]["prompt"]

# executor_llm = ChatOpenAI(model=executor_model)
# executor_tool_list = [tool_dict[name] for name in agents_parameter['Executor']['tool_list']]

# executor = create_react_agent(executor_llm, executor_tool_list, prompt=executor_prompt)

# print("executor_model: " + executor_model)
# print("executor_prompt: \n" + executor_prompt)
# print("executor_tool_list: ")
# for tool in executor_tool_list:
#     print(tool.name)

from agents import create_react_agent_with_yaml

executor = create_react_agent_with_yaml("Executor")

In [None]:
# response = executor.invoke({"messages": [("user", "Who is the headmaster of National Central University in Taiwan?")]})
# for message in response["messages"]:
#     print(message)

### Replanner

In [None]:
from typing import Union
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


class Response(BaseModel):
    """Response to user."""

    response: str


class Act(BaseModel):
    """Action to perform."""

    action: Union[Response, Plan] = Field(
        description="Action to perform. If you want to respond to user, use Response. "
        "If you need to further use tools to get the answer, use Plan."
    )

replanner_model = agents_parameter["Replanner"]["model"]
replanner_system_prompt = f"{agents_parameter['Replanner']['prompt']}"

replanner_llm = ChatOpenAI(model=replanner_model) # ! Replanner需要使用gpt-4o才不會一直call tools
replanner_prompt = ChatPromptTemplate.from_template(replanner_system_prompt)

replanner = replanner_prompt | replanner_llm.with_structured_output(Act) # 限制使用特定模板回答問題

print("replanner_model: " + replanner_model)
print("replanner_prompt: \n" + replanner_system_prompt)

### Solver

In [None]:
solver_model = agents_parameter["Solver"]["model"]
solver_system_prompt = agents_parameter["Solver"]["prompt"]

solver_llm = ChatOpenAI(model=solver_model)
solver_prompt = ChatPromptTemplate.from_template(solver_system_prompt)

solver = solver_prompt | solver_llm

print("solver_model: " + solver_model)
print("solver_system_prompt: \n" + solver_system_prompt)

## Define Graph State

In [None]:
import operator
from typing import Annotated, List, Tuple, Any
from typing_extensions import TypedDict


class PlanExecute(TypedDict):
    input: str
    plan: List[str]
    past_steps: Annotated[List[Tuple], operator.add]
    response: str
    history: List[Tuple[str, Any]]

## Define Agent Node

In [None]:
async def plan_step(state: PlanExecute):
    plan = await planner.ainvoke({"user_input": [("user", state["input"])]}) # 對應到planner system prompt中的{user_input}
    state["history"].append(("Planner", plan.steps)) # 將plan的步驟加入history中

    return {
        "plan": plan.steps,
        "history": state["history"],
    }

async def execute_step(state: PlanExecute):
    plan = state["plan"]
    plan_str = "\n".join(f"{i+1}. {step}" for i, step in enumerate(plan))
    task = plan[0]
    task_formatted = f"""For the following plan:
{plan_str}\n\nYou are tasked with executing step {1}, {task}."""
    agent_response = await executor.ainvoke({"messages": [("user", task_formatted)]}) # react agent 用 messages 方式接收訊息
    state["history"].append(("Executor", (task, agent_response["messages"][-1].content)))

    return {
        "past_steps": [(task, agent_response["messages"][-1].content)], # react agent 接收訊息方式
        "history": state["history"],
    }

async def replan_step(state: PlanExecute):
    # 過濾掉state中不需要的欄位
    temp_state = state.copy()
    temp_state.pop("history")

    output = await replanner.ainvoke(temp_state)
    if isinstance(output.action, Response):
        state["history"].append(("Replanner", output.action.response))
        return {
            "response": output.action.response,
            "history": state["history"],
        }
    else:
        state["history"].append(("Replanner", output.action.steps))
        return {
            "plan": output.action.steps,
            "history": state["history"],
        }

async def solve_step(state: PlanExecute):
    print("history:")
    print(state["history"])
    response = await solver.ainvoke({"user_input": state["input"], "planning_history": state["history"]})
    return {"response": response.content, "history": state["history"]}

def should_end(state: PlanExecute):
    if "response" in state and state["response"]:
        return "solver"
    else:
        return "executor"

## Create Graph

In [None]:
from langgraph.graph import StateGraph, START, END

workflow = StateGraph(PlanExecute)

workflow.add_node("planner", plan_step)
workflow.add_node("executor", execute_step)
workflow.add_node("replanner", replan_step)
workflow.add_node("solver", solve_step)

workflow.add_edge(START, "planner")
workflow.add_edge("planner", "executor")
workflow.add_edge("executor", "replanner")
workflow.add_conditional_edges(
    "replanner",
    # Next, we pass in the function that will determine which node is called next.
    should_end,
    ["executor", "solver"],
)
workflow.add_edge("solver", END)

app = workflow.compile() # This compiles it into a LangChain Runnable, meaning you can use it as you would any other runnable

In [None]:
from IPython.display import Image, display

display(Image(app.get_graph(xray=True).draw_mermaid_png()))

## Run App

In [None]:
sequence = 0

with open("execution_chat_log.txt", "w") as f:
    f.write("")

def write_to_chat_log(content):
    with open("execution_chat_log.txt", "a") as f:
        f.write(content)

# Who is the headmaster of National Central University in Taiwan?
config = {"recursion_limit": 50}
inputs = {
    "input": "Who is the headmaster of National Central University ?",
    "history": [], # 初始化儲存History的list
}
write_to_chat_log(f"User Query:\n{inputs['input']}\n\n")

# async for event in app.astream(inputs, config=config):
#     for agent, state in event.items():
#         if agent != "__end__":
#             write_to_chat_log(f"{agent}:\n")
#             # ! Jupyter Notebook 裡使用 global sequence 會報錯，需要使用 nest_asyncio
#             # global sequence
#             # sequence += 1
#             # write_to_chat_log(f"{sequence}. {agent}:\n")

#             for key, value in state.items():
#                 if (key != "history"):
#                     write_to_chat_log(f"{key}: {value}\n")
            
#             write_to_chat_log("\n")

# Evaluation Team

## Define Agents

### Critic

In [4]:
from agents import create_react_agent_with_yaml

# * 根據使用者輸入和計畫制定生成評估標準
critic = create_react_agent_with_yaml("Critic")

planner_llm_config:
model: gpt-4o
temperature: 0
planner_system_prompt: 
You are a Planner Agent in an LLM-based multi-agent system designed to assist users in navigating and understanding school websites.

Your job is to generate clear, logical, and actionable step-by-step plans that guide other agents to fulfill the user's request. Each plan step should include:
  - A brief explanation of what the step aims to accomplish
  - A clear description of what needs to be found or processed
  - An output placeholder (e.g., #E1, #E2, etc.) for use in later steps

Please strictly follow this reasoning framework when designing your plan:
  1. Begin by identifying which website(s) from a pre-constructed internal database are most relevant to the user's input or intent.
  2. Analyze the content of the selected website to determine whether it contains enough information to address the user's need.
  3. If the content is **insufficient**, locate hyperlinks or references within the page that are mos

In [5]:
# response = critic.invoke({"messages": [("user", "Please evaluate the performance of execution team.")]})

# # 暫存評估標準，之後儲存到state內交給evaluator
# with open("Docs/evaluation_rubric.txt", "w") as f:
#     f.write(f"{response['messages'][-1].content}\n\n")

In [6]:
# print(response["messages"][-1].content)

In [7]:
# 查看調用工具情形
# for message in response["messages"]:
#     print(message)
#     if not message.content:
#         for item in message:
#             print(item)

### Evaluator

In [8]:
from agents import create_react_agent_with_yaml

# * 根據評估者提供的評估框架和評估執行團隊的任務執行成效
evaluator = create_react_agent_with_yaml("Evaluator")

Evaluator_llm_config:
model: gpt-4o-mini
temperature: 0
Evaluator_prompt: 
You are an Evaluator Agent in a multi-agent system tasked with assessing the performance of the Execution Team based on a predefined evaluation rubric.

You will be given a structured evaluation rubric created by the Critic Agent, with detailed expectations for each step of the original plan, including fallback evaluation rules for steps introduced later through replanning

You should use read_execution_chat_log to get the chat log that captures the actual actions and responses produced by the Execution Team while executing a multi-step plan.

Your goal is to evaluate how well each executed step aligns with the rubric. This includes:
- Identifying each step from the execution log and linking it to a rubric entry (original or fallback)
- Scoring each step based on whether it satisfies the rubric’s evaluation criteria
- Explaining the reasoning behind each score using specific evidence from the chat log
- Providin

In [9]:
# with open('Docs/evaluation_rubric.txt', 'r') as file:
#     evaluation_rubric = file.read()

# response = evaluator.invoke({"messages": [("user", evaluation_rubric)]})

# # 暫存評估結果，之後儲存到state內交給analyzer
# with open("evaluation_result.txt", "w") as f:
#     f.write(f"{response['messages'][-1].content}\n\n")

In [10]:
# print(response["messages"][-1].content)

In [11]:
# # 查看調用工具情形
# for message in response["messages"]:
#     print(message)
#     if not message.content:
#         for item in message:
#             print(item)

### Analyzer

In [12]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

analyzer_llm_config = agents_parameter["Analyzer"]["llm_config"]
analyzer_system_prompt = agents_parameter["Analyzer"]["prompt"]

analyzer_llm = ChatOpenAI(model=analyzer_llm_config["model"], temperature=analyzer_llm_config["temperature"])
analyzer_prompt = ChatPromptTemplate.from_template(analyzer_system_prompt)

analyzer = analyzer_prompt | analyzer_llm

print("analyzer_llm_config:")
for key, value in analyzer_llm_config.items():
    print(f"{key}: {value}")
print("analyzer_system_prompt: \n" + analyzer_system_prompt)

analyzer_llm_config:
model: gpt-4o-mini
temperature: 0
analyzer_system_prompt: 
You are an Attribution Agent in a multi-agent system. Your job is to analyze the performance evaluation results of an execution process and identify which agent in the execution team (Planner, Executor, Replanner) was responsible for any underperformance in the task.

You will be given:
- A full execution evaluation report generated by an Evaluator Agent
- The responsibility of each agent:
  - Planner: Designs the initial multi-step plan based on the user query
  - Executor: Executes each plan step by using appropriate tools
  - Replanner: Revises the plan dynamically when previous steps are insufficient or fail

For each step with a score of **Partially Met** or **Not Met**, do the following:
1. Summarize what went wrong based on the evaluation report
2. Identify the most responsible agent (Planner, Executor, or Replanner), based on who influenced that step the most
3. Explain why this agent is responsible

In [13]:
# with open("Docs/evaluation_result.txt") as file:
#     evaluation_result = file.read()

# response = analyzer.invoke({"evaluation_result": evaluation_result})

In [14]:
# print(response.content)

## Define Graph State

In [15]:
from typing_extensions import TypedDict

class Evaluation(TypedDict):
    input: str
    rubric: str
    result: str
    judgment: str

## Define Agent Node

In [16]:
async def critic_step(state: Evaluation):
    response = await critic.ainvoke({"messages": [("user", state["input"])]})
    state["rubric"] = response["messages"][-1].content # 儲存評估標準到state內
    return {
        "rubric": state["rubric"],
    }

async def evaluator_step(state: Evaluation):
    response = await evaluator.ainvoke({"messages": [("user", state["rubric"])]})
    state["result"] = response["messages"][-1].content # 儲存評估結果到state內
    return {
        "result": state["result"],
    }

async def analyzer_step(state: Evaluation):
    response = await analyzer.ainvoke({"evaluation_result": state["result"]})
    state["judgment"] = response.content # 儲存分析結果到state內
    return {
        "judgment": state["judgment"],
    }

## Create Graph

In [17]:
from langgraph.graph import StateGraph, START, END
from IPython.display import Image, display

evaluation_workflow = StateGraph(Evaluation)

evaluation_workflow.add_node("critic", critic_step)
evaluation_workflow.add_node("evaluator", evaluator_step)
evaluation_workflow.add_node("analyzer", analyzer_step)

evaluation_workflow.add_edge(START, "critic")
evaluation_workflow.add_edge("critic", "evaluator")
evaluation_workflow.add_edge("evaluator", "analyzer")
evaluation_workflow.add_edge("analyzer", END)

evaluation_app = evaluation_workflow.compile() # This compiles it into a LangChain Runnable, meaning you can use it as you would any other runnable

display(Image(evaluation_app.get_graph(xray=True).draw_mermaid_png()))

ReadTimeout: HTTPSConnectionPool(host='mermaid.ink', port=443): Read timed out. (read timeout=10)

## Run App

In [18]:
sequence = 0

with open("evaluation_chat_log.txt", "w") as f:
    f.write("")

def write_to_chat_log(content):
    with open("evaluation_chat_log.txt", "a") as f:
        f.write(content)

# Please evaluate the performance of execution team.
config = {"recursion_limit": 50}
inputs = {
    "input": "Please evaluate the performance of execution team.",
}
write_to_chat_log(f"Evaluation Query:\n{inputs['input']}\n\n")

async for event in evaluation_app.astream(inputs, config=config):
    for agent, state in event.items():
        if agent != "__end__":
            write_to_chat_log(f"{agent}:\n")
            # ! Jupyter Notebook 裡使用 global sequence 會報錯，需要使用 nest_asyncio
            # global sequence
            # sequence += 1
            # write_to_chat_log(f"{sequence}. {agent}:\n")

            for key, value in state.items():
                if (key != "history"):
                    write_to_chat_log(f"{key}: {value}\n")
            
            write_to_chat_log("\n")