# API Key

In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
api_key = os.getenv("API_KEY")
if not api_key:
    # os.environ only accepts str values, so a missing key would otherwise
    # surface as an opaque TypeError on the assignment below.
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in a .env file."
    )
# Expose the key under OPENAI_API_KEY (presumably where the OpenAI client used
# by the agents looks for it -- confirm against the agent module).
os.environ["OPENAI_API_KEY"] = api_key

# Execution Team

## Define Agents

In [2]:
from agent import ExecutionAgent

# Instantiate the project's ExecutionAgent; the individual agents used by the
# graph nodes below are read from its attributes.
execution_agent = ExecutionAgent()

planner = execution_agent.planner
# executor = execution_agent.executor
executor = execution_agent.web_executor # Testing the web executor
replanner = execution_agent.replanner
solver = execution_agent.solver

Planner_llm_config:
model: gpt-4.1-mini
temperature: 0

Planner_prompt: 
You are a Planner Agent in an LLM-based multi-agent system designed to make plans for Executor Agents to follow in order to fulfill user requests by gathering information or operating systems related to National Central University.

Your job is to generate clear, logical, and actionable step-by-step plans that guide other agents to fulfill the user's request. Each plan step should include:
  - A brief explanation of what the step aims to accomplish
  - A clear description of what needs to be found or processed
  - An output placeholder (e.g., #E1, #E2, etc.) for use in later steps

You must use variables like #E1, #E2, etc., to represent intermediate results that can be referenced in later steps, and ensure each step builds upon the previous one. The final plan should be concise, clearly structured, and executable by an Executor Agent.

Use the following reasoning framework **as a flexible guide** and adjust steps

In [3]:
# response = planner.invoke({"user_input": [("user", "Please help me apply leave application.")]})
# for step in response.steps:
#     print(step)

In [3]:
import time

# Create the browser that the web executor's tools operate on.
execution_agent.web_operation_tool.create_browser()

try:
    start_time = time.time()

    # Hand the web executor one pre-formatted plan step and show its final message.
    response = executor.invoke({"messages": [("user", "Step 1. function_name: 'navigate_with_url', parameters: '{\"url\":\"https://cis.ncu.edu.tw/iNCU/stdAffair/leaveRequest\"}'")]})
    print(response["messages"][-1].content)
    end_time = time.time()

    print(f"Execution time: {end_time - start_time} seconds")
finally:
    # The Selenium controller's destructor is unreliable, so containers must be
    # removed explicitly at runtime. Doing it in `finally` keeps a failed
    # invoke from leaving a container running.
    execution_agent.web_operation_tool.selenium_controller.clean_containers()

Creating container for user_id 1130...
Container suspicious_jang created on port 10050
Container created for user_id 1130 on port 10050
Connected to Selenium container for user_id 1130 on port 10050
Browser for user_id 1130 navigated to https://cis.ncu.edu.tw/iNCU/stdAffair/leaveRequest
Screenshot saved for user_id 1130 at screenshots/website_screenshot_1.png
#E1 = 已成功導覽至中興大學請假系統頁面 https://cis.ncu.edu.tw/iNCU/stdAffair/leaveRequest 。請問接下來需要我執行什麼操作？
Execution time: 4.112503528594971 seconds
Cleaning up all containers...
Found 1 containers.
Container suspicious_jang has been stopped and removed.


In [4]:
# Dump the whole executor result (every message, including tool calls) for inspection.
print(response)

{'messages': [HumanMessage(content='Step 1. function_name: \'navigate_with_url\', parameters: \'{"url":"https://cis.ncu.edu.tw/iNCU/stdAffair/leaveRequest"}\'', additional_kwargs={}, response_metadata={}, id='fc8362c6-15b2-4476-bd9d-ee1669cc8f63'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_EToVGKRhJVaoC20Vm4iRxAWi', 'function': {'arguments': '{"url":"https://cis.ncu.edu.tw/iNCU/stdAffair/leaveRequest"}', 'name': 'navigate_with_url'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 928, 'total_tokens': 959, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_38647f5e19', 'id': 'chatcmpl-BOjM7Y5oJSKdVxk1yBFu7tpZlIgmJ', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-

## Define Graph State

In [None]:
import operator
from typing import Annotated, List, Tuple, Any
from typing_extensions import TypedDict


class PlanExecute(TypedDict):
    """State schema handed to StateGraph for the plan-and-execute workflow."""

    # The user's request; read by plan_step and solve_step.
    input: str
    # Current list of plan steps; execute_step always runs the first entry.
    plan: List[str]
    # (task, result) tuples returned by execute_step. Annotated with
    # operator.add, which LangGraph presumably uses as the reducer so returned
    # lists are concatenated onto the existing value -- confirm against the
    # LangGraph version in use.
    past_steps: Annotated[List[Tuple], operator.add]
    # Final answer; a non-empty value makes should_end route to the solver.
    response: str
    # (agent name, payload) entries added by the planner, executor and replanner nodes.
    history: List[Tuple[str, Any]]

## Define Agent Node

In [None]:
async def plan_step(state: PlanExecute):
    """Ask the planner for an initial plan and record it in the history.

    Args:
        state: Current graph state. Reads ``input`` and, if present, ``history``.

    Returns:
        A partial state update with the new ``plan`` and the extended ``history``.
    """
    # The payload key corresponds to {user_input} in the planner system prompt.
    plan = await planner.ainvoke({"user_input": [("user", state["input"])]})
    # Build a new list rather than appending to state["history"]: the node
    # reports its change through the returned update and leaves the state it
    # was handed untouched. A missing/None history is treated as empty.
    history = [*(state.get("history") or []), ("Planner", plan.steps)]

    return {
        "plan": plan.steps,
        "history": history,
    }

async def execute_step(state: PlanExecute):
    """Run the first step of the current plan with the executor agent.

    Args:
        state: Current graph state. Reads ``plan`` and, if present, ``history``.

    Returns:
        A partial state update: ``past_steps`` holds the single
        ``(task, result)`` pair just produced, ``history`` the extended log.

    Raises:
        IndexError: If ``plan`` is empty.
    """
    plan = state["plan"]
    plan_str = "\n".join(f"{i+1}. {step}" for i, step in enumerate(plan))
    task = plan[0]
    # Only the head of the plan is ever executed, so the step number in the
    # prompt is always 1.
    task_formatted = f"""For the following plan:
{plan_str}\n\nYou are tasked with executing step 1, {task}."""
    # The ReAct agent receives its input as a "messages" list.
    agent_response = await executor.ainvoke({"messages": [("user", task_formatted)]})
    # The last message carries the executor's final answer for this step.
    step_record = (task, agent_response["messages"][-1].content)
    # New list instead of an in-place append, so the incoming state is not mutated.
    history = [*(state.get("history") or []), ("Executor", step_record)]

    return {
        "past_steps": [step_record],
        "history": history,
    }

async def replan_step(state: PlanExecute):
    """Let the replanner either finish with a response or issue a new plan.

    Args:
        state: Current graph state. Every field except ``history`` is
            forwarded to the replanner.

    Returns:
        A partial state update containing either ``response`` (when the
        replanner's action is an ``execution_agent.Response``) or ``plan``,
        plus the extended ``history``.
    """
    # Drop the field the replanner does not need; building a new dict also
    # tolerates a state that has no "history" key yet.
    replanner_input = {key: value for key, value in state.items() if key != "history"}
    previous_history = state.get("history") or []

    output = await replanner.ainvoke(replanner_input)
    if isinstance(output.action, execution_agent.Response):
        # Fresh list instead of an in-place append on the incoming state.
        return {
            "response": output.action.response,
            "history": [*previous_history, ("Replanner", output.action.response)],
        }
    return {
        "plan": output.action.steps,
        "history": [*previous_history, ("Replanner", output.action.steps)],
    }

async def solve_step(state: PlanExecute):
    """Produce the final answer from the user input and the recorded history.

    Args:
        state: Current graph state; reads ``input`` and ``history``.

    Returns:
        A partial state update with the solver's ``response`` text and the
        unchanged ``history``.
    """
    # Echo the history to stdout before calling the solver.
    print("history:")
    recorded_history = state["history"]
    print(recorded_history)

    solver_payload = {
        "user_input": state["input"],
        "planning_history": recorded_history,
    }
    answer = await solver.ainvoke(solver_payload)
    return {"response": answer.content, "history": recorded_history}

def should_end(state: PlanExecute):
    """Choose the node that runs after the replanner.

    Args:
        state: Current graph state.

    Returns:
        ``"solver"`` when the state holds a non-empty ``response``,
        otherwise ``"executor"``.
    """
    # A missing key and an empty/None value are treated the same way.
    has_response = bool(state.get("response"))
    return "solver" if has_response else "executor"

## Create Graph

In [None]:
from langgraph.graph import StateGraph, START, END

# Build the plan-and-execute graph over the PlanExecute state.
execution_workflow = StateGraph(PlanExecute)

# One node per agent role.
execution_workflow.add_node("planner", plan_step)
execution_workflow.add_node("executor", execute_step)
execution_workflow.add_node("replanner", replan_step)
execution_workflow.add_node("solver", solve_step)

# Fixed part of the flow: START -> planner -> executor -> replanner.
execution_workflow.add_edge(START, "planner")
execution_workflow.add_edge("planner", "executor")
execution_workflow.add_edge("executor", "replanner")
# After every replanning round should_end picks the next node: back to the
# executor, or on to the solver once a response is present.
execution_workflow.add_conditional_edges(
    "replanner",
    # Next, we pass in the function that will determine which node is called next.
    should_end,
    ["executor", "solver"],
)
execution_workflow.add_edge("solver", END)

execution_app = execution_workflow.compile() # This compiles it into a LangChain Runnable, meaning you can use it as you would any other runnable

In [None]:
from IPython.display import Image, display
from PIL import Image as PILImage
from io import BytesIO

# Render the compiled workflow as a Mermaid diagram and keep the PNG bytes.
graph_bytes = execution_app.get_graph(xray=True).draw_mermaid_png()

# Optional: save the rendered diagram to disk.
# NOTE(review): the file name ends in .jpg but the image is saved as PNG.
# output_file_path = "test.jpg"
# with BytesIO(graph_bytes) as byte_stream:
#     image = PILImage.open(byte_stream)
#     image.save(output_file_path, format="PNG")

# Show the diagram inline in the notebook.
display(Image(graph_bytes))

In [None]:
# import time
# import nest_asyncio
# nest_asyncio.apply()

# start_time = time.time()
# result = tool_dict["website_links_crawler"].invoke({"link": "https://pdc.adm.ncu.edu.tw/#&panel1-1"})
# # website_links_crawler("https://www.ncu.edu.tw/tw/")



# end_time = time.time()
# execution_time = end_time - start_time

# print(f"Execution time: {execution_time} seconds")

## Run App

In [None]:
import nest_asyncio
import time

import os

# Make sure the log directory exists, then start every run with an empty log.
os.makedirs("Outputs", exist_ok=True)
with open("Outputs/execution_chat_log.txt", "w", encoding="utf-8") as f:
    f.write("")

def write_to_chat_log(content):
    """Append ``content`` verbatim to the execution chat log.

    Args:
        content: Text to append. No newline is added.
    """
    # UTF-8 is requested explicitly: the agents reply in Chinese and the
    # platform's default encoding may not be able to represent it.
    with open("Outputs/execution_chat_log.txt", "a", encoding="utf-8") as f:
        f.write(content)

# Who is the headmaster of National Central University in Taiwan?
# Summarize the content of the 111 Academic Affairs Regulations.
# Please help me gather information related to scholarship applications.
# Please help me fill out the leave application on the school website.
config = {"recursion_limit": 30}
inputs = {
    "input": "Please help me gather information related to scholarship applications.",
    "history": [], # 初始化儲存History的list
}
write_to_chat_log(f"User Query:\n{inputs['input']}\n\n")

# tool_dict["create_browser"].invoke(input=None)

nest_asyncio.apply()
start_time = time.time()
async for event in execution_app.astream(inputs, config=config):
    for agent, state in event.items():
        if agent != "__end__":
            write_to_chat_log(f"{agent}:\n")

            for key, value in state.items():
                if (key != "history"):
                    write_to_chat_log(f"{key}: {value}\n")
            
            write_to_chat_log("\n")
end_time = time.time()

execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")
# del tools.selenium_controller

# Evaluation Team

## Define Agents

### Critic

In [None]:
from agent import create_react_agent_with_yaml

# * Generates the evaluation rubric from the user input and the plan that was made
critic = create_react_agent_with_yaml("Critic")

In [None]:
# response = critic.invoke({"messages": [("user", "Please evaluate the performance of execution team.")]})

# # 暫存評估標準，之後儲存到state內交給evaluator
# with open("Docs/evaluation_rubric.txt", "w") as f:
#     f.write(f"{response['messages'][-1].content}\n\n")

In [None]:
# print(response["messages"][-1].content)

In [None]:
# 查看調用工具情形
# for message in response["messages"]:
#     print(message)
#     if not message.content:
#         for item in message:
#             print(item)

### Evaluator

In [None]:
from agent import create_react_agent_with_yaml

# * Evaluates how well the execution team carried out its task, based on the supplied evaluation framework
# (evaluator_step passes it the rubric produced by the critic)
evaluator = create_react_agent_with_yaml("Evaluator")

In [None]:
# with open('Docs/evaluation_rubric.txt', 'r') as file:
#     evaluation_rubric = file.read()

# response = evaluator.invoke({"messages": [("user", evaluation_rubric)]})

# # 暫存評估結果，之後儲存到state內交給analyzer
# with open("evaluation_result.txt", "w") as f:
#     f.write(f"{response['messages'][-1].content}\n\n")

In [None]:
# print(response["messages"][-1].content)

In [None]:
# # 查看調用工具情形
# for message in response["messages"]:
#     print(message)
#     if not message.content:
#         for item in message:
#             print(item)

## Define Graph State

In [None]:
from typing_extensions import TypedDict

class Evaluation(TypedDict):
    """State schema handed to StateGraph for the evaluation workflow."""

    # The evaluation request; sent to the critic by critic_step.
    input: str
    # Critic's last message; produced by critic_step, consumed by evaluator_step.
    rubric: str
    # Evaluator's last message; produced by evaluator_step.
    result: str
    # NOTE(review): not read or written by any node visible in this notebook.
    judgment: str

## Define Agent Node

In [None]:
async def critic_step(state: Evaluation):
    """Have the critic turn the evaluation request into a rubric.

    Args:
        state: Current graph state; only ``input`` is read.

    Returns:
        A partial state update holding the critic's last message as ``rubric``.
    """
    response = await critic.ainvoke({"messages": [("user", state["input"])]})
    # The rubric reaches the graph state through the returned update; the
    # incoming state is not written to (same approach as analyze_step).
    return {
        "rubric": response["messages"][-1].content,
    }

async def evaluator_step(state: Evaluation):
    """Have the evaluator assess the execution team against the rubric.

    Args:
        state: Current graph state; only ``rubric`` is read.

    Returns:
        A partial state update holding the evaluator's last message as ``result``.
    """
    response = await evaluator.ainvoke({"messages": [("user", state["rubric"])]})
    # The result reaches the graph state through the returned update; the
    # incoming state is not written to (same approach as analyze_step).
    return {
        "result": response["messages"][-1].content,
    }

## Create Graph

In [None]:
from langgraph.graph import StateGraph, START, END
from IPython.display import Image, display

# Build the evaluation graph over the Evaluation state.
evaluation_workflow = StateGraph(Evaluation)

evaluation_workflow.add_node("critic", critic_step)
evaluation_workflow.add_node("evaluator", evaluator_step)

# Strictly linear flow: START -> critic -> evaluator -> END.
evaluation_workflow.add_edge(START, "critic")
evaluation_workflow.add_edge("critic", "evaluator")
evaluation_workflow.add_edge("evaluator", END)

evaluation_app = evaluation_workflow.compile() # This compiles it into a LangChain Runnable, meaning you can use it as you would any other runnable

In [None]:
# display(Image(evaluation_app.get_graph(xray=True).draw_mermaid_png()))

## Run App

In [None]:
import time

import os

# Make sure the log directory exists, then start every run with an empty log.
os.makedirs("Outputs", exist_ok=True)
with open("Outputs/evaluation_chat_log.txt", "w", encoding="utf-8") as f:
    f.write("")

def write_to_chat_log(content):
    """Append ``content`` verbatim to the evaluation chat log.

    Note: this rebinds the ``write_to_chat_log`` name used by the other
    "Run App" cells, so later calls write to the evaluation log.

    Args:
        content: Text to append. No newline is added.
    """
    # UTF-8 is requested explicitly so non-ASCII agent output is written
    # regardless of the platform's default encoding.
    with open("Outputs/evaluation_chat_log.txt", "a", encoding="utf-8") as f:
        f.write(content)

# Please evaluate the performance of execution team.
config = {"recursion_limit": 50}
inputs = {
    "input": "Please evaluate the performance of execution team.",
}
write_to_chat_log(f"Evaluation Query:\n{inputs['input']}\n\n")

start_time = time.time()
async for event in evaluation_app.astream(inputs, config=config):
    for agent, state in event.items():
        if agent != "__end__":
            write_to_chat_log(f"{agent}:\n")

            for key, value in state.items():
                if (key != "history"):
                    write_to_chat_log(f"{key}: {value}\n")
            
            write_to_chat_log("\n")
end_time = time.time()

evaluation_time = end_time - start_time
print(f"Evaluation time: {evaluation_time:.2f} seconds")

# Evolution Team

## Define Agents

### Analyzer

In [None]:
# from langchain_openai import ChatOpenAI
# from langchain_core.prompts import ChatPromptTemplate

# analyzer_llm_config = agents_parameter["Analyzer"]["llm_config"]
# analyzer_system_prompt = agents_parameter["Analyzer"]["prompt"]

# analyzer_llm = ChatOpenAI(model=analyzer_llm_config["model"], temperature=analyzer_llm_config["temperature"])
# analyzer_prompt = ChatPromptTemplate.from_template(analyzer_system_prompt)

# analyzer = analyzer_prompt | analyzer_llm

# print("analyzer_llm_config:")
# for key, value in analyzer_llm_config.items():
#     print(f"{key}: {value}")
# print("analyzer_system_prompt: \n" + analyzer_system_prompt)

In [None]:
from agent import create_react_agent_with_yaml

# Analyzer agent; analyze_step sends it the user request and keeps its last message as the analysis.
analyzer = create_react_agent_with_yaml("Analyzer")

In [None]:
# response = analyzer.invoke({"messages": [("user", "Please analyze the evaluation result of the execution team.")]})

In [None]:
# print(response["messages"][-1].content)

### Prompt Optimizer

In [None]:
from agent import create_react_agent_with_yaml
from pydantic import BaseModel, Field

# NOTE(review): this schema is not passed to create_react_agent_with_yaml
# below, and the only visible use of its field is commented out in
# prompt_optimize_step -- confirm whether structured output is still planned.
class Optimization_Response(BaseModel):
    """Optimization response to user."""
    
    updated_agent_system_prompt: str = Field(
        description="The complete updated system prompt for the agent that is most responsible for the identified issue."
    )

# Prompt optimizer agent; prompt_optimize_step sends it the analysis text.
prompt_optimizer = create_react_agent_with_yaml("Prompt Optimizer")

In [None]:
# Sample analyzer-style report kept as input for manually trying the prompt
# optimizer; its only visible consumer is the commented-out
# prompt_optimizer.invoke call that follows this string.
analysis = """
analysis: All steps in the evaluation report were scored as Fully Met. There are some improvement suggestions mentioned, but none indicate clear underperformance or partial fulfillment of the task. Therefore, I will analyze the improvement suggestions to see if any step shows clear room for improvement that warrants responsibility attribution.

Step 1: URL identification was appropriate; suggestion is to justify URL choice more clearly. This is a Planner-related improvement.

Step 2: Content extraction was relevant; suggestion is to summarize content relevance explicitly. This relates to Executor's communication of results.

Step 3: Sufficiency assessment was accurate; suggestion is to state criteria explicitly. This is a Planner responsibility to define assessment criteria.

Step 4: Relevant links identified; suggestion to avoid non-functional links. This is an Executor detail in link selection.

Step 5: Redirecting search was efficient; suggestion to document rationale earlier. This relates to Replanner's decision-making transparency.

Step 6: Final extraction accurate; suggestion to include direct citation. This is Executor's presentation of results.

Additional replanning steps: Effective replanning; suggestion to document decision-making more explicitly. This is Replanner responsibility.

Summary of improvement suggestions:
- Planner: Justify URL choice, state sufficiency criteria explicitly
- Executor: Summarize content relevance, avoid non-functional links, include citations
- Replanner: Document replanning decisions more explicitly

None of these suggestions indicate failure or partial fulfillment, only room for clearer communication and documentation.

Hence, no step shows clear underperformance. The overall task outcome was successful with all steps fully met.

Final judgment:
- No agent caused underperformance.
- Minor improvements are distributed among Planner, Executor, and Replanner.
- Since the plan was solid and execution was correct, and replanning was effective, the overall responsibility is balanced.
- If forced to select the primary responsible agent for minor improvements, the Planner could be highlighted for improving clarity in plan justification and assessment criteria.

---

**Primary Responsible Agent**: Planner  
**Justification for Final Attribution**: The Planner could improve by explicitly justifying URL choices and clearly stating sufficiency criteria, which would enhance clarity and reduce ambiguity in the execution process. These foundational improvements would benefit the entire workflow.  
**Summary of Issues**: Minor suggestions for clearer documentation and communication in plan justification, content relevance assessment, and replanning rationale; no failures or partial completions."""
# response = prompt_optimizer.invoke({"messages": [("user", f"Analysis: \n{analysis}")]})

In [None]:
# print(response["messages"][-1].content)

In [None]:
# print(response)

## Define Graph State

In [None]:
from typing_extensions import TypedDict

class Evolution(TypedDict):
    """State schema handed to StateGraph for the evolution workflow."""

    # The analysis request; sent to the analyzer by analyze_step.
    input: str
    # Analyzer's last message; produced by analyze_step, consumed by prompt_optimize_step.
    analysis: str
    # Prompt optimizer's last message; produced by prompt_optimize_step.
    result: str
    # updated_agent_system_prompt: str

## Define Agent Node

In [None]:
async def analyze_step(state: Evolution):
    """Send the user request to the analyzer and keep its final message.

    Args:
        state: Current graph state; only ``input`` is read.

    Returns:
        A partial state update holding the analyzer's last message as ``analysis``.
    """
    analyzer_payload = {"messages": [("user", state["input"])]}
    analyzer_output = await analyzer.ainvoke(analyzer_payload)
    # Only the text of the last message is stored in the state.
    final_message = analyzer_output["messages"][-1]
    return {"analysis": final_message.content}

async def prompt_optimize_step(state: Evolution):
    """Send the analysis to the prompt optimizer and keep its final message.

    Args:
        state: Current graph state; only ``analysis`` is read.

    Returns:
        A partial state update holding the optimizer's last message as ``result``.
    """
    optimizer_payload = {"messages": [("user", state["analysis"])]}
    optimizer_output = await prompt_optimizer.ainvoke(optimizer_payload)
    # Only the text of the last message is stored as the final reply.
    final_message = optimizer_output["messages"][-1]
    # Storing the updated prompt from a structured response is currently disabled:
    # "updated_agent_system_prompt": response["structured_response"].updated_agent_system_prompt
    return {"result": final_message.content}

## Create Graph

In [None]:
from langgraph.graph import StateGraph, START, END
from IPython.display import Image, display

# Build the evolution graph over the Evolution state.
evolution_workflow = StateGraph(Evolution)

evolution_workflow.add_node("analyzer", analyze_step)
evolution_workflow.add_node("prompt_optimizer", prompt_optimize_step)

# Strictly linear flow: START -> analyzer -> prompt_optimizer -> END.
evolution_workflow.add_edge(START, "analyzer")
evolution_workflow.add_edge("analyzer", "prompt_optimizer")
evolution_workflow.add_edge("prompt_optimizer", END)

evolution_app = evolution_workflow.compile() # This compiles it into a LangChain Runnable, meaning you can use it as you would any other runnable

In [None]:
# Render the compiled evolution graph as a Mermaid PNG and show it inline.
display(Image(evolution_app.get_graph(xray=True).draw_mermaid_png()))

## Run App

In [None]:
import time

import os

# Make sure the log directory exists, then start every run with an empty log.
os.makedirs("Outputs", exist_ok=True)
with open("Outputs/evolution_chat_log.txt", "w", encoding="utf-8") as f:
    f.write("")

def write_to_chat_log(content):
    """Append ``content`` verbatim to the evolution chat log.

    Note: this rebinds the ``write_to_chat_log`` name used by the other
    "Run App" cells, so later calls write to the evolution log.

    Args:
        content: Text to append. No newline is added.
    """
    # UTF-8 is requested explicitly so non-ASCII agent output is written
    # regardless of the platform's default encoding.
    with open("Outputs/evolution_chat_log.txt", "a", encoding="utf-8") as f:
        f.write(content)

# Please analyze the evaluation result of the execution team.
config = {"recursion_limit": 50}
inputs = {
    "input": "Please analyze the evaluation result of the execution team.",
}
write_to_chat_log(f"Evolution Query:\n{inputs['input']}\n\n")

start_time = time.time()
async for event in evolution_app.astream(inputs, config=config):
    for agent, state in event.items():
        if agent != "__end__":
            write_to_chat_log(f"{agent}:\n")

            for key, value in state.items():
                if (key != "history"):
                    write_to_chat_log(f"{key}: {value}\n")
            
            write_to_chat_log("\n")
end_time = time.time()

evolution_time = end_time - start_time
print(f"Evolution time: {evolution_time:.2f} seconds")