In [5]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from langchain_huggingface import HuggingFaceEmbeddings  # HuggingFace嵌入

load_dotenv()

# Look the key up once and fail with a message that names the missing variable.
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')
if not DEEPSEEK_API_KEY:
    raise RuntimeError(
        "DEEPSEEK_API_KEY is not set; add it to the environment or to the .env file."
    )

# Chat model shared by the planner, the LLM tool and the solver.
model = ChatOpenAI(
    model='deepseek-chat',
    openai_api_key=DEEPSEEK_API_KEY,
    openai_api_base="https://api.deepseek.com/v1",
    # Alternative OpenAI-compatible endpoint:
    # model='gpt-4o-mini',
    # openai_api_key=os.getenv('CLOSEAI_API_KEY'),
    # openai_api_base="https://api.openai-proxy.org/v1",
)

# Alternative local model:
# llm = ChatOllama(
#                     model="llama3.2:3b"
#                 )

# Machine-specific settings can be overridden through environment variables;
# the defaults are the values this notebook was originally written with.
EMBEDDING_MODEL_PATH = os.getenv(
    "EMBEDDING_MODEL_PATH",
    r"E:/Workplace/GitHub/Embedding/maidalun/bce-embedding-base_v1",
)
EMBEDDING_DEVICE = os.getenv("EMBEDDING_DEVICE", "cuda")

# NOTE(review): `embedding` is not referenced by any other cell visible here;
# confirm it is still needed before paying the model-loading cost.
embedding = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_PATH,
    model_kwargs={"device": EMBEDDING_DEVICE, "trust_remote_code": True},
    encode_kwargs={"normalize_embeddings": True},
)

# Define graph state

In [2]:
from typing import List
from typing_extensions import TypedDict


class ReWOO(TypedDict):
    """State passed between the nodes of the ReWOO graph."""

    # The question or task to solve; supplied when the graph is started.
    task: str
    # Raw text of the planner's reply (written by get_plan).
    plan_string: str
    # Parsed plan steps (written by get_plan); each entry is a
    # (plan text, step name, tool name, tool input) tuple.
    steps: List
    # Evidence collected so far, keyed by step name such as "#E1"
    # (written by tool_execution); may be absent before the first tool call.
    results: dict
    # Final answer text (written by solve).
    result: str

# Planner

In [3]:
# Planner prompt with a single str.format placeholder, {task}.
# The worked example only uses the tools offered above it (Google, LLM) and keeps
# each Plan on one line, so it demonstrates exactly the format that the plan
# parser and the tool executor in this notebook accept.
prompt = """For the following task, make plans that can solve the problem step by step. For each plan, indicate \
which external tool together with tool input to retrieve evidence. You can store the evidence into a \
variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)

Tools can be one of the following:
(1) Google[input]: Worker that searches results from Google. Useful when you need to find short
and succinct answers about a specific topic. The input should be a search query.
(2) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general
world knowledge and common sense. Prioritize it when you are confident in solving the problem
yourself. Input can be any instruction.

For example,
Task: Thomas, Toby, and Rebecca worked a total of 157 hours in one week. Thomas worked x
hours. Toby worked 10 hours less than twice what Thomas worked, and Rebecca worked 8 hours
less than Toby. How many hours did Rebecca work?
Plan: Given Thomas worked x hours, translate the problem into algebraic expressions and solve them. #E1 = LLM[Solve x + (2x − 10) + ((2x − 10) − 8) = 157]
Plan: Find out the number of hours Thomas worked. #E2 = LLM[What is x, given #E1]
Plan: Calculate the number of hours Rebecca worked. #E3 = LLM[Calculate (2 ∗ #E2 − 10) − 8]

Begin! 
Describe your plans with rich details. Each Plan should be followed by only one #E.
Inside a tool input, refer to earlier evidence only by its variable name (for example #E1) and do not use square brackets.

Task: {task}"""

In [4]:
# Example question used for the stand-alone planner call and for the full graph run.
task = "what is the exact hometown of the 2024 mens australian open winner"

In [6]:
# One-off planner call outside the graph: fill the {task} placeholder and send
# the resulting prompt text to the chat model.
result = model.invoke(prompt.format(task=task))

In [7]:
# Display the raw message object returned by the model.
result

AIMessage(content="Plan: Identify the winner of the 2024 Men's Australian Open. Since the task is about a future event (as of October 2023), the winner is not yet known. However, we can assume the task is hypothetical or based on a prediction. Use Google to search for the most likely winner of the 2024 Men's Australian Open based on current rankings or predictions.  \n#E1 = Google[2024 Men's Australian Open winner prediction or likely winner]\n\nPlan: Retrieve the exact hometown of the winner identified in #E1. Use Google to search for the hometown of the player mentioned in #E1.  \n#E2 = Google[Exact hometown of [winner from #E1]]\n\nPlan: Verify the hometown information using an LLM to ensure accuracy and consistency. Provide the player's name and the hometown retrieved from #E2 to the LLM for confirmation.  \n#E3 = LLM[Confirm if [winner from #E1]'s exact hometown is [hometown from #E2]]\n\nPlan: Present the final answer based on the verified information from #E3.  \n#E4 = LLM[Provi

In [8]:
# Print only the reply text so the generated plan is readable line by line.
print(result.content)

Plan: Identify the winner of the 2024 Men's Australian Open. Since the task is about a future event (as of October 2023), the winner is not yet known. However, we can assume the task is hypothetical or based on a prediction. Use Google to search for the most likely winner of the 2024 Men's Australian Open based on current rankings or predictions.  
#E1 = Google[2024 Men's Australian Open winner prediction or likely winner]

Plan: Retrieve the exact hometown of the winner identified in #E1. Use Google to search for the hometown of the player mentioned in #E1.  
#E2 = Google[Exact hometown of [winner from #E1]]

Plan: Verify the hometown information using an LLM to ensure accuracy and consistency. Provide the player's name and the hometown retrieved from #E2 to the LLM for confirmation.  
#E3 = LLM[Confirm if [winner from #E1]'s exact hometown is [hometown from #E2]]

Plan: Present the final answer based on the verified information from #E3.  
#E4 = LLM[Provide the exact hometown of the 

## Planner Node

In [9]:
import re

from langchain_core.prompts import ChatPromptTemplate

# Regex to match plan steps of the form "Plan: <text> #E<n> = <Tool>[<input>]".
# Groups: (1) plan text, (2) evidence variable such as #E1, (3) tool name, (4) tool input.
# The tool input may contain one level of balanced square brackets, e.g.
# Google[Exact hometown of [winner from #E1]]; otherwise it ends at the first "]".
regex_pattern = r"Plan:\s*(.+)\s*(#E\d+)\s*=\s*(\w+)\s*\[((?:\[[^\[\]]*\]|[^\]])+)\]"
# Wrap the planner prompt as a single user message, then chain it into the chat model.
prompt_template = ChatPromptTemplate.from_messages(
    [("user", prompt)]
)
planner = prompt_template.pipe(model)


def get_plan(state: ReWOO):
    """Planner node: ask the model for a plan and parse it into executable steps.

    Args:
        state: Graph state; only ``state["task"]`` is read.

    Returns:
        A partial state update with ``steps`` (the list of tuples captured by
        ``regex_pattern``) and ``plan_string`` (the raw reply text).

    Raises:
        ValueError: If the reply contains no step matching ``regex_pattern``.
    """
    reply = planner.invoke({"task": state["task"]})
    plan_text = reply.content
    steps = re.findall(regex_pattern, plan_text)
    if not steps:
        # Stop here with the offending text; otherwise the worker node would
        # index into an empty step list and fail with a bare IndexError.
        raise ValueError(
            f"Planner output contained no parsable steps:\n{plan_text}"
        )
    return {"steps": steps, "plan_string": plan_text}

# Executor

In [10]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Web-search tool that backs the "Google" worker in tool_execution.
# NOTE(review): presumably requires a Tavily API key (TAVILY_API_KEY) in the
# environment — confirm against the tool's documentation.
search = TavilySearchResults()

In [11]:
# Smoke-test the search tool. The query is Chinese for
# "How many ancient dynasties did China have?"
search.invoke("中国有几个古代王朝？")

[{'url': 'https://www.zhihu.com/tardis/bd/art/409390451',
  'content': '中国朝代顺序完整表（建议收藏） 中国朝代顺序完整表（建议收藏） 2148 赞同 200 评论 8217 收藏 中国上下5000千年历史朝代顺序为：夏朝、商朝、西周、东周、秦朝、西楚、西汉、新朝、玄汉、东汉、三国、曹魏、蜀汉、孙吴、西晋、东晋、十六国、南朝、刘宋、南齐、南梁、南陈、北朝、北魏、东魏、北齐、西魏、北周、隋朝、唐朝、武周、唐朝中后期、五代、后梁、后唐、后晋、后汉、后周、十国、南吴、南唐、前属、后属、南汉、南楚、吴越、闽国、荆国、北汉、北宋、南宋、辽朝、西夏、金朝、元朝、明朝、清朝。 可以用口诀来进行辅助记忆，如： 三皇五帝始，尧舜禹相传； 夏商与西周，东周分两段； 春秋和战国，一统秦两汉； 三分魏蜀吴，二晋前后沿； 南北朝并立，隋唐五代传； 宋元明清后，皇朝至此完。 中国历史朝代主要有夏、商、周、秦、汉、三国、晋、南北朝、隋、唐、五代、十国、宋、辽、西夏、金、元、明、清、中华民国、中华人民共和国。 具体朝代年份如下： 夏朝：约公元前2029年-约公元前1559年，共计：471年 商朝：约公元前1559年-约公元前1046年，共计：438年 周朝：约公元前1046年-公元前256年，分为西周、东周，东周又分为春秋、战国，共计：867年 秦朝：公元前221-公元前206年，前221年秦王嬴政统一六国，首称皇帝，共计：16年 西楚：公元前206年-公元前202年，西楚霸王项羽，共计：5年 西汉：公元前202年-公元8年，汉高祖刘邦，共计：210年 新朝：公元8年腊月-公元23年10月6日，新太祖建兴帝王莽，共计：16年 玄汉：公元23-25年，汉更始帝刘玄，共计：3年 东汉：公元25-220年，汉光武帝刘秀，共计：196年 三国：公元220-280年，魏、蜀、吴三足鼎立，共计：61年 晋朝：公元265-420年，分为西晋(265-316年)、东晋(317-420年) ，共计：156年 南北朝：公元420-589年，共计：170年 隋朝：公元581-公元618年，隋文帝杨坚，共计：38年 唐朝：公元618-907年，唐高祖李渊，共计：290年 五代：公元907-960年，后梁、后唐、后晋、后汉、后周，共计：54年 十国：公

In [12]:
def _get_current_task(state: ReWOO):
    if "results" not in state or state["results"] is None:
        return 1
    if len(state["results"]) == len(state["steps"]):
        return None
    else:
        return len(state["results"]) + 1


def tool_execution(state: ReWOO):
    """Worker node that executes the next pending tool call of the plan.

    Args:
        state: Graph state; reads ``steps`` and, when present, ``results``.

    Returns:
        A partial state update ``{"results": ...}`` holding all evidence
        collected so far plus the entry for the step that was just executed.

    Raises:
        ValueError: If there is no pending step, or the step names a tool
            other than ``Google`` or ``LLM``.
    """
    _step = _get_current_task(state)
    steps = state["steps"]
    if _step is None or _step > len(steps):
        raise ValueError("tool_execution was called but there is no pending step to run")
    _, step_name, tool, tool_input = steps[_step - 1]
    # Work on a copy so the incoming state object is not mutated in place.
    _results = dict(state.get("results") or {})
    # Substitute whole evidence variables in a single pass: a plain
    # str.replace of "#E1" would also rewrite the prefix of "#E10", and
    # evidence text inserted by an earlier replacement would be scanned again.
    tool_input = re.sub(
        r"#E\d+",
        lambda match: _results.get(match.group(0), match.group(0)),
        tool_input,
    )
    if tool == "Google":
        result = search.invoke(tool_input)
    elif tool == "LLM":
        # Store only the reply text (.content), not the string form of the
        # whole message object.
        result = model.invoke(tool_input).content
    else:
        raise ValueError(
            f"Unsupported tool {tool!r} in step {step_name}; expected 'Google' or 'LLM'"
        )
    _results[step_name] = str(result)
    return {"results": _results}

# Solver

In [13]:
# Prompt for the final solver call. It has two str.format placeholders:
#   {plan} - the plan text with collected evidence substituted in (built by solve)
#   {task} - the original task
solve_prompt = """Solve the following task or problem. To solve the problem, we have made step-by-step Plan and \
retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might \
contain irrelevant information.

{plan}

Now solve the question or task according to provided Evidence above. Respond with the answer
directly with no extra words.

Task: {task}
Response:"""


def solve(state: ReWOO):
    """Solver node: combine the plan and its evidence and ask the model for the answer.

    Args:
        state: Graph state; reads ``steps``, ``task`` and, when present, ``results``.

    Returns:
        A partial state update ``{"result": <answer text>}``.
    """
    # The evidence mapping does not change while the plan text is built.
    _results = state.get("results") or {}

    def _fill(text):
        # Replace whole evidence variables in one pass so that "#E1" never
        # rewrites the prefix of "#E10" and inserted evidence is not rescanned.
        return re.sub(
            r"#E\d+",
            lambda match: _results.get(match.group(0), match.group(0)),
            text,
        )

    entries = [
        f"Plan: {_plan}\n{_fill(step_name)} = {tool}[{_fill(tool_input)}]"
        for _plan, step_name, tool, tool_input in state["steps"]
    ]
    # Separate the entries with a newline so each "Plan:" starts on its own line.
    plan = "\n".join(entries)
    prompt = solve_prompt.format(plan=plan, task=state["task"])
    result = model.invoke(prompt)
    return {"result": result.content}

# Define Graph

In [14]:
def _route(state):
    """Choose the node that follows the tool node.

    Returns "solve" when no step is left to execute, otherwise "tool" so the
    worker runs again for the next step.
    """
    pending_step = _get_current_task(state)
    return "solve" if pending_step is None else "tool"

In [15]:
from langgraph.graph import END, StateGraph, START

# Assemble the workflow: plan -> tool (repeated while steps remain) -> solve.
graph = StateGraph(ReWOO)

# Nodes: planner, tool worker, solver.
graph.add_node("plan", get_plan)
graph.add_node("tool", tool_execution)
graph.add_node("solve", solve)

# Edges, listed in execution order; _route decides where to go after "tool".
graph.add_edge(START, "plan")
graph.add_edge("plan", "tool")
graph.add_conditional_edges("tool", _route)
graph.add_edge("solve", END)

app = graph.compile()

In [16]:
# Run the graph on the example task and print every streamed update followed
# by a separator line. The loop variable keeps the last update after the loop.
for s in app.stream({"task": task}):
    print(s, "---", sep="\n")

{'plan': {'steps': [("Identify the winner of the 2024 Men's Australian Open. Since this is a recent event, use Google to search for the winner's name.  ", '#E1', 'Google', "Who won the 2024 Men's Australian Open?"), ("Once the winner's name is known, use Google again to find the exact hometown of the winner.  ", '#E2', 'Google', "What is the exact hometown of {winner's name from #E1}?"), ("Verify the accuracy of the hometown information by cross-referencing with a reliable source or the winner's official profile.  ", '#E3', 'Google', "Official profile or reliable source for {winner's name from #E1} hometown confirmation"), ('If necessary, use an LLM to summarize or clarify the information retrieved from the previous steps.  ', '#E4', 'LLM', "Summarize the hometown information for {winner's name from #E1} based on #E2 and #E3.")], 'plan_string': "Plan: Identify the winner of the 2024 Men's Australian Open. Since this is a recent event, use Google to search for the winner's name.  \n#E1 

In [17]:
# Print out the final result.
# `s` is the loop variable left over from the streaming loop, i.e. the last
# streamed update; it is indexed by the "solve" node's key to reach the answer.
print(s["solve"]["result"])

San Candido (Innichen), South Tyrol, Italy
