# Game of 24: ToT + DFS

## Utils

### Imports

In [17]:
from langchain_openai import ChatOpenAI
from environments.game_of_24.common.evaluate_parser import GameOf24EvaluateOutputParser
from environments.game_of_24.tot_dfs.evaluate_prompts import evaluate_prompt
from environments.game_of_24.tot_dfs.generate_prompts import generate_sample_openai_tools_prompt
from planning_library.strategies import TreeOfThoughtsDFSStrategy
from planning_library.strategies.tot_dfs.components import (
    ThoughtEvaluator,
    AgentThoughtGenerator,
    RunnableThoughtEvaluator,
    ThresholdThoughtEvaluatorContinueJudge,
)
from operator import itemgetter
from langchain_core.agents import AgentAction, AgentFinish, AgentStep
from langchain_core.runnables import RunnableLambda
from typing import List, Union, Optional
from environments.game_of_24.common.simple_tools import add, subtract, multiply, divide
import os

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Enabling logging

In [18]:
# os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
# os.environ["WANDB_PROJECT"] = "aeliseeva-tot-dfs-game24-test"

In [19]:
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "ToT + Game of 24 test"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

## Defining components

### Hyperparameters

In [20]:
# ToT hyperparameters
max_num_thoughts = 3  # number of thoughts to generate at each iteration
max_num_steps = 20  # total maximum number of iterations
value_threshold = 0.49  # threshold for evaluation; only thoughts with value > value_threshold will be explored

# other hyperparameters
model_name = "gpt-3.5-turbo"
temperature = 0.8

## Thought Evaluator

**Thought Evaluator** is responsible for evaluating the plausibility of individual thoughts.

Default **Thought Evaluator** is powered by a [`Runnable`](https://python.langchain.com/docs/expression_language/interface) that takes in:

* `inputs`: original inputs
* `trajectory`: a sequence of agent actions up to current node
* `next_thought`: a current suggestion for the next step (either finish or call tool(s))
* `observation`: `None` if `next_thought` is `AgentFinish`, otherwise, the result of tool call(s)

The runnable is allowed to return anything. In this case, it returns a float ranging from 0 to 1.



### Prompt

Here is a simple prompt for Thought Evaluator in Game of 24.

In [21]:
evaluate_prompt.input_variables

['inputs', 'next_thought', 'observation', 'trajectory']

In [22]:
print(evaluate_prompt.format(inputs="2 3 6 4", trajectory=[], next_thought="2 + 3", observation="5"))

System: You are a helpful assistant that judges whether answers to Game of 24 are correct or individual steps to reach 24 from given numbers seem plausible.
Human: You are given inputs and intermediate steps for Game of 24: the goal is to reach 24 via arithmetical operations on given numbers. In your case, Game of 24 is played in step-by-step fashion: each suggestion is an arithmetical operation between two numbers. You might be given either a suggestion (a single arithmetical operation on two numbers) or a final answer (mathematical expression that should be equal to 24). In the former case, evaluate if a new suggestion is correct and how likely it is to help in reaching 24. In the latter case, evaluate if the final answer is correct, i.e., uses each input number exactly once. You are allowed to comment your decision, but make sure to always output one of the following words in the end: 'sure', 'likely', 'impossible'.
Human: Input: 2 3 6 4
AI: Suggestion: 2 + 3 = 5
Human: Judge:


### Formatting Utils

In [23]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages


op_map = {"add": "+", "subtract": "-", "multiply": "*", "divide": "/"}


def format_observation(observation: Optional[Union[List[AgentStep], AgentStep]]) -> Optional[str]:
    if observation is None:
        return None

    if isinstance(observation, list):
        return ";".join(format_observation(obs) for obs in observation)

    return str(observation.observation)


def format_next_thought(next_thought: Union[List[AgentAction], AgentAction, AgentFinish]) -> str:
    if isinstance(next_thought, AgentAction):
        number1 = next_thought.tool_input["number1"]
        number2 = next_thought.tool_input["number2"]
        return f"{number1} {op_map[next_thought.tool]} {number2}"
    elif isinstance(next_thought, AgentFinish):
        return next_thought.log
    else:
        # ugly hack, because it ignores an instruction to generate one action per step ><
        return format_next_thought(next_thought[0])

### Putting It All Together

In [24]:
evaluator_chain = (
    {
        "inputs": RunnableLambda(lambda x: x["inputs"]["inputs"]),
        "trajectory": itemgetter("trajectory") | RunnableLambda(format_to_openai_tool_messages),
        "observation": itemgetter("observation") | RunnableLambda(format_observation),
        "next_thought": itemgetter("next_thought") | RunnableLambda(format_next_thought),
    }
    | evaluate_prompt
    | ChatOpenAI(model=model_name, temperature=temperature)
    | GameOf24EvaluateOutputParser()
)

judge = ThresholdThoughtEvaluatorContinueJudge(value_threshold)
backbone = RunnableThoughtEvaluator(evaluator_chain)
thought_evaluator = ThoughtEvaluator(judge=judge, backbone=backbone)

## Thought Generator

**Thought Generator** is responsible for suggesting possible actions on each new step.

Default **Thought Generator** is powered by [any agent available in LangChain](https://python.langchain.com/docs/modules/agents/agent_types/). Specifically, it expects either [`BaseSingleActionAgent`](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.BaseSingleActionAgent.html#langchain.agents.agent.BaseSingleActionAgent) or [`BaseMultiAgentAgent`](https://api.python.langchain.com/en/latest/agents/langchain.agents.agent.BaseMultiActionAgent.html#langchain.agents.agent.BaseMultiActionAgent).

In [Tree of Thoughts paper](https://arxiv.org/abs/2305.10601), there are two possible workflows for Tree of Thoughts:

* *Sample*: $k$ i.i.d. calls to a Thought Generator to get $k$ suggestions
* *Propose*: a single call to a Thought Generator to get $k$ suggestions

Let's try *Sample* one.

### Prompt

Generation prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [25]:
generate_sample_openai_tools_prompt.input_variables

['agent_scratchpad', 'inputs', 'previous_thoughts']

In [28]:
print(
    generate_sample_openai_tools_prompt.format(
        inputs="2 2 2 8", agent_scratchpad=[], previous_thoughts=[], max_num_thoughts=5
    )
)

System: You are a helpful assistant that plays Game of 24.
Human: Your end goal is to obtain 24 from given numbers via basic arithmetic operations. Let's play Game of 24 in a step-by-step fashion: use only one of available tools to suggest a possible next step from the current state. Please, make sure to suggest exactly ONE (1) tool call, no more and no less. Refrain from calling tools only when you're ready to give a final answer. In this case, make sure to include a mathematical expression showing how to obtain 24 from given numbers, for instance: '(2 + 2) * (12 / 2) = 24'
Inputs: 2 2 2 8
Human: You might have already made some suggestions for the current state - if you did, you will find them below. Don't repeat yourself.


### Tools

For Game of 24, we provide the agent with simple tools performing basic arithmetical operations.

In [29]:
tools = [add, subtract, multiply, divide]

### Putting It All Together

Let's use [OpenAI Tools](https://python.langchain.com/docs/modules/agents/agent_types/openai_tools) agent.

In [38]:
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_core.messages import BaseMessage, AIMessage

from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser, OpenAIToolAgentAction


def format_previous_thoughts(
    previous_thoughts: List[Union[List[AgentAction], AgentFinish, AgentAction]]
) -> List[BaseMessage]:
    messages = []
    for agent_action in previous_thoughts:
        if isinstance(agent_action, OpenAIToolAgentAction):
            new_messages = list(agent_action.message_log)
            messages.extend([new for new in new_messages if new not in messages])
        elif isinstance(agent_action, list):
            messages.extend(format_previous_thoughts(agent_action))
        else:
            messages.append(AIMessage(content=agent_action.log))
    return messages

In [39]:
# copied from langchain.agents.create_openai_tools_agent to allow for custom formatting of `previous_thoughts in inputs


llm = ChatOpenAI(model=model_name, temperature=temperature).bind(tools=[convert_to_openai_tool(tool) for tool in tools])

agent = (
    {
        "inputs": itemgetter("inputs"),
        "agent_scratchpad": lambda x: format_to_openai_tool_messages(x["intermediate_steps"]),
        "previous_thoughts": lambda x: format_previous_thoughts(x["previous_thoughts"]),
    }
    | generate_sample_openai_tools_prompt
    | llm
    | OpenAIToolsAgentOutputParser()
)

# Defining strategy

In [40]:
strategy_executor = TreeOfThoughtsDFSStrategy(
    agent=agent,
    tools=tools,
    thought_generator=AgentThoughtGenerator(),
    thought_evaluator=thought_evaluator,
    max_num_thoughts=max_num_thoughts,
    max_iterations=max_num_steps,
    verbose=True,
    return_intermediate_steps=False,
)

## Running strategy

In [41]:
strategy_executor.invoke({"inputs": "1 1 4 6"})



[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m


BadRequestError: Error code: 400 - {'error': {'message': "An assistant message with 'tool_calls' must be followed by tool messages responding to each 'tool_call_id'. The following tool_call_ids did not have response messages: call_lGEOyFMIwkpHf63DHL4OIQ35", 'type': 'invalid_request_error', 'param': 'messages', 'code': None}}