# Game of 24: ToT + DFS

## Utils

### Imports

In [1]:
from langchain_openai import ChatOpenAI
from environments.game_of_24.common.evaluate_utils import GameOf24EvaluateOutputParser, game_of_24_evaluate_prompt
from environments.game_of_24.common.generate_utils import game_of_24_generate_openai_tools_prompt
from planning_library.custom_agent_executor.strategies import TreeOfThoughtsDFSStrategy
from planning_library.custom_agent_executor.strategies.tot_strategy.components import (
    ThoughtEvaluator,
    AgentThoughtGenerator,
    RunnableThoughtEvaluator,
    ThresholdThoughtEvaluatorContinueJudge,
)
from operator import itemgetter
from langchain_core.agents import AgentAction, AgentFinish, AgentStep
from langchain_core.runnables import RunnableLambda
from typing import List, Tuple, Union, Optional
from environments.game_of_24.common.environment import (
    GameOf24Environment,
    AddGameOf24Tool,
    SubtractGameOf24Tool,
    MultiplyGameOf24Tool,
    DivideGameOf24Tool,
    GetRemainingNumbersGameOf24Tool,
)
import os

%load_ext autoreload
%autoreload 2

### Enabling W&B 

In [2]:
# os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
# os.environ["WANDB_PROJECT"] = "aeliseeva-tot-dfs-game24-test"

### Enabling LangSmith

In [3]:
# LangSmith tracing configuration: switch tracing on, name the project and set the API endpoint.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "ToT + Game of 24 test",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

### Defining components

#### Hyperparameters

In [4]:
# --- LLM settings (used for both the evaluator chain and the generating agent) ---
model_name: str = "gpt-3.5-turbo"
temperature: float = 0.8

# --- Tree of Thoughts search settings ---
# Number of thoughts to generate at each iteration.
max_num_thoughts: int = 3
# Total maximum number of iterations (passed to the strategy as `max_iterations`).
max_num_steps: int = 20
# Threshold for evaluation: only thoughts with value > value_threshold will be explored.
value_threshold: float = 0.49

#### Thought Evaluator

The evaluation prompt is adapted from the [original Tree of Thoughts implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [5]:
# Show which variables the evaluation prompt expects to be filled in.
game_of_24_evaluate_prompt.input_variables

['inputs', 'next_thought', 'observation', 'trajectory']

In [6]:
# Preview the evaluation prompt rendered with a sample value for each of its input variables.
print(game_of_24_evaluate_prompt.format(inputs="2 3 6 4", trajectory="none", next_thought="2 + 3", observation="5"))

System: You are a helpful assistant that judges whether given numbers can reach 24.
Human: Given inputs and intermediate steps for Game of 24, evaluate if a new suggestion is correct and allows to reach 24. You are allowed to comment your decision, but make sure to always output one of the following words in the end: 'sure', 'likely', 'impossible'. Here are some examples:

Human: Input: 1 1 8 14
Steps taken: 1 + 1 = 2
Suggestion: 2 + 8 = 10
Judge:
AI: Available numbers after previous steps were: 2 8 14, after last suggestion: 10 14
10 + 14 = 24
sure
Human: Input: 2 12 8 14
Steps taken: 12 / 2 = 6
Suggestion: 2 + 8 = 10
Judge:
AI: Available numbers after previous steps were: 6 8 14, but last suggestion uses 2, which is no longer in the input
impossible
Human: Input: 24 6 12 11
Steps taken: 24 / 12 = 2
Suggestion: 2 * 6 = 12 = 12
Judge:
AI: Available numbers after previous steps were: 2 6 11, after last suggestion: 12 11
11 + 12 = 23, 12 - 11 = 1, 11 * 12 = 132, 11 / 12 = 0.91
impossible

In [7]:
def format_observation(observation: Optional[Union[List[AgentStep], AgentStep]]) -> Optional[str]:
    """Render tool observation(s) as text for the `observation` slot of the evaluator chain.

    Args:
        observation: A single ``AgentStep``, a (possibly nested) list of them, or ``None``.

    Returns:
        ``None`` when ``observation`` is ``None`` or when a single step carries no
        observation. For a list, the formatted entries joined with ``";"`` (entries
        that format to ``None`` are skipped; an empty list gives ``""``). Otherwise
        the step's ``observation`` attribute converted to ``str``.
    """
    if observation is None:
        return None

    if isinstance(observation, list):
        # Format every entry recursively, then drop the ones with nothing to show:
        # ``str.join`` raises TypeError if it is handed ``None``.
        formatted = (format_observation(obs) for obs in observation)
        return ";".join(text for text in formatted if text is not None)

    value = observation.observation
    # A step's observation is not guaranteed to be a string; coerce it so the declared
    # return type holds and joining several observations cannot fail.
    return None if value is None else str(value)

In [8]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages

# Operator symbol shown to the evaluator for each arithmetic tool name.
op_map = {"add": "+", "subtract": "-", "multiply": "*", "divide": "/"}


def format_next_thought(next_thought: Union[List[AgentAction], AgentAction, AgentFinish]) -> str:
    """Render a proposed thought as text for the `next_thought` slot of the evaluator chain.

    Args:
        next_thought: A single ``AgentAction``, an ``AgentFinish``, or a list of
            actions (only the first element of a list is used).

    Returns:
        * for an arithmetic tool listed in ``op_map``: ``"<number1> <op> <number2>"``;
        * for any other tool (e.g. ``get_remaining_numbers``): the tool name itself;
        * for an ``AgentFinish``: its ``log``.

    Raises:
        IndexError: if ``next_thought`` is an empty list.
    """
    if isinstance(next_thought, AgentAction):
        operator_symbol = op_map.get(next_thought.tool)
        if operator_symbol is None:
            # Not an arithmetic tool: there are no operands to render, so fall back to the
            # tool name instead of failing with a KeyError on the operator lookup.
            return next_thought.tool
        number1 = next_thought.tool_input["number1"]
        number2 = next_thought.tool_input["number2"]
        return f"{number1} {operator_symbol} {number2}"
    elif isinstance(next_thought, AgentFinish):
        return next_thought.log
    else:
        # ugly hack, because it ignores an instruction to generate one action per step ><
        # (only the first of the proposed actions is formatted)
        return format_next_thought(next_thought[0])


# Evaluation pipeline: the leading dict builds the four prompt variables
# (`inputs`, `trajectory`, `observation`, `next_thought`) from the evaluator's input,
# and the result is piped through the evaluation prompt, the chat model and the output parser.
# NOTE(review): the run recorded at the end of this notebook stops with an
# OutputParserException from GameOf24EvaluateOutputParser (the model's reply contained none of
# 'sure' / 'likely' / 'impossible'); nothing in this chain retries or falls back in that case.
evaluator_chain = (
    {
        # the "inputs" entry is itself a mapping with an "inputs" key
        # (presumably the dict passed to the strategy's `invoke` -- confirm)
        "inputs": RunnableLambda(lambda x: x["inputs"]["inputs"]),
        # previous steps, converted with LangChain's OpenAI-tools scratchpad formatter
        "trajectory": itemgetter("trajectory") | RunnableLambda(format_to_openai_tool_messages),
        # tool output(s) for the evaluated thought, rendered by `format_observation`
        "observation": itemgetter("observation") | RunnableLambda(format_observation),
        # the evaluated thought itself, rendered by `format_next_thought`
        "next_thought": itemgetter("next_thought") | RunnableLambda(format_next_thought),
    }
    | game_of_24_evaluate_prompt
    | ChatOpenAI(model=model_name, temperature=temperature)
    | GameOf24EvaluateOutputParser()
)

# The judge is configured with `value_threshold`, the backbone wraps `evaluator_chain`;
# together they form the thought evaluator that is handed to the strategy.
judge = ThresholdThoughtEvaluatorContinueJudge(value_threshold)
backbone = RunnableThoughtEvaluator(evaluator_chain)
thought_evaluator = ThoughtEvaluator(judge=judge, backbone=backbone)

#### Thought Generator

The generation prompt is adapted from the [original Tree of Thoughts implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [9]:
# Show which variables the generation prompt expects to be filled in.
game_of_24_generate_openai_tools_prompt.input_variables

['agent_scratchpad', 'max_num_thoughts']

In [10]:
# Preview the generation prompt rendered for an empty scratchpad and five requested thoughts.
# NOTE(review): `inputs` is not among the `input_variables` shown by the previous cell and the
# numbers do not appear in the rendered text, so this argument looks unused -- confirm.
print(game_of_24_generate_openai_tools_prompt.format(inputs="2 2 2 8", agent_scratchpad=[], max_num_thoughts=5))

System: You are a helpful assistant that plays Game of 24.
Human: Your end goal is to obtain 24 from given numbers via basic arithmetic operations with given numbers. Use 5 of available tool(s) to suggest possible next step(s) as the next step from current state. Make sure to suggest exactly 5 tool call(s), no more and no less.


In [11]:
# A single environment instance is created and handed to every tool via `env=env`,
# so all tools refer to the same environment object.
env = GameOf24Environment()
add_tool = AddGameOf24Tool(env=env)
subtract_tool = SubtractGameOf24Tool(env=env)
multiply_tool = MultiplyGameOf24Tool(env=env)
divide_tool = DivideGameOf24Tool(env=env)
get_remaining_numbers_tools = GetRemainingNumbersGameOf24Tool(env=env)

In [12]:
from langchain.agents import create_openai_tools_agent


# OpenAI-tools agent built from the generation prompt, using the model name and temperature
# from the hyperparameters cell. The same five tools are also passed to the strategy executor,
# so keep the two lists in sync.
agent = create_openai_tools_agent(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    tools=[add_tool, subtract_tool, multiply_tool, divide_tool, get_remaining_numbers_tools],
    prompt=game_of_24_generate_openai_tools_prompt,
)

# Thought generator component handed to the strategy as `thought_generator`.
thought_generator = AgentThoughtGenerator()

### Defining strategy

In [13]:
# Assemble the Tree of Thoughts + DFS strategy from the agent, its tools, the thought
# generator / evaluator and the search hyperparameters defined earlier in the notebook.
strategy_executor = TreeOfThoughtsDFSStrategy(
    agent=agent,
    tools=[add_tool, subtract_tool, multiply_tool, divide_tool, get_remaining_numbers_tools],
    thought_generator=thought_generator,
    thought_evaluator=thought_evaluator,
    max_num_thoughts=max_num_thoughts,
    max_iterations=max_num_steps,  # `max_num_steps` from the hyperparameters cell
    verbose=True,
    return_intermediate_steps=False,
)

## Running strategy

In [14]:
# Put the puzzle numbers into the shared environment (presumably its starting state -- confirm).
# Keep them consistent with the `inputs` string given to `strategy_executor.invoke`.
env.update(6, 6, 1, 4)

In [15]:
# Run the strategy on the puzzle; `inputs` carries the same numbers as `env.update(...)`.
# NOTE(review): the recorded output of this cell ends with an OutputParserException from the evaluator.
strategy_executor.invoke({"inputs": "6 6 1 4"})



[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m
[32;1m[1;3m
Invoking: `get_remaining_numbers` with `{}`


[0m[33;1m[1;3m6 6 1 4[0m[32;1m[1;3m
Invoking: `divide` with `{'number1': 6, 'number2': 6}`


[0m[36;1m[1;3m1 6 4[0m[32;1m[1;3m
Invoking: `multiply` with `{'number1': 1, 'number2': 4}`


[0m[38;5;200m[1;3m6 4[0m

OutputParserException: GameOf24EvaluateOutputParser expected output value to contain one of ['sure', 'likely', 'impossible'] (case-insensitive). Received i'm unable to process the given input as it seems to be in a format that i can't understand. please provide the input and the suggestion in a clear and concise manner for me to evaluate..