# Game of 24: ToT + DFS

## Utils

### Imports

In [28]:
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain_openai import ChatOpenAI
from environments.game_of_24.evaluate_utils import GameOf24EvaluateOutputParser, game_of_24_evaluate_prompt
from environments.game_of_24.game_24_utils import game_of_24_prompt
from environments.game_of_24.generate_utils import GameOf24GenerateOutputParser, game_of_24_generate_prompt
from planning_library.custom_agent_executor.strategies import TreeOfThoughtsDFSStrategy
from planning_library.custom_agent_executor.strategies.toolllm.thought_generators import LLMThoughtGenerator
from planning_library.custom_agent_executor.strategies.toolllm.thought_evaluators import LLMThoughtEvaluator
import os
import re

### Enabling W&B tracing

In [29]:
# Set the flag before any chain is built or invoked, so every run below is affected.
# NOTE(review): presumably LangChain reads this variable to send traces to Weights & Biases,
# which would require `wandb` to be installed and logged in — confirm.
os.environ["LANGCHAIN_WANDB_TRACING"] = "true"

### Defining components

#### Hyperparameters

In [30]:
# ToT hyperparameters
# max_num_thoughts is passed both to the strategy and to the generator's output parser (as `k`).
max_num_thoughts = 3  # number of thoughts to generate at each iteration
max_num_steps = 20  # total maximum number of iterations
value_threshold = 0.49  # threshold for evaluation; only thoughts with value > value_threshold will be explored

# other hyperparameters
# The same model and temperature are used by all three LLM calls in this notebook:
# the thought evaluator, the thought generator and the agent.
model_name = "gpt-3.5-turbo"
temperature = 0.8

#### Thought Evaluator

The evaluation prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [24]:
# Inspect which variables the evaluation prompt expects when it is formatted.
game_of_24_evaluate_prompt.input_variables

['thought']

In [27]:
# Preview the fully rendered evaluation prompt for a sample thought.
print(game_of_24_evaluate_prompt.format(thought="2 + 3 = 5 (left: 6 5 4)"))

System: You are a helpful assistant that judges whether given numbers can reach 24.
Human: Evaluate if given numbers can reach 24 (sure/likely/impossible)
Human: Input: 2 + 8 = 10 (left: 10 14)
Judge:
AI: 10 + 14 = 24
sure
Human: Input: 2 * 6 = 12 (left: 11 12)
Judge:
AI: 11 + 12 = 23
12 - 11 = 1
11 * 12 = 132
11 / 12 = 0.91
impossible
Human: Input: 2 + 2 = 4 (left: 4 4 10)
Judge:
AI: 4 + 4 + 10 = 8 + 10 = 18
4 * 10 - 4 = 40 - 4 = 36
(10 - 4) * 4 = 6 * 4 = 24
sure
Human: Input: 10 + 1 = 11 (left: 4 9 11)
Judge:
AI: 9 + 11 + 4 = 20 + 4 = 24
sure
Human: Input: 2 * 4 = 8 (left: 5 7 8)
Judge:
AI: 5 + 7 + 8 = 12 + 8 = 20
(8 - 5) * 7 = 3 * 7 = 21
I cannot obtain 24 now, but numbers are within a reasonable range
likely
Human: Input: 2 * 3 = 6 (left: 5 6 6)
Judge:
AI: 5 + 6 + 6 = 17
(6 - 5) * 6 = 1 * 6 = 6
I cannot obtain 24 now, but numbers are within a reasonable range
likely
Human: Input: 2 * 5 = 10 (left: 10 10 11)
Judge:
AI: 10 + 10 + 11 = 31
(11 - 10) * 10 = 10
10 10 10 are all too big
i

In [5]:
# LLM that judges a proposed thought; shares model and temperature with the other components.
evaluator_llm = ChatOpenAI(model=model_name, temperature=temperature)

# Pipeline: map the incoming payload to prompt variables -> render prompt -> LLM -> parse the judgement.
evaluator_chain = (
    {
        # Most recent entry of the current state if there is one, otherwise the original puzzle inputs.
        # NOTE(review): the prompt's input_variables shown above are only ['thought'],
        # so this key looks unused by the prompt — confirm before relying on it.
        "inputs": lambda payload: payload["current_state"][-1] if payload["current_state"] else payload["inputs"],
        # The candidate thought to be judged, forwarded unchanged.
        "thought": lambda payload: payload["thought"],
    }
    | game_of_24_evaluate_prompt
    | evaluator_llm
    | GameOf24EvaluateOutputParser()
)

# Wrap the chain in the evaluator interface that is later handed to the strategy.
thought_evaluator = LLMThoughtEvaluator(llm_chain=evaluator_chain)

#### Thought Generator

The generation prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [19]:
# Inspect which variables the generation prompt expects when it is formatted.
game_of_24_generate_prompt.input_variables

['inputs', 'max_num_thoughts']

In [23]:
# Preview the fully rendered generation prompt for a sample puzzle, asking for 3 candidate steps.
print(game_of_24_generate_prompt.format(inputs="2 2 3 4", max_num_thoughts="3"))

System: You are a helpful assistant that proposes next steps in Game of 24.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number.
Human: Input: 2 8 8 14
8 variants for a possible next step:
AI: 2 + 8 = 10 (left: 8 10 14)
8 / 2 = 4 (left: 4 8 14)
14 + 2 = 16 (left: 8 8 16)
2 * 8 = 16 (left: 8 14 16)
8 - 2 = 6 (left: 6 8 14)
14 - 8 = 6 (left: 2 6 8)
14 /  2 = 7 (left: 7 8 8)
14 - 2 = 12 (left: 8 8 12)
Human: Input: 2 2 3 4
3 variants for a possible next step:


In [7]:
def _remaining_numbers(x):
    """Return the numbers the next thought should be generated from.

    When ``x["current_state"]`` is empty, the original puzzle ``x["inputs"]``
    is returned unchanged. Otherwise the numbers are parsed out of the trailing
    ``(left: ...)`` part of the most recent thought in ``x["current_state"]``.

    Args:
        x: Mapping with the keys ``"inputs"`` and ``"current_state"``.

    Returns:
        The numbers to work with, as a string.

    Raises:
        ValueError: If the most recent thought does not end with ``(left: ...)``.
    """
    if not x["current_state"]:
        return x["inputs"]

    # The pattern is anchored with ^...$, so stray whitespace or line breaks around
    # the model's output would make it fail; strip them before matching.
    last_thought = x["current_state"][-1].strip()
    match = re.search(r"^.*?\(left: (.*?)\)$", last_thought)
    if match is None:
        # re.search returns None on no match; fail with a message that names the
        # offending thought instead of an AttributeError on None.
        raise ValueError(f"Cannot extract remaining numbers: expected '... (left: ...)', got {last_thought!r}")
    return match.group(1)


# Pipeline: map the incoming payload to prompt variables -> render prompt -> LLM -> parse proposed thoughts.
generator_chain = (
    {
        "inputs": _remaining_numbers,
        "max_num_thoughts": lambda x: x["max_num_thoughts"],
    }
    | game_of_24_generate_prompt
    | ChatOpenAI(model=model_name, temperature=temperature)
    | GameOf24GenerateOutputParser(k=max_num_thoughts)
)

# Wrap the chain in the generator interface that is later handed to the strategy.
thought_generator = LLMThoughtGenerator(llm_chain=generator_chain, thought_generation_mode="propose")

#### Agent

The agent prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [18]:
# Inspect which variables the agent prompt expects when it is formatted.
game_of_24_prompt.input_variables

['inputs']

In [15]:
# Preview the fully rendered agent prompt for a sample puzzle.
print(game_of_24_prompt.format(inputs="2 2 3 4"))

System: You are a helpful assistant that plays Game of 24.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number.
Human: Input: 4 4 6 8
AI: 4 + 8 = 12 (left: 4 6 12)
6 - 4 = 2 (left: 2 12)
2 * 12 = 24 (left: 24)
Answer: (6 - 4) * (4 + 8) = 24
Human: Input: 2 9 10 12
AI: 12 * 2 = 24 (left: 9 10 24)
10 - 9 = 1 (left: 1 24)
24 * 1 = 24 (left: 24)
Answer: (12 * 2) * (10 - 9) = 24
Human: Input: 4 9 10 13
AI: 13 - 10 = 3 (left: 3 4 9)
9 - 3 = 6 (left: 4 6)
4 * 6 = 24 (left: 24)
Answer: 4 * (9 - (13 - 10)) = 24
Human: Input: 1 4 8 8
AI: 8 / 4 = 2 (left: 1 2 8)
1 + 2 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (1 + 8 / 4) * 8 = 24
Human: Input: 5 5 5 9
AI: 5 + 5 = 10 (left: 5 9 10)
10 + 5 = 15 (left: 9 15)
15 + 9 = 24 (left: 24)
Answer: ((5 + 5) + 5) + 9 = 24
Human: Input: 2 2 3 4


In [9]:
# LLM behind the agent; shares model and temperature with the other components.
agent_llm = ChatOpenAI(model=model_name, temperature=temperature)

# Pipeline: map the incoming payload to prompt variables -> render prompt -> LLM -> parse the agent output.
agent = (
    {
        # The puzzle numbers, forwarded unchanged.
        "inputs": lambda payload: payload["inputs"],
        # Intermediate steps converted to OpenAI tool messages.
        # NOTE(review): the prompt's input_variables shown above are only ['inputs'];
        # confirm that the prompt actually consumes `agent_scratchpad`.
        "agent_scratchpad": lambda payload: format_to_openai_tool_messages(payload["intermediate_steps"]),
    }
    | game_of_24_prompt
    | agent_llm
    | OpenAIToolsAgentOutputParser()
)

### Defining strategy

In [10]:
# Assemble the Tree-of-Thoughts DFS strategy from the components defined above.
strategy_executor = TreeOfThoughtsDFSStrategy(
    # components
    agent=agent,
    thought_generator=thought_generator,
    thought_evaluator=thought_evaluator,
    tools=[],  # no tools are given to the agent in this notebook
    # search hyperparameters (see the "Hyperparameters" cell)
    max_num_thoughts=max_num_thoughts,
    max_num_steps=max_num_steps,
    value_threshold=value_threshold,
    # logging
    verbose=True,
)

### Running strategy

In [None]:
# Run the strategy on one puzzle; the numbers are passed as a single space-separated string.
puzzle = "5 3 4 5"
strategy_executor.invoke({"inputs": puzzle})