# Game of 24: ToT + DFS

## Utils

### Imports

In [3]:
from langchain_openai import ChatOpenAI
from environments.game_of_24.evaluate_utils import GameOf24EvaluateOutputParser, game_of_24_evaluate_prompt
from environments.game_of_24.generate_utils import GameOf24GenerateOutputParser, game_of_24_generate_prompt
from planning_library.custom_agent_executor.strategies import TreeOfThoughtsDFSStrategy
from planning_library.custom_agent_executor.strategies.toolllm.thought_generators import AgentThoughtGenerator
from planning_library.custom_agent_executor.strategies.toolllm.thought_evaluators import LLMThoughtEvaluator
from langchain_core.agents import AgentAction
from typing import Dict, Any
import os
import re

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Enabling W&B tracing

In [4]:
# Set LangChain's W&B tracing flag before any chain is built or run.
# NOTE(review): presumably requires wandb to be installed and logged in — confirm.
os.environ["LANGCHAIN_WANDB_TRACING"] = "true"

### Defining components

#### Hyperparameters

In [5]:
# ToT hyperparameters (all forwarded to TreeOfThoughtsDFSStrategy)
max_num_thoughts = 3  # number of thoughts to generate at each iteration
max_num_steps = 20  # total maximum number of iterations
value_threshold = 0.49  # threshold for evaluation; only thoughts with value > value_threshold will be explored
num_retries_evaluator = 5  # how many times to query for evaluation (passed as max_num_retries_evaluator)

# LLM hyperparameters (shared by the evaluator and the generator chains)
model_name = "gpt-3.5-turbo"  # model name given to both ChatOpenAI instances
temperature = 0.8  # sampling temperature given to both ChatOpenAI instances

#### Thought Evaluator

The evaluation prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [6]:
# Show which input variables the evaluation prompt template expects.
game_of_24_evaluate_prompt.input_variables

['inputs', 'thought']

In [7]:
# Render the evaluation prompt with sample values to inspect its few-shot examples.
print(game_of_24_evaluate_prompt.format(inputs="2 3 6 4", thought="2 + 3 = 5 (left: 6 5 4)"))

System: You are a helpful assistant that judges whether given numbers can reach 24.
Human: Evaluate if given numbers can reach 24 (sure/likely/impossible)
Human: Input: 2 8 14
Thought: 2 + 8 = 10 (left: 10 14)
Judge:
AI: 10 + 14 = 24
sure
Human: Input: 6 8 14
Thought: 2 + 8 = 10 (left: 10 14)
Judge:
AI: 10 + 14 = 24
but there was no 2 in input
impossible
Human: Input: 2 6 11
Thought: 2 * 6 = 12 (left: 11 12)
Judge:
AI: 11 + 12 = 23
12 - 11 = 1
11 * 12 = 132
11 / 12 = 0.91
impossible
Human: Input: 2 2 4 10
Thought: 2 + 2 = 4 (left: 4 4 10)
Judge:
AI: 4 + 4 + 10 = 8 + 10 = 18
4 * 10 - 4 = 40 - 4 = 36
(10 - 4) * 4 = 6 * 4 = 24
sure
Human: Input: 4 1 9 10
Thought: 10 + 1 = 11 (left: 4 9 11)
Judge:
AI: 9 + 11 + 4 = 20 + 4 = 24
sure
Human: Input: 5 2 7 4
Thought: 2 * 4 = 8 (left: 5 7 8)
Judge:
AI: 5 + 7 + 8 = 12 + 8 = 20
(8 - 5) * 7 = 3 * 7 = 21
I cannot obtain 24 now, but numbers are within a reasonable range
likely
Human: Input: 5 6 2 3
Thought: 2 * 3 = 6 (left: 5 6 6)
Judge:
AI: 5 + 6 + 6

In [8]:
def process_inputs_evaluator(x: Dict[str, Any]) -> str:
    """Select the numbers the evaluator should judge the new thought against.

    Args:
        x: Evaluator inputs. Reads ``x["current_state"]`` (a sequence of steps,
            where ``step[1]`` is the observation text of that step) and
            ``x["inputs"]`` (the original puzzle numbers).

    Returns:
        ``x["inputs"]`` when no step has been taken yet. Otherwise, the numbers
        captured from the trailing ``(left: ...)`` part of the most recent
        observation, or that whole observation if it has no such part.
    """
    if not x["current_state"]:
        return x["inputs"]

    last_observation = x["current_state"][-1][1]

    # Raw string: "\(" and "\)" are invalid escape sequences in a plain string
    # literal, which recent Python versions warn about at compile time.
    remaining_numbers = re.search(r"^.*?\(left: (.*?)\)$", last_observation)
    if remaining_numbers:
        return remaining_numbers.group(1)

    # No "(left: ...)" suffix: fall back to the raw observation text.
    return last_observation


def _thought_text(thought: Any) -> Any:
    """Return the text of a thought: an action's tool input, otherwise the final output."""
    if isinstance(thought, AgentAction):
        return thought.tool_input
    return thought.return_values["output"]


# Map the evaluator's raw inputs onto the two variables the prompt expects.
evaluator_prompt_inputs = {
    "inputs": lambda x: process_inputs_evaluator(x),
    "thought": lambda x: _thought_text(x["thought"]),
}
evaluator_llm = ChatOpenAI(model=model_name, temperature=temperature)

# prompt inputs -> evaluation prompt -> chat model -> output parser
evaluator_chain = (
    evaluator_prompt_inputs | game_of_24_evaluate_prompt | evaluator_llm | GameOf24EvaluateOutputParser()
)
thought_evaluator = LLMThoughtEvaluator(llm_chain=evaluator_chain)

#### Thought Generator

The generation prompt is adapted from the [original implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [9]:
# Show which input variables the generation prompt template expects.
game_of_24_generate_prompt.input_variables

['inputs', 'max_num_thoughts']

In [10]:
# Render the generation prompt with sample values to inspect its few-shot examples.
print(game_of_24_generate_prompt.format(inputs="2 2 3 4", max_num_thoughts="3"))

System: You are a helpful assistant that proposes next steps in Game of 24.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number.
Human: Input: 2 8 8 14
8 variants for a possible next step:
AI: 2 + 8 = 10 (left: 8 10 14)
8 / 2 = 4 (left: 4 8 14)
14 + 2 = 16 (left: 8 8 16)
2 * 8 = 16 (left: 8 14 16)
8 - 2 = 6 (left: 6 8 14)
14 - 8 = 6 (left: 2 6 8)
14 /  2 = 7 (left: 7 8 8)
14 - 2 = 12 (left: 8 8 12)
Human: Input: 10 14
1 variants for a possible next step:
AI: 10 + 14 = 24
Human: Input: 2 2 3 4
3 variants for a possible next step:


In [11]:
def process_inputs_generator(x: Dict[str, Any]) -> str:
    """Select the numbers the generator should propose next steps for.

    Args:
        x: Generator inputs. Reads ``x["intermediate_steps"]`` (a sequence of
            steps, where ``step[1]`` is the observation text of that step) and
            ``x["inputs"]`` (the original puzzle numbers).

    Returns:
        ``x["inputs"]`` when no step has been taken yet. Otherwise, the numbers
        captured from the trailing ``(left: ...)`` part of the most recent
        observation, or that whole observation if it has no such part.
    """
    if not x["intermediate_steps"]:
        return x["inputs"]

    last_observation = x["intermediate_steps"][-1][1]

    # Raw string: "\(" and "\)" are invalid escape sequences in a plain string
    # literal, which recent Python versions warn about at compile time.
    remaining_numbers = re.search(r"^.*?\(left: (.*?)\)$", last_observation)
    if remaining_numbers:
        return remaining_numbers.group(1)

    # No "(left: ...)" suffix: fall back to the raw observation text.
    return last_observation


# Map the strategy's raw inputs onto the variables handed to the generation prompt.
generator_prompt_inputs = {
    "inputs": lambda x: process_inputs_generator(x),
    "max_num_thoughts": lambda x: x["max_num_thoughts"],
    "agent_scratchpad": lambda x: x["intermediate_steps"],
}
generator_llm = ChatOpenAI(model=model_name, temperature=temperature)

# prompt inputs -> generation prompt -> chat model -> output parser
agent = generator_prompt_inputs | game_of_24_generate_prompt | generator_llm | GameOf24GenerateOutputParser()
thought_generator = AgentThoughtGenerator(thought_generation_mode="propose")

#### Defining a trivial tool

In [12]:
from langchain.tools import tool


# Pass-through tool: it echoes the proposed step back unchanged, so every
# observation equals the thought that produced it (visible in the run output).
# NOTE(review): @tool presumably uses the docstring as the tool description,
# so the docstring text is runtime data — confirm before rewording it.
@tool
def simple_tool(text: str) -> str:
    """Returns its input."""
    return text

### Defining strategy

In [13]:
# Assemble the ToT + DFS executor from the components and hyperparameters defined above.
strategy_executor = TreeOfThoughtsDFSStrategy(
    thought_generator=thought_generator,
    thought_evaluator=thought_evaluator,
    agent=agent,
    max_num_thoughts=max_num_thoughts,
    max_num_steps=max_num_steps,
    value_threshold=value_threshold,
    max_num_retries_evaluator=num_retries_evaluator,
    tools=[simple_tool],  # the only tool offered; it returns its input unchanged
    verbose=True,
    return_intermediate_steps=True,  # the result dict then carries "intermediate_steps"
)

## Running strategy

In [15]:
# Run the strategy on a sample puzzle; the returned dict holds the inputs,
# the final output and the intermediate steps.
strategy_executor.invoke({"inputs": "6 6 1 4"})



[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m
[32;1m[1;3m[0m[36;1m[1;3m6 * 6 = 36 (left: 1 4 36)[0m[32;1m[1;3m[0m[36;1m[1;3m1 + 4 = 5 (left: 5 36)[0m[32;1m[1;3m[0m[36;1m[1;3m5 * 36 = 180 (left: 180)[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 36 = 144 (left: 1 144)[0m[32;1m[1;3m[0m[36;1m[1;3m1 * 144 = 144 (left: 144 1)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 1 = 144 (left: 144)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 144 = 1 (left: 1)[0m[32;1m[1;3m[0m[36;1m[1;3m1 + 1 + 1 + 1 = 4 (left: 3 4)[0m[32;1m[1;3m[0m[36;1m[1;3m1 - 1 + 1 + 1 = 2 (left: 3 2)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 1 = 144 (left: 1 144)[0m[32;1m[1;3m[0m[36;1m[1;3m144 - 1 = 143[0m[32;1m[1;3m[0m[36;1m[1;3m143 + 1 = 144[0m[32;1m[1;3m[0m[36;1m[1;3m144 * 1 = 144 (left: 144)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 144 = 1 (left: 1)[0m[32;1m[1;3m[0m[36;1m[1;3m1 + 1 + 1 + 1 = 4 (left: 4)[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 4 + 4 = 20 (left: 4)[0m[32;1m[1;3m

{'inputs': '6 6 1 4',
 'output': '24 + 0 = 24',
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='6 * 6 = 36 (left: 1 4 36)', log=''),
   '6 * 6 = 36 (left: 1 4 36)'),
  (AgentAction(tool='simple_tool', tool_input='4 * 36 = 144 (left: 1 144)', log=''),
   '4 * 36 = 144 (left: 1 144)'),
  (AgentAction(tool='simple_tool', tool_input='144 / 1 = 144 (left: 1 144)', log=''),
   '144 / 1 = 144 (left: 1 144)'),
  (AgentAction(tool='simple_tool', tool_input='144 - 1 = 143', log=''),
   '144 - 1 = 143'),
  (AgentAction(tool='simple_tool', tool_input='144 / 1 = 144', log=''),
   '144 / 1 = 144'),
  (AgentAction(tool='simple_tool', tool_input='144 / 1 = 144 (left: 144)', log=''),
   '144 / 1 = 144 (left: 144)'),
  (AgentAction(tool='simple_tool', tool_input='144 / 6 = 24 (left: 24)', log=''),
   '144 / 6 = 24 (left: 24)')]}

In [16]:
# Run the same puzzle a second time; the recorded output of this cell differs
# from the first run's output.
strategy_executor.invoke({"inputs": "6 6 1 4"})



[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m
[32;1m[1;3m[0m[36;1m[1;3m6 * 6 = 36 (left: 1 4 36)[0m[32;1m[1;3m[0m[36;1m[1;3m1 + 4 = 5 (left: 5 36)[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 36 = 144 (left: 1 144)[0m[32;1m[1;3m[0m[36;1m[1;3m1 * 144 = 144 (left: 144)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 144 = 1[0m[32;1m[1;3m[0m[36;1m[1;3m144 * 1 = 144[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 1 = 144[0m[32;1m[1;3m[0m[36;1m[1;3m144 - 1 = 143 (left: 143)[0m[32;1m[1;3m[0m[36;1m[1;3m1. 143 + 0 = 143 (left: 0 143)[0m[32;1m[1;3m[0m[36;1m[1;3m2. 143 - 0 = 143 (left: 0 143)[0m[32;1m[1;3m[0m[36;1m[1;3m3. 143 / 1 = 143 (left: 1 143)[0m[32;1m[1;3m[0m[36;1m[1;3m143 - 1 = 142 (left: 142)[0m[32;1m[1;3m[0m[36;1m[1;3m1. 1 + 4 = 5 (left: 5 2)[0m[32;1m[1;3m[0m[36;1m[1;3m144 * 1 = 144 (left: 144)[0m[32;1m[1;3m[0m[36;1m[1;3m144 / 1 = 144[0m[32;1m[1;3m[0m[36;1m[1;3m144 - 1 = 143 (left: 143)[0m[32;1m[1;3m[0m[36;1m[1;3m1.

{'inputs': '6 6 1 4',
 'output': '144 / 6 = 24',
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='6 * 6 = 36 (left: 1 4 36)', log=''),
   '6 * 6 = 36 (left: 1 4 36)'),
  (AgentAction(tool='simple_tool', tool_input='4 * 36 = 144 (left: 1 144)', log=''),
   '4 * 36 = 144 (left: 1 144)'),
  (AgentAction(tool='simple_tool', tool_input='1 * 144 = 144 (left: 144)', log=''),
   '1 * 144 = 144 (left: 144)'),
  (AgentAction(tool='simple_tool', tool_input='144 * 1 = 144', log=''),
   '144 * 1 = 144'),
  (AgentAction(tool='simple_tool', tool_input='144 / 1 = 144', log=''),
   '144 / 1 = 144'),
  (AgentAction(tool='simple_tool', tool_input='144 * 1 = 144 (left: 144)', log=''),
   '144 * 1 = 144 (left: 144)'),
  (AgentAction(tool='simple_tool', tool_input='144 / 1 = 144', log=''),
   '144 / 1 = 144'),
  (AgentAction(tool='simple_tool', tool_input='144 * 1 = 144 (left: 144)', log=''),
   '144 * 1 = 144 (left: 144)')]}