# Game of 24: ToT + DFS

## Utils

### Imports

In [16]:
from langchain_openai import ChatOpenAI
from environments.game_of_24.common.evaluate_utils import GameOf24EvaluateOutputParser, game_of_24_evaluate_prompt
from environments.game_of_24.common.generate_utils import GameOf24GenerateOutputParser, game_of_24_generate_prompt
from planning_library.custom_agent_executor.strategies import TreeOfThoughtsDFSStrategy
from planning_library.custom_agent_executor.strategies.toolllm.thought_generators import AgentThoughtGenerator
from planning_library.custom_agent_executor.strategies.toolllm.thought_evaluators import LLMThoughtEvaluator
from langchain_core.agents import AgentAction
from typing import Dict, Any
import os
import re

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Enabling W&B 

In [17]:
# Route LangChain traces to Weights & Biases: switch tracing on and point it
# at the project and the W&B server to use.
os.environ.update(
    {
        "LANGCHAIN_WANDB_TRACING": "true",
        "WANDB_PROJECT": "aeliseeva-tot-dfs-game24-test",
        "WANDB_BASE_URL": "https://jetbrains.wandb.io/",
    }
)

### Enabling LangSmith

In [18]:
from uuid import uuid4

# A fresh 8-character hex suffix is drawn each time this cell runs, so every
# execution logs to its own LangSmith project name.
unique_id = uuid4().hex[:8]
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": f"Tracing Walkthrough - {unique_id}",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

In [19]:
from langsmith import Client

# Instantiate a LangSmith client with default arguments.
# NOTE(review): `client` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
client = Client()

### Defining components

#### Hyperparameters

In [20]:
# ToT hyperparameters (all forwarded to TreeOfThoughtsDFSStrategy below)
max_num_thoughts = 3  # number of candidate thoughts to generate at each iteration
max_num_steps = 20  # upper bound on the total number of iterations
value_threshold = 0.49  # only thoughts with value > value_threshold will be explored
num_retries_evaluator = 5  # how many times to query the evaluator; passed as max_num_retries_evaluator

# LLM settings, shared by the evaluator and the generator ChatOpenAI instances
model_name = "gpt-3.5-turbo"
temperature = 0.8  # non-zero sampling temperature: repeated runs can differ

#### Thought Evaluator

The evaluation prompt is adapted from the [original Tree of Thoughts implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [21]:
# Inspect which variables the evaluation prompt expects to be filled in.
game_of_24_evaluate_prompt.input_variables

['inputs', 'thought']

In [22]:
# Render the evaluation prompt for a sample input/thought pair to check its few-shot layout.
print(game_of_24_evaluate_prompt.format(inputs="2 3 6 4", thought="2 + 3 = 5 (left: 6 5 4)"))

System: You are a helpful assistant that judges whether given numbers can reach 24.
Human: Evaluate if given numbers can reach 24 (sure/likely/impossible)
Human: Input: 2 8 14
Thought: 2 + 8 = 10 (left: 10 14)
Judge:
AI: 10 + 14 = 24
sure
Human: Input: 6 8 14
Thought: 2 + 8 = 10 (left: 10 14)
Judge:
AI: 10 + 14 = 24
but there was no 2 in input
impossible
Human: Input: 2 6 11
Thought: 2 * 6 = 12 (left: 11 12)
Judge:
AI: 11 + 12 = 23
12 - 11 = 1
11 * 12 = 132
11 / 12 = 0.91
impossible
Human: Input: 2 2 4 10
Thought: 2 + 2 = 4 (left: 4 4 10)
Judge:
AI: 4 + 4 + 10 = 8 + 10 = 18
4 * 10 - 4 = 40 - 4 = 36
(10 - 4) * 4 = 6 * 4 = 24
sure
Human: Input: 4 1 9 10
Thought: 10 + 1 = 11 (left: 4 9 11)
Judge:
AI: 9 + 11 + 4 = 20 + 4 = 24
sure
Human: Input: 5 2 7 4
Thought: 2 * 4 = 8 (left: 5 7 8)
Judge:
AI: 5 + 7 + 8 = 12 + 8 = 20
(8 - 5) * 7 = 3 * 7 = 21
I cannot obtain 24 now, but numbers are within a reasonable range
likely
Human: Input: 5 6 2 3
Thought: 2 * 3 = 6 (left: 5 6 6)
Judge:
AI: 5 + 6 + 6

In [23]:
def process_inputs_evaluator(x: Dict[str, Any]) -> str:
    """Build the ``inputs`` value for the evaluation prompt.

    Mirrors ``process_inputs_generator`` but reads the history from
    ``x["current_state"]`` instead of ``x["intermediate_steps"]``.

    Args:
        x: Mapping with at least two keys:
            * ``"inputs"`` - the original puzzle numbers, e.g. ``"6 6 1 4"``.
            * ``"current_state"`` - sequence of steps taken so far; element
              ``[1]`` of the last step must be a string (presumably the
              observation of an (action, observation) pair - confirm against
              the strategy implementation).

    Returns:
        * ``x["inputs"]`` when no step has been taken yet;
        * the text inside ``(left: ...)`` when the latest step ends with that
          suffix, i.e. the numbers still available;
        * otherwise the latest step's text unchanged.
    """
    if not x["current_state"]:
        return x["inputs"]

    last_observation = x["current_state"][-1][1]

    # Raw string: "\(" in a regular string literal is an invalid escape
    # sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
    next_numbers = re.search(r"^.*?\(left: (.*?)\)$", last_observation)

    if next_numbers:
        return next_numbers.group(1)

    return last_observation


# Evaluation pipeline: map the strategy's input dict onto the prompt variables
# ('inputs', 'thought'), render the prompt, query the chat model and parse its reply.
evaluator_chain = (
    {
        # Numbers still available before the thought being judged.
        "inputs": lambda x: process_inputs_evaluator(x),
        # The thought's text: the tool input for an AgentAction, otherwise the
        # 'output' entry of return_values (presumably an AgentFinish — confirm).
        "thought": lambda x: x["thought"].tool_input
        if isinstance(x["thought"], AgentAction)
        else x["thought"].return_values["output"],
    }
    | game_of_24_evaluate_prompt
    | ChatOpenAI(model=model_name, temperature=temperature)
    | GameOf24EvaluateOutputParser()
)
# Wrap the chain in the evaluator object expected by the strategy.
thought_evaluator = LLMThoughtEvaluator(llm_chain=evaluator_chain)

#### Thought Generator

The generation prompt is adapted from the [original Tree of Thoughts implementation](https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py).

In [24]:
# Inspect which variables the generation prompt expects to be filled in.
game_of_24_generate_prompt.input_variables

['inputs', 'max_num_thoughts']

In [25]:
# Render the generation prompt for a sample input to check its few-shot layout.
print(game_of_24_generate_prompt.format(inputs="2 2 3 4", max_num_thoughts="3"))

System: You are a helpful assistant that proposes next steps in Game of 24.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number.
Human: Input: 2 8 8 14
8 variants for a possible next step:
AI: 2 + 8 = 10 (left: 8 10 14)
8 / 2 = 4 (left: 4 8 14)
14 + 2 = 16 (left: 8 8 16)
2 * 8 = 16 (left: 8 14 16)
8 - 2 = 6 (left: 6 8 14)
14 - 8 = 6 (left: 2 6 8)
14 /  2 = 7 (left: 7 8 8)
14 - 2 = 12 (left: 8 8 12)
Human: Input: 10 14
1 variants for a possible next step:
AI: 10 + 14 = 24
Human: Input: 2 2 3 4
3 variants for a possible next step:


In [26]:
def process_inputs_generator(x: Dict[str, Any]) -> str:
    """Build the ``inputs`` value for the generation prompt.

    Mirrors ``process_inputs_evaluator`` but reads the history from
    ``x["intermediate_steps"]`` instead of ``x["current_state"]``.

    Args:
        x: Mapping with at least two keys:
            * ``"inputs"`` - the original puzzle numbers, e.g. ``"6 6 1 4"``.
            * ``"intermediate_steps"`` - sequence of steps taken so far;
              element ``[1]`` of the last step must be a string (presumably
              the observation of an (action, observation) pair - confirm
              against the strategy implementation).

    Returns:
        * ``x["inputs"]`` when no step has been taken yet;
        * the text inside ``(left: ...)`` when the latest step ends with that
          suffix, i.e. the numbers still available;
        * otherwise the latest step's text unchanged.
    """
    if not x["intermediate_steps"]:
        return x["inputs"]

    last_observation = x["intermediate_steps"][-1][1]

    # Raw string: "\(" in a regular string literal is an invalid escape
    # sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
    next_numbers = re.search(r"^.*?\(left: (.*?)\)$", last_observation)

    if next_numbers:
        return next_numbers.group(1)

    return last_observation


# Generation pipeline: map the strategy's input dict onto the prompt variables,
# render the prompt, query the chat model and parse the proposed next steps.
agent = (
    {
        # Numbers still available at the current node of the search.
        "inputs": lambda x: process_inputs_generator(x),
        "max_num_thoughts": lambda x: x["max_num_thoughts"],
        # NOTE(review): the prompt's input_variables printed above are only
        # ['inputs', 'max_num_thoughts'] — confirm whether this key is used.
        "agent_scratchpad": lambda x: x["intermediate_steps"],
    }
    | game_of_24_generate_prompt
    | ChatOpenAI(model=model_name, temperature=temperature)
    | GameOf24GenerateOutputParser()
)
# Thought generator configured with thought_generation_mode="propose".
thought_generator = AgentThoughtGenerator(thought_generation_mode="propose")

#### Defining a trivial tool

In [27]:
from langchain.tools import tool


# Pass-through tool: the strategy is given a tool list, but Game of 24 needs no
# external action, so the only tool echoes the proposed thought back unchanged.
# NOTE(review): the docstring below is presumably picked up by @tool as the
# tool's description — keep it in sync with the behavior.
@tool
def simple_tool(text: str) -> str:
    """Returns its input."""
    return text

### Defining strategy

In [28]:
# Assemble the Tree-of-Thoughts DFS executor from the components defined above.
strategy_executor = TreeOfThoughtsDFSStrategy(
    thought_generator=thought_generator,
    thought_evaluator=thought_evaluator,
    agent=agent,
    max_num_thoughts=max_num_thoughts,
    max_num_steps=max_num_steps,
    value_threshold=value_threshold,
    max_num_retries_evaluator=num_retries_evaluator,
    tools=[simple_tool],  # the only available action: echo the thought back
    verbose=True,
    return_intermediate_steps=True,  # result dict then includes 'intermediate_steps'
)

## Running strategy

In [29]:
# Solve one puzzle; the returned dict carries 'inputs', the final 'output' and
# the accepted path in 'intermediate_steps'.
strategy_executor.invoke({"inputs": "6 6 1 4"})

[34m[1mwandb[0m: Streaming LangChain activity to W&B at https://jetbrains.wandb.io/machine-learning-methods-in-software-engineering/aeliseeva-tot-dfs-game24-test/runs/ndph4e9w
[34m[1mwandb[0m: `WandbTracer` is currently in beta.
[34m[1mwandb[0m: Please report any issues to https://github.com/wandb/wandb/issues with the tag `langchain`.




[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m
[32;1m[1;3m[0m[36;1m[1;3m6 + 4 = 10 (left: 6 1 10)[0m[32;1m[1;3m[0m[36;1m[1;3m6 + 1 = 7 (left: 7 10)[0m[32;1m[1;3m[0m[36;1m[1;3m7 - 10 = -3 (left: -3)[0m[32;1m[1;3m[0m[36;1m[1;3m-3 * (-3) = 9[0m[32;1m[1;3m[0m[36;1m[1;3m-3 + 9 = 6[0m[32;1m[1;3m[0m[36;1m[1;3m6 - 9 = -3 (left: -3 -3)[0m[32;1m[1;3m[0m[36;1m[1;3m6 * 1 = 6 (left: 6 10)[0m[32;1m[1;3m[0m[36;1m[1;3m10 - 6 = 4 (left: 4)[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 4 + 4 = 20[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 4 + 4 = 20 is already equal to 24, so no further steps are needed.[0m[32;1m[1;3m[0m[36;1m[1;3mThere are no further steps needed since the input already equals 24.[0m[32;1m[1;3m[0m[36;1m[1;3mNo further steps are needed since the input already equals 24.[0m[32;1m[1;3m[0m[36;1m[1;3mNo further steps are needed since the input already equals 24.[0m[32;1m[1;3m[0m[36;1m[1;3mNo further steps are needed since 

{'inputs': '6 6 1 4',
 'output': '24 + 0 = 24',
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='6 + 4 = 10 (left: 6 1 10)', log=''),
   '6 + 4 = 10 (left: 6 1 10)'),
  (AgentAction(tool='simple_tool', tool_input='10 - 6 = 4 (left: 4 6)', log=''),
   '10 - 6 = 4 (left: 4 6)'),
  (AgentAction(tool='simple_tool', tool_input='4 * 6 = 24 (left: 24)', log=''),
   '4 * 6 = 24 (left: 24)')]}

In [30]:
# Same puzzle again: both LLMs sample at temperature 0.8, so the explored path
# and the answer can differ from the previous run.
# NOTE(review): in the recorded runs the final 'output' ('24 + 0 = 24',
# '48 / 2 = 24') uses numbers that were not left over — confirm whether the
# final step is validated anywhere.
strategy_executor.invoke({"inputs": "6 6 1 4"})



[1m> Entering new TreeOfThoughtsDFSStrategy chain...[0m
[32;1m[1;3m[0m[36;1m[1;3m6 + 6 = 12 (left: 12 1 4)[0m[32;1m[1;3m[0m[36;1m[1;3m12 + 1 = 13 (left: 4 13)[0m[32;1m[1;3m[0m[36;1m[1;3m4 * 13 = 52 (left: 52)[0m[32;1m[1;3m[0m[36;1m[1;3m12 * 1 = 12 (left: 12 4)[0m[32;1m[1;3m[0m[36;1m[1;3m12 * 4 = 48 (left: 48)[0m[32;1m[1;3m[0m

[1m> Finished chain.[0m


{'inputs': '6 6 1 4',
 'output': '48 / 2 = 24',
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='6 + 6 = 12 (left: 12 1 4)', log=''),
   '6 + 6 = 12 (left: 12 1 4)'),
  (AgentAction(tool='simple_tool', tool_input='12 * 1 = 12 (left: 12 4)', log=''),
   '12 * 1 = 12 (left: 12 4)'),
  (AgentAction(tool='simple_tool', tool_input='12 * 4 = 48 (left: 48)', log=''),
   '12 * 4 = 48 (left: 48)')]}