# Game of 24: Reflexion

## Utils

### Imports

In [1]:
from langchain_openai import ChatOpenAI
from environments.game_of_24.common.evaluate_utils import (
    GameOf24EvaluateOutputParser,
    game_of_24_last_step_evaluate_prompt,
)
from environments.game_of_24.reflexion.generate_utils import (
    game_of_24_generate_reflexion_prompt,
    GameOf24GenerateReflexionOutputParser,
)
from environments.game_of_24.reflexion.self_reflection_utils import (
    game_of_24_self_reflection_prompt,
    GameOf24SelfReflexionOutputParser,
)

from planning_library.langgraph_version.strategies.reflexion.actors import AgentActor
from planning_library.langgraph_version.strategies.reflexion.evaluators import ThresholdRunnableEvaluator
from planning_library.langgraph_version.strategies.reflexion.self_reflections import RunnableSelfReflection
from planning_library.langgraph_version.strategies.reflexion import create_reflexion_strategy

import os

%load_ext autoreload
%autoreload 2

### Setting up logging

In [2]:
# Optional, disabled by default: uncommenting both lines sets the
# LANGCHAIN_WANDB_TRACING flag and the WANDB_PROJECT name before any chain runs.
# NOTE(review): presumably this sends LangChain traces to Weights & Biases — confirm.
# os.environ["LANGCHAIN_WANDB_TRACING"] = "true"
# os.environ["WANDB_PROJECT"] = "aeliseeva-reflexion-game24-test"

In [3]:
# Configure tracing through environment variables in a single call: switch on
# the LANGCHAIN_TRACING_V2 flag, name the project the runs are grouped under,
# and set the tracing API endpoint.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Reflexion",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

## Hyperparameters

In [4]:
# --- Reflexion loop control ---
# Upper bound on the number of iterations; the loop exits once it is reached.
max_num_iterations = 20
# Evaluation threshold; the loop exits once it is reached.
value_threshold = 1.0

# --- LLM settings (other hyperparameters) ---
temperature = 0.8
model_name = "gpt-3.5-turbo"

## Defining components

### Actor

In [5]:
# Show which variables the actor prompt template expects to be filled in.
game_of_24_generate_reflexion_prompt.input_variables

['inputs', 'intermediate_steps', 'self_reflections']

In [6]:
# Render the actor prompt for a sample puzzle with one previous step and no
# self-reflections, to eyeball the few-shot examples and the final formatting.
print(
    game_of_24_generate_reflexion_prompt.format(
        inputs="2 3 6 4",
        intermediate_steps="2 + 3 = 5 (left: 6 5 4)",
        self_reflections="none",
    )
)

System: You are an advanced reasoning agent that can improve based on self reflection.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number. If you managed to obtain 24, output a final answer.
Human: Input: 2 8 8 14
Previous steps: none
Your previous self-reflexions: none

AI: 2 + 8 = 10 (left: 8 10 14)
Human: Input: 4 6 8 8
Previous steps: 4 * 6 = 24 (left: 24 8 8)
24 + 8 = 36 (left: 36 8)
Your previous self-reflexions: I made an error in my response, as I computed 16 + 6 = 24 incorrectly (in fact, 16 + 6 = 22) and did not use number 4 that was present in input. Next time, I should be more cautious and double-check that I am using all input numbers before providing an answer.

AI: 36 - 8 = 24
answer: 4 * 6 + 8 - 8 = 24
Human: Input: 2 3 6 4
Previous steps: 2 + 3 = 5 (left: 6 5 4)
Your previous self-reflexions: none


In [7]:
from langchain.tools import tool


# Identity tool handed to the actor: it returns the text it receives unchanged.
# In the recorded runs below, each intermediate step appears as an AgentAction
# for 'simple_tool' paired with an observation equal to its tool_input.
# NOTE(review): LangChain's @tool presumably uses the docstring as the tool
# description, so treat the docstring as runtime text — confirm before editing it.
@tool
def simple_tool(text: str) -> str:
    """Returns its input."""
    return text

In [8]:
# Actor chain: state -> prompt variables -> actor prompt -> chat model -> output parser.
# The leading dict maps the strategy state onto the three prompt variables:
#   * "inputs" is passed through as is;
#   * "intermediate_steps" keeps the second element of every recorded step and
#     joins them with newlines;
#   * "self_reflections" joins the accumulated reflections with newlines.
# Both joined fields fall back to the literal "none" when there is nothing to show.
agent = (
    {
        "inputs": lambda state: state["inputs"],
        "intermediate_steps": lambda state: (
            "none"
            if not state["intermediate_steps"]
            else "\n".join(step[1] for step in state["intermediate_steps"])
        ),
        "self_reflections": lambda state: (
            "none" if not state["self_reflections"] else "\n".join(state["self_reflections"])
        ),
    }
    | game_of_24_generate_reflexion_prompt
    | ChatOpenAI(temperature=temperature, model=model_name)
    | GameOf24GenerateReflexionOutputParser()
)

# The actor wraps the chain together with the single pass-through tool.
tools = [simple_tool]
actor = AgentActor(tools=tools, agent=agent)

### Evaluator

In [9]:
# Show which variables the evaluator prompt template expects to be filled in.
game_of_24_last_step_evaluate_prompt.input_variables

['inputs', 'thought']

In [10]:
# Render the evaluator prompt for a sample puzzle and candidate answer, to
# eyeball the few-shot judgements and the final formatting.
print(
    game_of_24_last_step_evaluate_prompt.format(
        inputs="2 3 6 4",
        thought="2 + 3 + 6 + 4 = 24",
    )
)

System: You are a helpful assistant that judges whether answers to Game of 24 are correct.
Human: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Given an input and an answer, give a judgement (sure/impossible) if the answer is correct, i.e. it uses each input exactly once and no other numbers, and reach 24.
Human: Input: 4 4 6 8
Answer: (4 + 8) * (6 - 4) = 24
Judge:
AI: sure
Human: Input: 2 9 10 12
Answer: 2 * 12 * (10 - 9) = 24
Judge:
AI: sure
Human: Input: 4 9 10 13
Answer: (13 - 9) * (10 - 4) = 24
Judge:
AI: sure
Human: Input: 4 4 6 8
Answer: (4 + 8) * (6 - 4) + 1 = 25
Judge:
AI: impossible
Human: Input: 2 9 10 12
Answer: 2 * 12 * (10 - 9) = 24
Judge:
AI: sure
Human: Input: 2 9 10 12
Answer: 2 * (12 - 10) = 24
Judge:
AI: impossible
Human: Input: 4 9 10 13
Answer: (13 - 4) * (10 - 9) = 24
Judge:
AI: impossible
Human: Input: 2 3 6 4
Answer: 2 + 3 + 6 + 4 = 24
Judge:


In [11]:
# Evaluator chain: state -> prompt variables -> judge prompt -> chat model -> output parser.
# The puzzle is passed through as "inputs"; the actor's final output (the
# "output" entry of agent_outcome.return_values) is judged as "thought".
# NOTE(review): the judge model name is hard-coded here rather than taken from
# the hyperparameters cell, and it shares the sampling temperature used for the
# actor, so its verdict may vary between runs — confirm this is intended.
evaluator_chain = (
    {
        "inputs": lambda state: state["inputs"],
        "thought": lambda state: state["agent_outcome"].return_values["output"],
    }
    | game_of_24_last_step_evaluate_prompt
    | ChatOpenAI(temperature=temperature, model="gpt-4-0125-preview")
    | GameOf24EvaluateOutputParser()
)

# Wrap the chain in an evaluator configured with the threshold from the
# hyperparameters cell.
evaluator = ThresholdRunnableEvaluator(threshold=value_threshold, llm_chain=evaluator_chain)

### Self-Reflection

In [12]:
# Show which variables the self-reflection prompt template expects to be filled in.
game_of_24_self_reflection_prompt.input_variables

['answer', 'inputs', 'intermediate_steps']

In [13]:
# Render the self-reflection prompt for a sample failed trial (three steps plus
# the final answer), to eyeball the few-shot examples and the final formatting.
print(
    game_of_24_self_reflection_prompt.format(
        answer="2 + 3 + 4 + 6 = 24",
        intermediate_steps="\n".join(
            (
                "2 + 3 = 5 (left: 5 4 6)",
                "5 + 4 = 9 (left: 9 6)",
                "9 + 6 = 24",
            )
        ),
        inputs="2 3 6 4",
    )
)

System: You are an advanced reasoning agent that can improve based on self reflection.
Human: You will be given a previous trial in Game of 24, where you had to use basic arithmetic operations (+ - * /) with given numbers to obtain 24. You were unsuccessful. In a few sentences, diagnose a possible reason for failure and devise a new, concise, high level plan that aims to mitigate the same failure. Use complete sentences.
Here are some examples:
Human: Input: 4 6 8 10
Previous trial:
2 + 8 = 10 (left: 10 10 4)
10 + 10 = 20 (left: 20 4)
20 + 4 = 24
Answer: 2 + 8 + 10 + 4 = 24
Reflection:
AI: I made an error in my response, as I used number 2 that was not present in input. Next time, I should be more cautious and double-check that I am only using input numbers before providing an answer.
Human: Input: 4 6 8 10
Previous trial:
10 + 8 = 18 (left: 18 4 6)
18 + 6 = 24 (left: 24 4)
Answer: 10 + 8 + 6 = 24
Reflection:
AI: I made an error in my response, as I did not use number 4 that was presen

In [14]:
# Self-reflection chain: state -> prompt variables -> reflection prompt -> chat model -> output parser.
# The leading dict maps the strategy state onto the three prompt variables:
#   * "inputs" is passed through as is;
#   * "intermediate_steps" keeps the second element of every recorded step and
#     joins them with newlines, or is the literal "none" when no step was taken;
#   * "answer" is the "output" entry of agent_outcome.return_values.
self_reflection_chain = (
    {
        "inputs": lambda state: state["inputs"],
        "intermediate_steps": lambda state: (
            "none"
            if not state["intermediate_steps"]
            else "\n".join(step[1] for step in state["intermediate_steps"])
        ),
        "answer": lambda state: state["agent_outcome"].return_values["output"],
    }
    | game_of_24_self_reflection_prompt
    | ChatOpenAI(temperature=temperature, model=model_name)
    | GameOf24SelfReflexionOutputParser()
)

# Wrap the chain as the self-reflection component of the strategy.
self_reflection = RunnableSelfReflection(llm_chain=self_reflection_chain)

## Defining strategy

In [15]:
# Assemble the Reflexion strategy from the actor, evaluator and self-reflection
# components, passing max_num_iterations from the hyperparameters cell.
reflexion = create_reflexion_strategy(
    max_num_iterations=max_num_iterations,
    self_reflection=self_reflection,
    evaluator=evaluator,
    actor=actor,
)

In [16]:
# Run the strategy on a sample puzzle; the returned final state (answer,
# evaluator score, self-reflections, steps, iteration count) is displayed below.
reflexion.invoke({"inputs": "1 1 4 6"})

{'inputs': '1 1 4 6',
 'agent_outcome': AgentFinish(return_values={'output': '1 * 1 * 4 * 6 = 24'}, log='Successfully reached 24.'),
 'evaluator_score': 1.0,
 'self_reflections': ['Great job on solving the trial correctly! Your approach of multiplying the numbers together was efficient and effective in reaching the target number of 24. Keep up the good work!',
  'Great job on finding the correct solution! In this case, you successfully utilized multiplication to combine the numbers and reach the target of 24. Your approach was efficient and effective. Keep up the good work!'],
 'intermediate_steps': [],
 'iteration': 4}

In [17]:
# Run the strategy on a second sample puzzle and display the returned final state.
reflexion.invoke({"inputs": "2 3 5 12"})

{'inputs': '2 3 5 12',
 'agent_outcome': AgentFinish(return_values={'output': '2 * 3 * 5 - 12 = 24'}, log='Successfully reached 24.'),
 'evaluator_score': 1.0,
 'self_reflections': [],
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='3 * 5 = 15 (left: 2 15 12)', log='3 * 5 = 15 (left: 2 15 12)'),
   '3 * 5 = 15 (left: 2 15 12)')],
 'iteration': 2}

In [18]:
# Run the strategy on a third sample puzzle and display the returned final state.
reflexion.invoke({"inputs": "5 6 6 12"})

{'inputs': '5 6 6 12',
 'agent_outcome': AgentFinish(return_values={'output': '5 * ((6 * 6) / 12) = 24'}, log='Successfully reached 24.'),
 'evaluator_score': 1.0,
 'self_reflections': [],
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='6 * 6 = 36 (left: 5 36 12)', log='6 * 6 = 36 (left: 5 36 12)'),
   '6 * 6 = 36 (left: 5 36 12)')],
 'iteration': 2}

In [19]:
# Same puzzle as the previous cell, run a second time. The models sample with
# temperature 0.8, so the trajectory and final answer can differ between runs
# (compare the two recorded outputs).
reflexion.invoke({"inputs": "5 6 6 12"})

{'inputs': '5 6 6 12',
 'agent_outcome': AgentFinish(return_values={'output': '(5 * 6) - 6 + 12 = 24'}, log='Successfully reached 24.'),
 'evaluator_score': 1.0,
 'self_reflections': ['In my previous response, I made an error by not using all the numbers provided in the input. Next time, I should ensure that I use all the given numbers in the calculations to find a solution.'],
 'intermediate_steps': [(AgentAction(tool='simple_tool', tool_input='5 * 6 = 30 (left: 6 12 30)', log='5 * 6 = 30 (left: 6 12 30)'),
   '5 * 6 = 30 (left: 6 12 30)')],
 'iteration': 3}