# Utils

## Imports

In [1]:
from langchain_openai import ChatOpenAI
from textwrap import dedent
from planning_library.strategies.reflexion import ReflexionStrategy
from planning_library.strategies.reflexion.components import ReflexionActor, ReflexionEvaluator, ReflexionSelfReflection
from environments.game_of_24.environment import GameOf24Env

import os

%load_ext autoreload
%autoreload 2

## Logging

In [2]:
# Tracing configuration, supplied through environment variables:
# the tracing flag, the project name runs are filed under, and the API endpoint.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Reflexion (new) + Game of 24",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

## Defining environment

In [3]:
# Single shared Game of 24 environment, created with default arguments.
# The same instance supplies the agent's tools (`env.tools`), is wrapped by the
# action executor, and is the object that `reset_environment` resets.
env = GameOf24Env()

## Defining hyperparameters

In [4]:
# --- Reflexion loop controls ---
# Evaluation threshold: once this value is reached, the loop exits.
value_threshold: float = 1.0
# Iteration cap: once this many iterations have run, the loop exits.
max_num_iterations: int = 2

# --- LLM settings (used by the actor, evaluator and self-reflection alike) ---
model_name: str = "gpt-3.5-turbo"
temperature: float = 0.8

# Defining strategy components

## Agent

Agent components:

* LLM (`BaseChatModel` from LangChain)
* Prompt (`ChatPromptTemplate` from LangChain)
* Tools (`Sequence[BaseTool]`, where `BaseTool` is a LangChain primitive)
* Function Calling Parser (either passed explicitly or by its name)

In [11]:
# Actor: the tool-using agent that attempts the puzzle.
# It is assembled from an LLM, the environment's tools, a function-calling
# parser (selected by name) and the task prompt. The prompt is indented for
# readability only; `dedent` strips the common indentation at runtime.
actor = ReflexionActor.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    tools=env.tools,
    parser_name="openai-tools",
    user_message=dedent("""
        You are given four numbers, and your goal is to obtain 24 from given numbers via basic arithmetic operations. 
        This might be not the first attempt you took, so pay attention to self-reflections about your previous failures. 
        When you're ready to answer, make sure to include a mathematical expression showing how to obtain 24 from given numbers, 
        for instance: '(2 + 2) * (12 / 2) = 24'.

        Inputs:
        {numbers}"""),
)

## Other components

Other Reflexion components are even simpler by default, since they don't use tools.

* Self-Reflection needs only prompt + LLM.
* Evaluator needs prompt + LLM + threshold (it outputs a float, and the execution loop will continue only if float < threshold).

However, in this case both components also receive `intermediate_steps` as part of their input, so they need the same parser as the agent.


In [12]:
# Evaluator: judges the actor's answer against the three correctness rules in
# the prompt. `threshold` is the score cut-off used by the Reflexion loop.
evaluator = ReflexionEvaluator.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    # The literal must open with exactly three quotes. A fourth quote becomes
    # part of the text: it puts a stray `"` at column 0 of the prompt, which
    # also makes the common indentation empty so `dedent` strips nothing.
    user_message=dedent("""
        Given an input and an answer, give a judgement if the answer is correct, i.e. 
        1) it uses each given number exactly once; 
        2) it doesn't use any other number; 
        3) given mathematical expression correctly reaches 24.

        Inputs:
        {numbers}
        """),
    threshold=value_threshold,
    parser_name="openai-tools",
)

In [13]:
# Self-reflection: given the puzzle inputs and the failed trial, produces the
# reflection that the actor's prompt refers to on later attempts.
# The prompt is indented for readability only; `dedent` strips the common
# indentation at runtime.
self_reflection = ReflexionSelfReflection.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    user_message=dedent("""
        You will be given inputs for Game of 24 and your previous trial, 
        where you had to use basic arithmetic operations (+ - * /) with given numbers to obtain 24. 
        You were unsuccessful.

        Inputs:
        {numbers}
        """),
    parser_name="openai-tools",
)

In [14]:
from planning_library.action_executors import GymnasiumActionExecutor
from typing import Dict, Any


def reset_environment(inputs: Dict[str, Any]) -> None:
    """Reset the shared ``env`` to the puzzle described by ``inputs``.

    Args:
        inputs: Mapping with a ``"numbers"`` entry holding the puzzle numbers
            as a single whitespace-separated string, e.g. ``"1 1 4 6"``.

    Every token is converted with ``float`` and the resulting list is passed
    to ``env.reset`` under ``options["numbers"]``.
    """
    tokens = inputs["numbers"].split()
    env.reset(options={"numbers": list(map(float, tokens))})


# Action executor wrapping the shared environment instance; it is passed to
# the strategy as `action_executor` when the strategy is assembled.
action_executor = GymnasiumActionExecutor(env)

In [15]:
# Assemble the Reflexion strategy from the pieces defined in earlier cells.
reflexion = ReflexionStrategy.create_from_components(
    # LLM-backed components
    actor=actor,
    evaluator=evaluator,
    self_reflection=self_reflection,
    # environment plumbing
    action_executor=action_executor,
    reset_environment=reset_environment,
    # loop control
    max_iterations=max_num_iterations,
)

In [16]:
# Single source of truth for the puzzle, so the environment state and the
# prompt inputs cannot drift apart.
puzzle_inputs = {"numbers": "1 1 4 6"}

# Initialise the environment through the same helper that is handed to the
# strategy. The first attempt then starts from the same state (float-valued
# numbers) as any reset performed through that helper.
reset_environment(puzzle_inputs)

# The second positional argument is the run config.
# NOTE(review): `recursion_limit` is presumably raised so a long multi-step
# run is not cut short by a lower default; confirm against the library docs.
reflexion.invoke(
    {"inputs": puzzle_inputs},
    {"recursion_limit": 1000},
)

{'inputs': {'numbers': '1 1 4 6'},
 'agent_outcome': AgentFinish(return_values={'output': "I have completed the calculations, but unfortunately, I couldn't reach the target number 24.\n\nFinal Result:\n- Addition: 7 + 4 = 11\n\nRemaining number: 11\n\nI will reflect on this attempt and strategize for the next one to improve the chances of reaching the goal.\nDiagnosis:\n- In this attempt, I failed to reach the target number 24 by focusing on addition and multiplication only, without considering other operations like subtraction or division.\n- I also didn't explore all possible combinations of the numbers, limiting the potential solutions.\n\nNew Plan:\n- For the next attempt, I will incorporate subtraction and division operations to broaden the scope of possible solutions.\n- I will systematically explore various permutations and combinations of the numbers to increase the chances of success.\n- By maintaining a balanced approach to arithmetic operations, I aim to find the correct exp