# Utils

## Imports

In [20]:
from langchain_openai import ChatOpenAI
from textwrap import dedent
from environments.game_of_24.reflexion.generate_prompts import openai_tools_generate_prompt

from planning_library.strategies.reflexion import ReflexionStrategy
from planning_library.strategies.reflexion.components import ReflexionActor, ReflexionEvaluator, ReflexionSelfReflection
from environments.game_of_24.common.environment import GameOf24Env

import os

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Logging

In [21]:
# LangSmith tracing configuration, applied to the process environment in one call.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": "Reflexion (new) + Game of 24",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

## Defining environment

In [22]:
# Single Game of 24 environment instance shared by the whole notebook;
# its `tools` are handed to the agent/actor in later cells.
env = GameOf24Env()

## Defining hyperparameters

In [23]:
# Reflexion hyperparameters
value_threshold: float = 1.0  # threshold for evaluation; when reached, the loop will exit
max_num_iterations: int = 2  # maximum number of iterations; when reached, the loop will exit

# other hyperparameters
model_name: str = "gpt-3.5-turbo"  # model name passed to the ChatOpenAI instances below
temperature: float = 0.8  # sampling temperature passed to the ChatOpenAI instances below

# Defining strategy components

## Agent

### Working with function calling parsers

There are two base function-calling parser classes (one for single-action and one for multi-action parsing), defined in `planning_library.function_calling_parsers.base_parser`.

I implemented output parsers for OpenAI Functions (in `planning_library.function_calling_parsers.openai_functions_parser`) and OpenAI Tools (in `planning_library.function_calling_parsers.openai_tools_parser`).

There is also a registry for accessing available parsers conveniently:

In [24]:
from planning_library.function_calling_parsers import ParserRegistry


# List the names under which parsers are currently registered.
ParserRegistry.get_available_parsers()

['openai-functions', 'openai-tools']

In [25]:
# Look up a parser instance by its registered name.
ParserRegistry.get_parser("openai-functions")

<planning_library.function_calling_parsers.openai_functions_parser.OpenAIFunctionsParser at 0x176c8e7d0>

In [26]:
# Requesting an unregistered parser name raises ValueError. The error is the
# point of this cell, so catch and display it instead of letting it abort a
# "Restart Kernel -> Run All" execution.
try:
    ParserRegistry.get_parser("openai-123")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Unknown parser openai-123. Currently available are: ['openai-functions', 'openai-tools']

### Instantiating an agent


Agent components:

* LLM (`BaseChatModel` from LangChain)
* Prompt (`ChatPromptTemplate` from LangChain)
* Tools (`Sequence[BaseTool]`, where `BaseTool` is a LangChain primitive)
* Function Calling Parser (either passed explicitly or by its name)

In [27]:
from planning_library.components.agent_component import AgentFactory

# Option 1: select the function-calling parser by its registry name.
agent = AgentFactory.create_agent(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    prompt=openai_tools_generate_prompt,
    tools=env.tools,
    parser_name="openai-tools",
)

In [28]:
from planning_library.function_calling_parsers import OpenAIToolsParser

# Option 2: hand over an already constructed parser instance.
agent = AgentFactory.create_agent(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    prompt=openai_tools_generate_prompt,
    tools=env.tools,
    parser=OpenAIToolsParser(),
)

In [29]:
# One way to define the actor component: wrap an agent that has already been created.
actor = ReflexionActor(agent=agent)

In [30]:
# Components powered by an agent share this `create` interface.
# Instead of a full prompt template, only the user message is supplied here;
# the default prompt for this component in this strategy is used for the rest.
actor = ReflexionActor.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    tools=env.tools,
    parser=OpenAIToolsParser(),
    user_message=dedent("""
            You are given four numbers, and your goal is to obtain 24 from given numbers via basic arithmetic operations. 
            This might be not the first attempt you took, so pay attention to self-reflections about your previous failures. 
            When you're ready to answer, make sure to include a mathematical expression showing how to obtain 24 from given numbers, 
            for instance: '(2 + 2) * (12 / 2) = 24'.
            
            Inputs:
            {numbers}"""),
)

## Other components

Other Reflexion components are even simpler by default, since they don't use tools.

* Self-Reflection needs only prompt + LLM.
* Evaluator needs prompt + LLM + threshold (it outputs a float, and the execution loop will continue only if float < threshold).

However, in this case, both components also receive `intermediate_steps` as part of their input, so they need the same parser as the actor.


In [31]:
# Evaluator component: built from an LLM, a user message, and a threshold.
# The same function-calling parser as the actor is selected by name.
evaluator = ReflexionEvaluator.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    # The literal must open with exactly three quotes. A fourth quote becomes
    # an unindented first line of the string, which leaves `dedent` with an
    # empty common margin, so no indentation is removed from the prompt.
    user_message=dedent("""
        Given an input and an answer, give a judgement if the answer is correct, i.e. 
        1) it uses each given number exactly once; 
        2) it doesn't use any other number; 
        3) given mathematical expression correctly reaches 24.

        Inputs:
        {numbers}
        """),
    threshold=value_threshold,
    parser_name="openai-tools",
)

In [32]:
# Self-reflection component: built from an LLM and a user message, with the
# function-calling parser selected by name.
self_reflection = ReflexionSelfReflection.create(
    llm=ChatOpenAI(model=model_name, temperature=temperature),
    parser_name="openai-tools",
    user_message=dedent("""
                                                 You will be given inputs for Game of 24 and your previous trial, 
                                                 where you had to use basic arithmetic operations (+ - * /) with given numbers to obtain 24. 
                                                 You were unsuccessful.
                                                 
                                                 Inputs:
                                                 {numbers}
                                                 """),
)

In [33]:
from planning_library.action_executors import GymnasiumActionExecutor
from typing import Dict, Any


def reset_environment(inputs: Dict[str, Any]) -> None:
    """Reset the notebook's Game of 24 environment for the given inputs.

    Args:
        inputs: mapping whose ``"numbers"`` entry is a whitespace-separated
            string of numbers; each token is converted with ``float``.
    """
    parsed_numbers = list(map(float, inputs["numbers"].split()))
    env.reset(options={"numbers": parsed_numbers})


# Action executor built around the environment instance.
# NOTE(review): presumably this is what runs the agent's actions in `env` — confirm against GymnasiumActionExecutor.
action_executor = GymnasiumActionExecutor(env)

In [34]:
# Assemble the Reflexion strategy from the actor, evaluator and self-reflection
# components, the action executor and the environment-reset callback;
# the iteration cap comes from `max_num_iterations`.
reflexion = ReflexionStrategy.create_from_components(
    action_executor=action_executor,
    actor=actor,
    evaluator=evaluator,
    self_reflection=self_reflection,
    reset_environment=reset_environment,
    max_iterations=max_num_iterations,
)

In [35]:
# Run Reflexion on one puzzle. The environment is reset to the same four
# numbers that are passed to the strategy as its input string.
# NOTE(review): `reset_environment` is also given to the strategy — confirm
# whether this explicit reset is still needed.
env.reset(options={"numbers": [1, 1, 4, 6]})
reflexion.invoke(
    {"inputs": {"numbers": "1 1 4 6"}},
    # presumably the run config; only `recursion_limit` is set here
    {"recursion_limit": 1000},
)

{'inputs': {'numbers': '1 1 4 6'},
 'agent_outcome': AgentFinish(return_values={'output': "I have explored various combinations of arithmetic operations with the given numbers, but I still couldn't reach the target of 24. It seems challenging to achieve the desired result with these particular numbers. Let's reassess and try a different approach."}, log="I have explored various combinations of arithmetic operations with the given numbers, but I still couldn't reach the target of 24. It seems challenging to achieve the desired result with these particular numbers. Let's reassess and try a different approach."),
 'evaluator_should_continue': True,
 'self_reflection_memory': ChatMessageHistory(messages=[('content', 'One possible reason for failure could be focusing too much on individual arithmetic operations without considering alternative combinations. To mitigate this, I should explore different sequences of operations earlier in the process rather than getting fixated on a single path