# LLM Agents

## Setup

In [6]:
import json
import os
import sys
from pathlib import Path

# !pip install wikipedia
import wikipedia
from wikipedia import WikipediaPage
import openai
from wikipedia import DisambiguationError, PageError
from openai import OpenAI
from openai.types.chat.chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion_message import ChatCompletionMessage
# !pip install anthropic
from anthropic import Anthropic
from typing import Literal, Optional, Dict, List, Any
from abc import abstractmethod
import math
import re

#Make sure exercises are in the path
# exercises_dir = Path(f"{os.getcwd().split(chapter)[0]}/{chapter}/exercises").resolve()
# section_dir = (exercises_dir / "part4_llm_agent_evals").resolve()
# if str(exercises_dir) not in sys.path: sys.path.append(str(exercises_dir))
# os.chdir(exercises_dir)

from utils import import_json, save_json, retry_with_exponential_backoff, pretty_print_questions, load_jsonl, omit
from utils import countrylist
from utils import evaluate_expression, apply_user_format, apply_assistant_format, establish_client_anthropic, establish_client_OpenAI, retry_with_exponential_backoff, apply_system_format
import part4_llm_agents.tests as tests

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Read the OpenAI key from the environment and set it on the module-level config.
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key

# Shared client for the demo cells below.
# NOTE(review): OpenAI() is constructed without arguments, so it presumably picks the key up
# from the environment itself rather than from `openai.api_key` - confirm.
client = OpenAI()

In [7]:
class SimpleAgent:
    """
    Minimal tool-calling agent built on the OpenAI chat-completions API.

    Attributes:
        model (str): Name of the chat model used for every request.
        task (Any): Task object. `run` reads `task.instruction`, and the task is passed to every tool's `execute`.
        tools (Optional[List[Any]]): Tool objects, used through `.name`, `.description` and `.execute(...)`.
        client (OpenAI): Client used for the API calls.
        chat_history (List[dict]): Messages sent with every request.
    """

    def __init__(
        self,
        task: Any = None,
        model: Literal["gpt-4o-mini"] = "gpt-4o-mini",
        tools: Optional[List[Any]] = None,
        chat_history: Optional[List[dict]] = None,
    ):
        self.model = model
        self.task = task
        self.tools = tools
        self.client = OpenAI()
        self.chat_history = chat_history if chat_history else []

    @retry_with_exponential_backoff
    def get_response(self, use_tool: bool = True) -> ChatCompletionMessage:
        """
        Get the response from the model via an API call, with the option of tool calling.

        Args:
            use_tool (bool): Whether to use tool calling or not

        Returns:
            ChatCompletionMessage: The response from the model
        """
        request = {
            "model": self.model,
            "messages": self.chat_history,
        }
        # Only send the tool parameters when tools are actually offered: explicit
        # `tools=None` / `tool_choice=None` values are not the same as omitting them,
        # and an agent constructed without tools has nothing to describe.
        if use_tool and self.tools:
            request["tools"] = [tool.description for tool in self.tools]
            request["tool_choice"] = "auto"

        response = self.client.chat.completions.create(**request)
        return response.choices[0].message

    def execute_tool_calls(self, message: ChatCompletionMessage) -> List[str]:
        """
        Execute the tool calls in the message and return a list of tool_responses.

        Args:
            message (ChatCompletionMessage): The message containing the tool calls

        Returns:
            List[str]: A list of tool responses (as strings), in the same order as the tool calls

        Raises:
            ValueError: If there are tool calls but no task is set, or if the model
                requests a tool whose name matches none of `self.tools`.
        """
        # A message without tool calls carries `tool_calls=None`; treat it as "nothing to do".
        tool_calls = message.tool_calls or []
        if tool_calls and not self.task:
            raise ValueError("Task is not set. Cannot execute tool calls.")

        available_tools = self.tools or []
        tool_responses = []
        for tool_call in tool_calls:
            requested_name = tool_call.function.name
            # First tool with a matching name wins; a default of None avoids leaking a
            # bare StopIteration when the model asks for a tool we do not have.
            tool = next(
                (candidate for candidate in available_tools if candidate.name == requested_name),
                None,
            )
            if tool is None:
                raise ValueError(f"Unknown tool requested by the model: {requested_name!r}")
            arguments = json.loads(tool_call.function.arguments)
            tool_responses.append(tool.execute(**arguments, task=self.task))

        return tool_responses

    def run(self, with_tool: bool = True) -> ChatCompletionMessage:
        """
        Default implementation of run method.
        This can be overridden in subclasses for specific behavior.

        Args:
            with_tool (bool): Whether to use tool calling or not

        Returns:
            ChatCompletionMessage: The response from the model
        """
        print("Running SimpleAgent...")
        instruction = self.task.instruction
        self.chat_history.append(apply_user_format(instruction))
        response = self.get_response(use_tool=with_tool)
        return response

## Building a simple Arithmetic Agent
We are going to implement 4 things:
1. `ArithmeticTask` class for handling the generation of arithmetic problems and their solutions
2. `CalculateTool`, a tool the LLM can use to solve the task
3. `ArithmeticAgent` class for handling the LLM API, calculations, and keeping track of overall progress
4. `AgentLoop` function that defines the interaction loop between the task and the LLM (agent).

In [8]:
class ArithmeticAgent(SimpleAgent):
    """
    ArithmeticAgent class for doing simple arithmetic tasks.

    Inherits from SimpleAgent which includes the following attributes and methods:

    Attributes:
        model (str): The model used for generating responses (inherited)
        tools (List[Any]): List of tool objects the model may call (inherited)
        client (OpenAI): OpenAI client for API calls (inherited)
        task (Any): The current task being executed (inherited)
        chat_history (List[dict]): History of interactions (inherited)
        verbose (bool): Whether to print the conversation as it happens

    Methods:
        get_response(use_tool: bool = True) -> ChatCompletionMessage:
            Get response from the model (inherited)

        execute_tool_calls(message: ChatCompletionMessage) -> List[str]:
            Execute tool calls from the model's response (inherited)

        run(with_tool: bool = True) -> None:
            Run one loop of the arithmetic agent
    """

    def __init__(
        self,
        model: Literal["gpt-4o-mini"] = "gpt-4o-mini",
        task: Any = None,
        tools: Optional[List[Any]] = None,
        chat_history: Optional[List[dict]] = None,
        verbose: bool = True,
    ):
        super().__init__(model=model, task=task, tools=tools, chat_history=chat_history)
        self.verbose = verbose

    def handle_tool_calls(self, response: ChatCompletionMessage):
        """
        Handle the tool calls from the model response. This function:
        - Executes the tool calls
        - Appends the tool calls and responses to the chat history

        If executing the tools fails, the error is printed and reported back to the
        model as the content of every tool response, so that each tool call in the
        assistant message still has a matching tool message in the history.

        Args:
            response (ChatCompletionMessage): The response from the model
        """
        if self.verbose:
            print("\nTool calls:", response.tool_calls)

        # Append response to chat history
        self.chat_history.append(response)

        tool_calls = response.tool_calls
        try:
            tool_responses = self.execute_tool_calls(response)
        except Exception as e:
            print(f"\nError handling tool calls: {e}")
            # Keep the conversation well-formed: the follow-up request is expected to
            # contain one tool message per tool call, so answer each with the error.
            tool_responses = [f"Error: {e}" for _ in tool_calls]

        # Append one tool message per tool call to the chat history
        for tool_call, tool_response in zip(tool_calls, tool_responses):
            self.chat_history.append(apply_tool_call_format(tool_call, tool_response))
            if self.verbose:
                print(f"\nTool call: {tool_call.function.name}, ARGS: {tool_call.function.arguments}")
                print(f"Tool response: {tool_response}")

    def handle_refusal(self, response: ChatCompletionMessage):
        """
        Handle the refusal from the model response. This function should only be called if the model refuses to answer. It:
        - Appends the refusal to the chat history
        - Updates the task state (moves on to the next task)

        Args:
            response (ChatCompletionMessage): The response from the model
        """
        if self.verbose:
            print("\nModel Refusal:", response.refusal)
        self.chat_history.append(apply_assistant_format(response.refusal))
        self.task.update_current_task()

    def generate_and_check_final_answer(self) -> Literal["Correct", "Incorrect"]:
        """
        This function:
        - Gets the model to generate a final answer to the question (after it has seen the tool response)
        - Checks this final answer against the correct answer.
        - If the answer is correct, updates the task state.
        - Appends to chat history (and returns) "Correct" if the answer is correct and "Incorrect" if the answer is incorrect.

        Args:
            None

        Returns:
            str: "Correct" or "Incorrect"

        Raises:
            Exception: Any error raised while parsing or checking the model's answer is re-raised.
        """
        # Get the final response from the model after tool responses
        response = self.get_response(use_tool=False)
        self.chat_history.append(apply_assistant_format(response.content))

        # Check the answer
        try:
            model_answer = self.parse_answer(response)
            is_correct = self.task.check_answer(model_answer)
        # Ends the task if there's an error parsing the model answer
        except Exception as e:
            if self.verbose:
                print(f"\nError parsing model answer: {e}")
            raise

        if is_correct:
            self.chat_history.append(apply_user_format("Correct."))
            if self.verbose:
                print("\nUser: Correct.")
            # Update to the next task
            self.task.update_current_task()
            return "Correct"

        # Stay on the same task so it can be retried on the next loop
        self.chat_history.append(apply_user_format("Incorrect."))
        if self.verbose:
            print("\nUser: Incorrect.")
        return "Incorrect"

    def run(self, with_tool: bool = True):
        """
        Run one loop of the agent, which involves:
        - getting a task
        - getting a response from the model
        - handling the model response, including tool calls, refusals, no tool calls, parsing and checking final answers, errors.
        - managing memory: storing the history of messages to self.chat_history
        - managing task state: staying on the same task or moving to the next task at the end of the loop

        Args:
            with_tool (bool): Whether the model is offered the tools on its first response
        """
        # Get the task instruction
        instruction = self.task.instruction
        if self.verbose:
            print("\nUSER:", instruction)
        self.chat_history.append(apply_user_format(instruction))

        # Get the response from the model
        response = self.get_response(use_tool=with_tool)

        if self.verbose:
            print("\nModel response:", response.content)

        # Handle the response
        ## If model makes tool calls, handle the tool calls
        if response.tool_calls:
            self.handle_tool_calls(response)

            # Then get the final answer from the model
            self.generate_and_check_final_answer()

        ## If no tool call: Handle edge cases

        ### Check if there's a refusal to answer:
        elif response.refusal:
            self.handle_refusal(response)

        else:
            # NOTE(review): the plain-text `response` obtained above is not stored; a
            # fresh final answer is requested instead. Kept as-is to preserve the flow.
            self.generate_and_check_final_answer()

    def parse_answer(self, message: ChatCompletionMessage) -> float:
        """
        Extract the numerical answer from the string output of the model

        Args:
            message (ChatCompletionMessage): The response from the model

        Returns:
            float: The numerical answer extracted from the model

        Raises:
            ValueError: If the response has no <answer> tag, or the tagged text is not a number.
        """
        open_tag, close_tag = "<answer>", "</answer>"
        # `content` is None for messages that only carry tool calls.
        text = message.content or ""

        tag_position = text.find(open_tag)
        if tag_position == -1:
            raise ValueError("Model response does not contain an <answer> tag.")

        startpoint = tag_position + len(open_tag)
        # Search for the closing tag only after the opening one; if it is missing,
        # read to the end of the message instead of slicing with -1.
        endpoint = text.find(close_tag, startpoint)
        if endpoint == -1:
            endpoint = len(text)
        return float(text[startpoint:endpoint])

In [9]:
# Calculate Tool

class CalculateTool():
    """Tool that lets the model evaluate an arithmetic expression given as a string."""

    name = "calculate"

    @staticmethod
    def execute(expression: str, task: Any = None) -> str:
        """
        Evaluate `expression` with `evaluate_expression()` and hand the result back as text.

        Args:
            expression (str): The arithmetic expression to evaluate
            task (Any): Accepted for interface compatibility; not used here

        Returns:
            str: The result as a string, or "Error: <message>" if evaluation raised a
                SyntaxError, NameError or ZeroDivisionError
        """
        try:
            outcome = str(evaluate_expression(expression))
        except (SyntaxError, NameError, ZeroDivisionError) as err:
            outcome = "Error: " + str(err)
        return outcome

    @property
    def description(self):
        """
        Describe the tool in the function-calling format sent to the API.

        Returns:
            dict: The description of the tool
        """
        expression_schema = {
            "type": "string",
            "description": "The arithmetic expression that you want to be evaluated.",
        }
        parameters = {
            "type": "object",
            "properties": {"expression": expression_schema},
            "required": ["expression"],
            "additionalProperties": False,
        }
        function_spec = {
            "name": self.name,
            "description": 'Calculates the result of an arithmetic expression. For example, you could provide an input in the form "2+3" and the function would return 5. Or you could provide an expression like "10/3" and the function would return 3.3333333333333335.',
            "parameters": parameters,
        }
        return {"type": "function", "function": function_spec}
    
# Run the provided checks against the CalculateTool class defined above.
tests.run_calculate_tool_tests(CalculateTool)

# Shared tool instance used by the demo cells and agents below.
Calculator = CalculateTool()

test_execute_valid_expressions passed
test_description_property passed
All CalculateTool tests passed successfully!


In [10]:
# Demo: ask the model a question while offering it the calculator tool.
messages = [{"role": "user", "content": "Calculate 2+3"}]
client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    tools=[Calculator.description],
    tool_choice="auto",
)

# In the recorded output below, `content` is None and the request to run the
# tool is carried in `tool_calls` instead.
print(response.choices[0].message.content)
print(response.choices[0].message.tool_calls)

None
[ChatCompletionMessageToolCall(id='call_fHOk2PqlZuA9H5A2nvZmwD47', function=Function(arguments='{"expression":"2+3"}', name='calculate'), type='function')]


In [11]:
def apply_tool_call_format(tool_call: ChatCompletionMessageToolCall, content: str) -> dict:
    """
    Build the "tool" role message that reports a tool's result back to the model.

    Args:
        - tool_call (ChatCompletionMessageToolCall) : The tool call being answered
        - content (str) : The tool response (i.e. the result of executing the tool), e.g. "5"

    Returns:
        - dict : Message with the keys "role", "content", "tool_call_id" and "name"
    """
    tool_message = {"role": "tool", "content": content}
    # Tie the result to the call that requested it and to the tool that was run.
    tool_message["tool_call_id"] = tool_call.id
    tool_message["name"] = tool_call.function.name
    return tool_message

In [12]:
# Demo of a full tool round trip: question -> tool call -> tool result -> follow-up request.
messages = [{"role": "user", "content": "Calculate 5/3. Be precise."}]
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    tools=[Calculator.description],
    tool_choice="auto",
)

# Extend the conversation with (1) the assistant message that requested the tool and
# (2) the tool's result for the first tool call, formatted as a "tool" role message.
# Only the first tool call is executed here.
messages.extend(
    [
        response.choices[0].message,
        apply_tool_call_format(
            response.choices[0].message.tool_calls[0],
            Calculator.execute(
                json.loads(
                    response.choices[0].message.tool_calls[0].function.arguments
                )["expression"]
            ),
        ),
    ]
)

# Second request: the model now sees the tool result in the message list.
response_to_tool_calls = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages,
    tools=[Calculator.description],
    tool_choice="auto",
)
# print(response_to_tool_calls.choices[0].message.content)

# print(response.choices[0].message.tool_calls[0])
# Prints the FIRST response (the tool-call request), not the follow-up answer.
print(response.choices[0].message)

ChatCompletionMessage(content=None, refusal=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_9w0ucMleas4RjOH4bcZPE3Tc', function=Function(arguments='{"expression":"5/3"}', name='calculate'), type='function')])


## Simple Agent

In [14]:
class ArithmeticTask:
    def __init__(self, num1: int | float, num2: int | float):
        self.num1 = num1
        self.num2 = num2
        self.operations: List[str] = ["+", "-", "*", "/", "%", "//"]
        self.correct_answers: Dict[str, float] = self._generate_answers()
        self.is_solved: Dict[str, bool] = {expr: False for expr in self.correct_answers}
        self.current_task_number = 0

    def _generate_answers(self) -> Dict[str, float]:
        """
        Generates a dictionary the correct answers for all possible tasks

        Returns:
            Dict[str, float]: A dictionary with the expression as key and the correct answer as value
        """
        return {
            f"{self.num1} {op} {self.num2}": evaluate_expression(f"{self.num1} {op} {self.num2}")
            for op in self.operations
        }

    @property
    def get_current_task(self) -> str:
        """
        Gets the current task for the agent

        Returns:
            str: A string containing the current task
        """
        return f"{str(self.num1)} {self.operations[self.current_task_number]} {str(self.num2)}"

    @property
    def instruction(self) -> str:
        """
        Gets a string containing instructions for the current task for the agent. (This will be fed to the agent as a user prompt)

        Returns:
            str: A string containing the instructions for the current task
        """
        return f"Calculate the result of the following expression: {str(self.num1)} {self.operations[self.current_task_number]} {str(self.num2)}. Give your final answer in the format: <answer>NUMBER</answer>, where NUMBER is a numerical value"

    def check_solved(self) -> bool:
        """
        Checks if all tasks have been solved

        Returns:
            bool: True if all tasks have been solved, False otherwise
        """
        return all(self.is_solved.values())

    def check_answer(self, model_answer: str) -> bool:
        """
        Checks if the model's answer is correct

        Args:
            model_answer (str): The model's answer

        Returns:
            bool: True if the model's answer is correct, False otherwise
        """

        correct_answer = self.correct_answers[self.get_current_task]
        return math.isclose(
            float(model_answer), correct_answer, rel_tol=1e-5, abs_tol=1e-8
        )

    def update_current_task(self):
        """
        Sets is_solved for the current task to True and increments self.current_task_number by one
        """
        self.is_solved[self.get_current_task] = True
        self.current_task_number = (self.current_task_number + 1) % len(self.operations)

In [15]:
class SimpleAgent:
    """
    Minimal tool-calling agent built on the OpenAI chat-completions API.

    Attributes:
        model (str): Name of the chat model used for every request.
        task (Any): Task object. `run` reads `task.instruction`, and the task is passed to every tool's `execute`.
        tools (Optional[List[Any]]): Tool objects, used through `.name`, `.description` and `.execute(...)`.
        client (OpenAI): Client used for the API calls.
        chat_history (List[dict]): Messages sent with every request.
    """

    def __init__(
        self,
        task: Any = None,
        model: Literal["gpt-4o-mini"] = "gpt-4o-mini",
        tools: Optional[List[Any]] = None,
        chat_history: Optional[List[dict]] = None,
    ):
        self.model = model
        self.task = task
        self.tools = tools
        self.client = OpenAI()
        self.chat_history = chat_history if chat_history else []

    @retry_with_exponential_backoff
    def get_response(self, use_tool: bool = True) -> ChatCompletionMessage:
        """
        Get the response from the model via an API call, with the option of tool calling.

        Args:
            use_tool (bool): Whether to use tool calling or not

        Returns:
            ChatCompletionMessage: The response from the model
        """
        # Prepare the parameters for the API call
        params = {
            "model": self.model,
            "messages": self.chat_history,
        }

        # Tool parameters are only added when tools are requested AND available, so an
        # agent constructed without tools does not crash while building descriptions.
        if use_tool and self.tools:
            params["tools"] = [tool.description for tool in self.tools]
            params["tool_choice"] = "auto"

        response = self.client.chat.completions.create(**params)

        return response.choices[0].message

    def execute_tool_calls(self, message: ChatCompletionMessage) -> List[str]:
        """
        Execute the tool calls in the message and return a list of tool_responses.

        Args:
            message (ChatCompletionMessage): The message containing the tool calls

        Returns:
            List[str]: A list of tool responses (as strings, we'll format them correctly in run()),
                in the same order as the tool calls

        Raises:
            ValueError: If there are tool calls but no task is set, or if the model
                requests a tool whose name matches none of `self.tools`.
        """
        # A message without tool calls carries `tool_calls=None`; treat it as "nothing to do".
        tool_calls = message.tool_calls or []
        if tool_calls and not self.task:
            raise ValueError("Task is not set. Cannot execute tool calls.")

        available_tools = self.tools or []
        tool_responses = []

        for tool_call in tool_calls:
            requested_name = tool_call.function.name
            # First tool with a matching name wins; a default of None avoids leaking a
            # bare StopIteration when the model asks for a tool we do not have.
            func = next(
                (tool for tool in available_tools if tool.name == requested_name),
                None,
            )
            if func is None:
                raise ValueError(f"Unknown tool requested by the model: {requested_name!r}")
            arguments = json.loads(tool_call.function.arguments)
            tool_response = func.execute(**arguments, task=self.task)
            tool_responses.append(tool_response)

        return tool_responses

    def run(self, with_tool: bool = True) -> ChatCompletionMessage:
        """
        Default implementation of run method.
        This can be overridden in subclasses for specific behavior.

        Args:
            with_tool (bool): Whether to use tool calling or not

        Returns:
            ChatCompletionMessage: The response from the model
        """
        print("Running SimpleAgent...")
        instruction = self.task.instruction
        self.chat_history.append(apply_user_format(instruction))
        response = self.get_response(use_tool=with_tool)
        return response

# Run the provided check of SimpleAgent.execute_tool_calls using the calculator tool and arithmetic task.
tests.test_execute_tool_calls(SimpleAgent, CalculateTool, ArithmeticTask)

Test execute_tool_calls passed successfully!


#### Arithmetic Agent

In [None]:
class ArithmeticAgent(SimpleAgent):
    """
    ArithmeticAgent class for doing simple arithmetic tasks (exercise skeleton).

    Inherits from SimpleAgent which includes the following attributes and methods:

    Attributes:
        model (str): The model used for generating responses (inherited)
        tools (List[Any]): List of tool objects the model may call (inherited)
        client (OpenAI): OpenAI client for API calls (inherited)
        task (Any): The current task being executed (inherited)
        chat_history (List[dict]): History of interactions (inherited)
        verbose (bool): Whether to print the conversation as it happens

    Methods:
        get_response(use_tool: bool = True) -> ChatCompletionMessage:
            Get response from the model (inherited)

        execute_tool_calls(message: ChatCompletionMessage) -> List[str]:
            Execute tool calls from the model's response (inherited)

        run(with_tool: bool = True) -> bool:
            Run one loop of the arithmetic agent
    """

    def __init__(
        self,
        model: Literal["gpt-4o-mini"] = "gpt-4o-mini",
        task: Any = None,
        tools: Optional[List[Any]] = None,
        chat_history: Optional[List[dict]] = None,
        verbose: bool = True,
    ):
        # A list literal as a default would be created once and shared by every agent;
        # build the default tool list per instance instead. Omitting `tools` still
        # yields the single Calculator tool.
        if tools is None:
            tools = [Calculator]
        super().__init__(model=model, task=task, tools=tools, chat_history=chat_history)
        self.verbose = verbose

    def handle_tool_calls(self, response: ChatCompletionMessage):
        """
        Handle the tool calls from the model response. This function should:
        - Execute the tool calls
        - Append the tool calls and responses to the chat history

        Args:
            response (ChatCompletionMessage): The response from the model
        """
        # Exercise placeholder: intentionally not implemented yet.
        pass

    def handle_refusal(self, response: ChatCompletionMessage):
        """
        Handle the refusal from the model response. This function should only be called if the model refuses to answer and should:
        - Append the refusal to the chat history
        - Update the task state

        Args:
            response (ChatCompletionMessage): The response from the model
        """
        # Exercise placeholder: intentionally not implemented yet.
        pass

    def generate_and_check_final_answer(self) -> Literal["Correct", "Incorrect"]:
        """
        This function should:
        - Get the model to generate a final answer to the question (after it has seen the tool response)
        - Then check this final answer against the correct answer.
        - If the answer is correct, update the task state.
        - Then append to chat history (and return) "Correct" if the answer is correct and "Incorrect" if the answer is incorrect.

        Args:
            None

        Returns:
            str: "Correct" or "Incorrect"
        """
        # Exercise placeholder: intentionally not implemented yet.
        pass

    def run(self, with_tool: bool = True):
        """
        Run one loop of the agent, which involves:
        - getting a task
        - getting a response from the model
        - handling the model response, including tool calls, refusals, no tool calls, parsing and checking final answers, errors.
        - managing memory: storing the history of messages to self.chat_history
        - managing task state: staying on the same task or moving to the next task at the end of the loop

        Args:
            with_tool (bool): Whether the model is offered the tools on its first response
        """
        # Exercise placeholder: intentionally not implemented yet.
        pass

    def parse_answer(self, message: ChatCompletionMessage) -> float:
        """
        Extract the numerical answer from the string output of the model

        Args:
            message (ChatCompletionMessage): The response from the model

        Returns:
            float: The numerical answer extracted from the model

        Raises:
            ValueError: If the response has no <answer> tag, or the tagged text is not a number.
        """
        open_tag, close_tag = "<answer>", "</answer>"
        # `content` is None for messages that only carry tool calls.
        response = message.content or ""

        tag_position = response.find(open_tag)
        if tag_position == -1:
            raise ValueError("Model response does not contain an <answer> tag.")

        startpoint = tag_position + len(open_tag)
        # Look for the closing tag only after the opening one; if it is missing,
        # read to the end of the message.
        endpoint = response.find(close_tag, startpoint)
        if endpoint == -1:
            endpoint = len(response)
        return float(response[startpoint:endpoint])

## Wikipedia Agent

In [16]:
# Retrieve a Wikipedia page from its title
page = wikipedia.page("Large language model")

# Access basic page information: title, URL, summary, content and outgoing links.
print("Title:", page.title)
print("\nURL", page.url)
print(f"\nSummary (word count {len( page.summary.split())}):", page.summary)
# Only the first 1000 characters of the content are shown.
print(
    f"\nContent (word count {len( page.content.split())}):",
    page.content[:1000],
    "......",
)
# Only the first 7 link titles are shown.
print(
    f"""\nLinks (link count {len(page.links)}): [{", ".join(page.links[:7])}, ......]"""
)

Title: Large language model

URL https://en.wikipedia.org/wiki/Large_language_model

Summary (word count 166): A large language model (LLM) is a computational model capable of language generation or other natural language processing tasks. As language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a self-supervised and semi-supervised training process.
The largest and most capable LLMs, as of August 2024, are artificial neural networks built with a decoder-only transformer-based architecture, which enables efficient processing and generation of large-scale text data. Modern models can be fine-tuned for specific tasks or can be guided by prompt engineering. These models acquire predictive power regarding syntax, semantics, and ontologies inherent in human language corpora, but they also inherit inaccuracies and biases present in the data they are trained on.
Some notable LLMs are OpenAI's GPT series of models (e.g., GPT-3.5, G

In [17]:
# Look up a misspelled title; the recorded output below shows it resolves to "Animal".
# NOTE(review): the original comment credited `redirect=True` with avoiding PageError, but the
# misspelling is presumably resolved by the library's title auto-suggestion - confirm
# against the `wikipedia.page` documentation.
page = wikipedia.page("Animalss", redirect=True)
print(page.title)


# Handles DisambiguationError by falling back to the first option the error offers
try:
    page = wikipedia.page("Python")
except DisambiguationError as e:
    page = wikipedia.page(e.options[0])
print(page.title)

Animal




  lis = BeautifulSoup(html).find_all('li')


Pythonidae


In [18]:
def get_permitted_links(current_page: WikipediaPage) -> list[str]:
    """
    Collect the "permitted" links of a Wikipedia page, i.e. the link titles whose text
    also occurs somewhere in the page content.

    Args:
        current_page (WikipediaPage): The current Wikipedia page

    Returns:
        list[str]: The permitted link titles, in the order they appear in `current_page.links`
    """
    permitted = []
    for link_title in current_page.links:
        # Plain substring test against the page text.
        if link_title in current_page.content:
            permitted.append(link_title)
    return permitted

In [19]:
class WikiGame:
    """Task state for a Wikipedia race: the start page, the goal page and the current page."""

    def __init__(self, starting_page: str, goal_page: str):
        """
        Initialize the Wikipedia game object.

        Args:
            starting_page (str): The page the agent starts on.
            goal_page (str): The page the agent is trying to reach.
        """
        # The history starts with the title exactly as it was passed in.
        self.page_history: List[str] = [starting_page]
        self.starting_page: WikipediaPage = self.get_page(starting_page)
        self.goal_page: WikipediaPage = self.get_page(goal_page)
        self.current_page: WikipediaPage = self.starting_page

    # ========================= Helper Functions (given) =========================

    # Get page and page summary
    @staticmethod
    def get_page(title: str) -> WikipediaPage:
        """
        Fetch a Wikipedia page by title. An ambiguous title falls back to the first
        disambiguation option; a title that is not found is retried with auto-suggest on.

        Args:
            title (str): The title of the Wikipedia page

        Returns:
            WikipediaPage: The Wikipedia page
        """
        exact_lookup = {"auto_suggest": False, "redirect": True}
        try:
            found = wikipedia.page(title, **exact_lookup)
        except DisambiguationError as ambiguous:
            found = wikipedia.page(ambiguous.options[0], **exact_lookup)
        except PageError:
            found = wikipedia.page(title, auto_suggest=True, redirect=True)
        return found

    def get_page_summary(self, page: WikipediaPage | None = None) -> str:
        """
        Give a brief overview of a page: its first 500 characters of content, cut back to
        the last full stop in that span (or left uncut if the span has no full stop).

        Args:
            page (WikipediaPage): The Wikipedia page object. Falls back to the goal page.

        Returns:
            str: The summary of the Wikipedia page.
        """
        target = page if page else self.goal_page
        excerpt = target.content[:500]
        last_stop = excerpt.rfind(".")
        if last_stop == -1:
            return excerpt
        return excerpt[: last_stop + 1]

    # Get and check permitted links
    def get_permitted_links(self, title: Optional[str] = None) -> list[str]:
        """
        List the permitted links (link titles that also occur in the page content) of a
        page, leaving out the page's own title.

        Args:
            title (Optional[str]): The title of the Wikipedia page. If None, uses the current page.

        Returns:
            list[str]: The permitted links.
        """
        page = self.get_page(title) if title else self.current_page
        all_links = page.links
        content = page.content
        permitted = [link for link in all_links if link in content]

        # With an explicit title the requested title itself is excluded; otherwise the
        # current page's title is.
        own_title = title if title else page.title
        if own_title in permitted:
            permitted.remove(own_title)
        return permitted

    def is_permitted_link(self, link: str) -> bool:
        """
        Case-insensitively check whether `link` is a permitted link of the current page.

        Args:
            link (str): The link to check.

        Returns:
            bool: True if the link is permitted, False otherwise
        """
        wanted = link.lower()
        return any(candidate.lower() == wanted for candidate in self.get_permitted_links())

    # ========================= Task State Management (to implement) =========================

    @property
    def system_instruction(self) -> dict:
        """
        The system message that opens the game.

        Returns:
            dict: The starting instructions. "role" is "system" for system messages.
        """
        prompt = "You are a wikipedia-racing AI. Your aim is to reach the goal page by accessing links from a series of wikipedia pages."
        return {"role": "system", "content": prompt}

    @property
    def on_page_instruction(self) -> dict:
        """
        The user message telling the agent where it is and where it has to go.

        Returns:
            dict: The instructions for the current page. "role" is "user" for user messages.
        """
        here = self.current_page.title
        goal = self.goal_page.title
        return {
            "role": "user",
            "content": f"You are currently on page: {here}. Your goal page is {goal}.",
        }

    @property
    def next_step_instruction(self) -> dict:
        """
        The user message asking the agent for its next move.

        Returns:
            dict: The instructions for the next step. "role" is "user" for user messages.
        """
        return {"role": "user", "content": "What's your next step?"}

    def check_win(self) -> bool:
        """Return whether the current page compares equal to the goal page."""
        reached_goal = self.current_page == self.goal_page
        return reached_goal

In [20]:
class GetContentTool():
    """
    Tool that returns the text of the current Wikipedia page with followable links marked up.

    Attributes:
        name (str): The tool name exposed to the model.
    """

    name = "get_content"

    @staticmethod
    def execute(task: "WikiGame | Any") -> str:
        """
        Get all the content for the wikipedia page you are currently on. Anything which corresponds to a link is wrapped in <link></link> tags.

        Args:
            task (WikiGame | Any): The current task object. It must expose `current_page.content` and `get_permitted_links()`.

        Returns:
            str: The content of the page with links wrapped
        """
        content = task.current_page.content
        # Ask the task itself for its links, so the links highlighted here come from the same
        # object that MovePageTool uses to validate a move (task.is_permitted_link).
        permitted_links = task.get_permitted_links()
        # Longest titles are processed first - presumably so a long title is tagged before any
        # shorter title it contains.
        for word in sorted(permitted_links, key=len, reverse=True):
            # Only the first standalone occurrence is wrapped (count=1). The title must be preceded by
            # whitespace/punctuation and followed by whitespace/punctuation or an "s" (simple plural).
            content = re.sub(
                r"""(\s|[,.)!?;:'"])(""" + re.escape(word) + r""")(\s|[,.)!?;:'"s])""",
                r"\1<link>\2</link>\3",
                content,
                count=1,
                flags=re.IGNORECASE,
            )
        return content

    @property
    def description(self):
        """
        Provides the description of the get_content tool

        Returns:
            dict: The description of the tool for the API
        """
        return {
            "type": "function",
            "function": {
                "name": self.name,
                "description": "Get all the content for the wikipedia page you are currently on. Anything which corresponds to a link you can select to move to will be wrapped in <link></link> tags.",
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": [],
                },
            },
        }


class MovePageTool():
    """
    Tool that moves the game to another page reachable from the current one.

    Attributes:
        name (str): The tool name exposed to the model.
    """

    name = "move_page"

    @staticmethod
    def execute(new_page: str, task: Any) -> str:
        """
        Move the task to a new page if that page is a permitted link from the current page.

        Underscores in the requested title are treated as spaces before it is checked.

        Args:
            task (WikiGame): The current task object.
            new_page (str): The title of the new page to move to.

        Returns:
            str: A message saying either which page was moved to or that the move was rejected
        """
        target_title = new_page.replace("_", " ")

        # Guard clause: reject anything the task does not accept as a link.
        if not task.is_permitted_link(target_title):
            return f"Couldn't move page to {new_page}. This is not a valid link."

        task.current_page = task.get_page(target_title)
        task.page_history.append(task.current_page.title)
        return f"Moving page to {task.current_page.title}"

    @property
    def description(self):
        """
        Describe the move_page tool in the function-calling format sent to the API.

        Returns:
            dict: The description of the move_page tool for the API
        """
        new_page_schema = {
            "type": "string",
            "description": 'The title of the new page you want to move to. This should be formatted the way the title appears on wikipedia (e.g. to move to the wikipedia page for the United States of America, you should enter "United States"). Underscores are not necessary.',
        }
        function_spec = {
            "name": self.name,
            "description": "Changes your current page to a specified new page which is accessible via a link from the current page. You can only call this function once at a time, as it will take you to a different page.",
            "parameters": {
                "type": "object",
                "properties": {"new_page": new_page_schema},
                "required": ["new_page"],
            },
        }
        return {"type": "function", "function": function_spec}
    
# One shared instance of each tool, plus the list of tools that is passed to the agents and games.
get_content_tool_inst = GetContentTool()
move_page_tool_inst = MovePageTool()
wiki_game_tools = [get_content_tool_inst, move_page_tool_inst]

#### Build the Agent

In [21]:
class WikiAgent(SimpleAgent):
    """
    Inherits from SimpleAgent and adds the ability to handle tool calls and refusals in the Wikipedia game context.

    Attributes:
        model (str): The model used for generating responses (inherited)
        tools (List[Any]): List of tools (inherited)
        client (OpenAI): OpenAI client for API calls (inherited)
        task (Any): The current task being executed
        chat_history (List[dict]): Working history of interactions; emptied by reset_history
        full_chat_history (List[dict]): Every message recorded through update_history; not emptied by reset_history
        verbose (bool): Whether to print the conversation as it happens

    Methods:
        get_response(use_tool: bool = True) -> ChatCompletionMessage:
            Get response from the model (inherited)

        execute_tool_calls(message: ChatCompletionMessage) -> List[str]:
            Execute tool calls from the model's response (inherited)

        update_history(message): Append a message or list of messages to both histories

        reset_history(): Empty chat_history

        handle_tool_calls(response: ChatCompletionMessage): Execute and record tool calls

        handle_refusal(response: ChatCompletionMessage): Record a refusal

        start(): Add the starting instructions for the current page to the histories

        run():
            Run one loop of the Wikipedia agent (modified below)

    """

    def __init__(
        self,
        task: Any,
        tools: List[Any],
        model="gpt-4o-mini",
        chat_history: Optional[List[dict]] = None,
        verbose: bool = True,
    ):
        """
        Create the agent and immediately add the starting instructions via start().

        Args:
            task (Any): The game/task object the agent plays.
            tools (List[Any]): Tools the agent may call.
            model (str): Name of the model to query.
            chat_history (Optional[List[dict]]): Messages to seed both histories with. The list is copied, not stored.
            verbose (bool): Whether to print messages as they are added.
        """
        super().__init__(model=model, tools=tools, task=task)

        # Give each history its own list. update_history appends to both, so if the two
        # attributes shared one list object every message would be recorded twice.
        # Copying also leaves the caller's list untouched.
        seed_history = list(chat_history) if chat_history else []
        self.chat_history = list(seed_history)
        # All messages that have been sent in the chat history. chat_history itself has to be
        # erased each time a new page is reached for context window reasons.
        self.full_chat_history = list(seed_history)
        self.verbose = verbose
        self.start()

    def update_history(
        self, message: str | ChatCompletionMessage | List[str | ChatCompletionMessage]
    ):
        """
        Update self.chat_history and self.full_chat_history with a message or list of messages.

        Args:
            message (str | ChatCompletionMessage | List[...]): The message to add to the chat history.
                A list is added element by element; anything else is added as a single entry.
        """
        if isinstance(message, list):
            self.chat_history.extend(message)
            self.full_chat_history.extend(message)
        else:
            self.chat_history.append(message)
            self.full_chat_history.append(message)

    def reset_history(self):
        """
        Empty self.chat_history of the agent. self.full_chat_history is left untouched.
        """
        self.chat_history = []

    def handle_tool_calls(self, response: ChatCompletionMessage):
        """
        Handles tool_calls in the wikipedia game context:
            - Executes the tool calls using execute_tool_calls
            - Appends the original tool call & tool_responses to the chat_history
            - If the agent has moved to a new page, resets the chat_history and calls start()
            - If not, get the next_step_instruction from the task and append it to chat_history

        Args:
            response (ChatCompletionMessage): The response from the model
        """

        # Update history
        self.update_history(response)

        # Execute the tool calls
        tool_responses = self.execute_tool_calls(response)

        # Add tool calls and responses to the history
        for tool_call, tool_response in zip(response.tool_calls, tool_responses):
            self.update_history(apply_tool_call_format(tool_call, tool_response))

            if self.verbose:
                # Only the first 300 characters of the tool response are printed.
                print(
                    f"\nTOOL CALL: \nTool = {tool_call.function.name}, Args = {tool_call.function.arguments} \nTOOL RESPONSE:\n {tool_response[:300]}"
                )

        # Move to new page if necessary. A move is detected from the tool response text
        # ("Moving page" is the prefix of MovePageTool's success message).
        if any("Moving page" in tool_response for tool_response in tool_responses):
            self.reset_history()
            # Note: this progress banner is printed even when verbose is False.
            print(
                f"""{("-" * 50)} \n\nMOVED TO PAGE \n\nPATH HISTORY (N={len(self.task.page_history)}): {" -> ".join(self.task.page_history)} \n\n{("-"*50)}"""
            )

            # Give starting instructions if moved to a new page
            self.start()

        # Otherwise ask the agent what the next step is

        else:
            next_step_message = self.task.next_step_instruction
            self.update_history(next_step_message)
            if self.verbose:
                print(f"""\nUSER: \n{next_step_message["content"]}""")

    def handle_refusal(self, response: ChatCompletionMessage):
        """
        Handles refusals in the wikipedia game context by recording the refusal text as an assistant message.

        Args:
            response (ChatCompletionMessage): The response from the model
        """
        self.update_history(apply_assistant_format(response.refusal))
        if self.verbose:
            print(f"\nMODEL REFUSAL: {response.refusal}")

    def start(self):
        """
        A function to put the starting instructions in agent.chat_history when the agent starts a new page or starts the game.
        """
        instruction_messages = [
            self.task.system_instruction,
            self.task.on_page_instruction,
        ]
        self.update_history(instruction_messages)
        if self.verbose:
            print(
                f"\nSYSTEM: \n{instruction_messages[0]['content']} \n\nUSER: \n{instruction_messages[1]['content']}"
            )

    def run(self):
        """
        This function runs the agent in the wikipedia game context. It:
            - Gets the response from the model
            - Handles the response in the cases:
                - tool calls (using handle_tool_calls)
                - refusals (using handle_refusal)
                - no tool calls (using update_history)
        """

        # Get the response from the model
        response = self.get_response()

        # Handle the response
        ## If tool calls, do the tool calls and return the response
        if response.tool_calls:
            self.handle_tool_calls(response)

        ## If no tool call: Handle edge cases
        ### Check if there's a refusal to answer:
        elif response.refusal:
            self.handle_refusal(response)

        # Else response content does not contain tool calls or refusal, and we add it to the chat_history in an assistant format.
        else:
            self.update_history(apply_assistant_format(response.content))
            if self.verbose:
                print(f"\nMODEL RESPONSE: \n{response.content}")

In [22]:
def agent_loop(agent, game, num_loops=10):
    """
    Run the agent until the game is won or the loop budget is used up.

    The win condition is checked before every run and once more after the final run,
    so a win reached on the last permitted loop is still reported.

    Args:
        agent (WikiAgent): The agent to run
        game (WikiGame): The game to play
        num_loops (int): The maximum number of times agent.run() is called
    """

    for _ in range(num_loops):
        if game.check_win():
            print("Success!")
            return
        agent.run()

    # The last run may itself have reached the goal; without this check that win would go unreported.
    if game.check_win():
        print("Success!")

In [23]:
# Demo run: basic WikiGame/WikiAgent from "Barack Obama" to "India", at most 10 agent loops.
# Constructing the agent already prints the starting instructions (WikiAgent.__init__ calls start()).
game_1 = WikiGame("Barack Obama", "India")
agent = WikiAgent(task=game_1, tools=wiki_game_tools)
agent_loop(agent, game_1, 10)


SYSTEM: 
You are a wikipedia-racing AI. Your aim is to reach the goal page by accessing links from a series of wikipedia pages. 

USER: 
You are currently on page: Barack Obama. Your goal page is India.


KeyboardInterrupt: 

## Elicitation

#### Basic Agent

In [24]:
class WikiGamePrompting(WikiGame):
    """
    WikiGame variant whose three instruction messages carry more guidance for the model.

    Everything except the three instruction properties is inherited unchanged from WikiGame.

    Attributes:
        starting_page: The starting page (inherited)
        goal_page: The goal page; its `.title` is read when building the prompts (inherited)
        current_page (WikipediaPage): The current Wikipedia page (inherited)
        page_history (List[str]): The history of pages visited (inherited)

    Methods:
        get_page(title: str) -> WikipediaPage: Get a Wikipedia page object given a title (inherited)

        get_page_summary(page: WikipediaPage | None = None) -> str: Get the summary of a Wikipedia page (inherited)

        get_permitted_links(title: Optional[str] = None) -> list[str]: Get permitted links for the current page (inherited)

        is_permitted_link(link: str) -> bool: Check if a link is permitted (inherited)

        system_instruction -> dict: System message naming the goal and current page (overridden)

        on_page_instruction -> dict: User message with strategy advice, goal summary and path so far (overridden)

        next_step_instruction -> dict: User message asking for the next step towards the goal (overridden)

        check_win() -> bool: Check if the game has been won (inherited)

    """

    @property
    def system_instruction(self):
        """
        Build the system message, naming both the goal page and the current page.

        Returns:
            dict: The starting instructions. "role" is "system" for system messages.
        """
        goal = self.goal_page.title
        here = self.current_page.title
        brief = f"You are a wikipedia-racing AI. Your goal is to reach {goal} by accessing links from wikipedia pages. Your current page is {here}."
        return {"role": "system", "content": brief}

    @property
    def on_page_instruction(self):
        """
        Build the per-page user message.

        The message names the current page, asks the model to reason about a strategy,
        quotes the goal page summary, and lists the path taken so far.

        Returns:
            dict: The instructions for the current page. "role" is "user" for user messages.
        """
        here = self.current_page.title
        goal = self.goal_page.title
        summary = self.get_page_summary(self.goal_page)
        path_so_far = " -> ".join(self.page_history)
        # The message ends with a newline followed by twelve spaces; kept so the emitted text is unchanged.
        content = (
            f"You are currently on page: {here}. Make sure you start by reasoning about what steps you should take to get to the article on {goal}. When coming up with a strategy, make sure to pay attention to the path you have already taken, and if your current strategy doesn't seem to be working out, try something else. In case you're unsure, {goal} has the following summary:\n\n[Begin Summary]\n{summary}\n[End Summary]\n\nThe path you have taken so far is {path_so_far}.\n"
            "            "
        )
        return {"role": "user", "content": content}

    @property
    def next_step_instruction(self):
        """
        Build the follow-up user message, reminding the model of the goal page.

        Returns:
            dict: The instructions for the next step. "role" is "user" for user messages.
        """
        question = f"What's your next step to get to {self.goal_page.title}?"
        return {"role": "user", "content": question}

In [25]:
# WikiGamePrompting (improved prompts) with the original WikiAgent, at most 30 agent loops
game = WikiGamePrompting("Linux", "Animals")
agent = WikiAgent(game, model="gpt-4o-mini", tools=wiki_game_tools)
agent_loop(agent, game, 30)




SYSTEM: 
You are a wikipedia-racing AI. Your goal is to reach Animal by accessing links from wikipedia pages. Your current page is Linux. 

USER: 
You are currently on page: Linux. Make sure you start by reasoning about what steps you should take to get to the article on Animal. When coming up with a strategy, make sure to pay attention to the path you have already taken, and if your current strategy doesn't seem to be working out, try something else. In case you're unsure, Animal has the following summary:

[Begin Summary]
Animals are multicellular, eukaryotic organisms in the biological kingdom Animalia (). With few exceptions, animals consume organic material, breathe oxygen, have myocytes and are able to move, can reproduce sexually, and grow from a hollow sphere of cells, the blastula, during embryonic development. Animals form a clade, meaning that they arose from a single common ancestor.
Over 1.5 million living animal species have been described, of which around 1.
[End Summar

KeyboardInterrupt: 

#### ReAct Agent

In [26]:
class WikiGameReAct(WikiGamePrompting):
    """
    Inherits from WikiGamePrompting and adds the ReAct framework.

    Attributes:
        starting_page (str): The title of the starting page (inherited)
        goal_page (str): The title of the goal page (inherited)
        current_page (WikipediaPage): The current Wikipedia page (inherited)
        page_history (List[str]): The history of pages visited (inherited)
        tools (List[Any]): The tools described to the model in the system message (empty list if none were given)

    Methods:

        get_page(title: str) -> WikipediaPage: Get a Wikipedia page object given a title (inherited)

        get_page_summary(page: WikipediaPage | None = None) -> str: Get the summary of a Wikipedia page (inherited)

        get_permitted_links(title: Optional[str] = None) -> list[str]: Get permitted links for the current page (inherited)

        is_permitted_link(link: str) -> bool: Check if a link is permitted (inherited)

        system_instruction -> dict: Generate the starting instructions for the game, including tool descriptions (modified below)

        on_page_instruction -> dict: Generate instructions for the current page (inherited)

        next_step_instruction -> dict: Generate instructions for the next step (inherited)

        check_win() -> bool: Check if the game has been won (inherited)

    """

    def __init__(self, starting_page: str, goal_page: str, tools=None):
        """
        Create the game and remember which tools should be described to the model.

        Args:
            starting_page (str): Passed through to the parent constructor.
            goal_page (str): Passed through to the parent constructor.
            tools: Tool objects exposing a `description` dict. None is treated as "no tools".
        """
        super().__init__(starting_page, goal_page)
        # Normalise a missing tool list to an empty one: system_instruction iterates over
        # self.tools and takes its length, both of which fail on None.
        self.tools = tools if tools is not None else []

    @property
    def system_instruction(self):
        """
        Provide a description of the tools in the system message. When generate is called with tools this is redundant, but when generate is called without tools, this is useful.

        Returns:
            dict: The starting instructions. "role" is "system" for system messages.
        """
        # One "name:description" line per tool, taken from each tool's API description.
        tool_descriptions = "\n".join(
            tool.description["function"]["name"] + ":" + tool.description["function"]["description"]
            for tool in self.tools
        )
        return {
            "role": "system",
            "content": f"""You are a wikipedia-racing AI. Your goal is to reach {self.goal_page.title} by accessing links from wikipedia pages. Your current page is {self.current_page.title}. You have access to {str(len(self.tools))} tools, which are:\n{tool_descriptions}""",
        }


class WikiAgentReAct(WikiAgent):
    """
    Inherits from WikiAgent and adds the ReAct framework.

    Attributes:
        model (str): The model used for generating responses (inherited)
        tools (List[Any]): List of tools (inherited)
        client (OpenAI): OpenAI client for API calls (inherited)
        task (Any): The current task being executed (inherited)
        chat_history (List[dict]): History of interactions (inherited)

    Methods:
        get_response(use_tool: bool = True) -> ChatCompletionMessage: Get response from the model (inherited)

        execute_tool_calls(message: ChatCompletionMessage) -> List[str]: Execute tool calls from the model's response (inherited)

        update_history(message : str | ChatCompletionMessage | List[str | ChatCompletionMessage]): Update self.chat_history and self.full_chat_history with a message or list of messages. (inherited)

        reset_history(): Empty self.chat_history of the agent. (inherited)

        handle_tool_calls(response: ChatCompletionMessage): Handles tool_calls in the wikipedia game context. (inherited)

        handle_refusal(response: ChatCompletionMessage): Handles refusals in the wikipedia game context. (inherited)

        start(): A function to put the starting instructions in agent.chat_history when the agent starts a new page or starts the game. (inherited)

        generate_reason() -> ChatCompletionMessage: Ask the model to reason, without tools.

        generate_action() -> ChatCompletionMessage: Ask the model for an action, with tools.

        generate_reason_and_action() -> ChatCompletionMessage: Generate and store a reason, then return an action.

        run(): This function runs one reason-then-act loop of the agent. (modified below)

    """

    def generate_reason(self) -> ChatCompletionMessage:
        """
        Ask the model to reason about the current state of the game, without giving it tools.

        The prompt is appended to self.chat_history only (not to self.full_chat_history).

        Returns:
            ChatCompletionMessage: The model's reasoning response.
        """
        self.chat_history.append(
            apply_user_format(
                # The space before the title matters: without it the prompt reads "closer toIndia.".
                "Think carefully about your current situation and what actions you want to take to get closer to "
                + self.task.goal_page.title
                + "."
            )
        )
        response = self.get_response(use_tool=False)
        return response

    def generate_action(self) -> ChatCompletionMessage:
        """
        Ask the model which action it wants to take, with tool calling enabled.

        The prompt is appended to self.chat_history only (not to self.full_chat_history).

        Returns:
            ChatCompletionMessage: The model's action response.
        """
        self.chat_history.append(apply_user_format("What action do you want to take?"))
        response = self.get_response(use_tool=True)
        return response

    def generate_reason_and_action(self):
        """
        Generate a reason, store this in history, then generate and return an action.

        Returns:
            ChatCompletionMessage: The action response; it is NOT yet stored in history.
        """
        reason = self.generate_reason()
        self.update_history(apply_assistant_format(reason.content))
        # Note: the reasoning is printed even when verbose is False.
        print("\nModel response ('Reason'):", reason.content)

        action = self.generate_action()

        return action

    def run(self):
        """
        Run one loop of the agent.

        This function:
            - Generates a Reason and Action
            - Handles the tool calls, refusals, and no tool calls in the model response
        """
        response = self.generate_reason_and_action()

        if response.tool_calls:
            self.handle_tool_calls(response)
        elif response.refusal:
            self.handle_refusal(response)
        else:
            # Plain-text answer: record it like WikiAgent.run does, otherwise the action
            # response would be dropped and the history would end on an unanswered user prompt.
            self.update_history(apply_assistant_format(response.content))
            if self.verbose:
                print(f"\nMODEL RESPONSE: \n{response.content}")

In [27]:
def agent_loop_ReAct(game, agent, num_loops = 10):
    """
    Run the agent loop for a given number of loops with the ReAct framework.

    Note the argument order: the game comes first, then the agent.

    Args:
        game (WikiGameReAct): The game to play
        agent (WikiAgentReAct): The agent to run
        num_loops (int): The maximum number of times agent.run() is called
    """
    # WikiAgent.__init__ already calls start(), so an agent built that way has its starting
    # instructions in chat_history. Only send them here when the history is empty; calling
    # start() unconditionally would add the system/user instructions a second time.
    if not getattr(agent, "chat_history", None):
        agent.start()

    for _ in range(num_loops):
        if game.check_win():
            print("Success")
            return
        agent.run()

    # The last run may itself have reached the goal; without this check that win would go unreported.
    if game.check_win():
        print("Success")

In [44]:
# WikiGamePrompting (improved prompting) with the base WikiAgent, at most 40 agent loops
# game = WikiGamePrompting("September 11 attacks", "Anal Sex")
game = WikiGamePrompting("Linux", "September 11 attacks")
agent = WikiAgent(task=game, tools=wiki_game_tools)
agent_loop(agent, game, 40)


SYSTEM: 
You are a wikipedia-racing AI. Your goal is to reach September 11 attacks by accessing links from wikipedia pages. Your current page is Linux. 

USER: 
You are currently on page: Linux. Make sure you start by reasoning about what steps you should take to get to the article on September 11 attacks. When coming up with a strategy, make sure to pay attention to the path you have already taken, and if your current strategy doesn't seem to be working out, try something else. In case you're unsure, September 11 attacks has the following summary:

[Begin Summary]
The September 11 attacks, commonly known as 9/11,  were four coordinated Islamist terrorist suicide attacks carried out by al-Qaeda against the United States in 2001. On that Tuesday morning, 19 terrorists organized into 4 teams hijacked four commercial airliners scheduled to travel from the East Coast to California.
[End Summary]

The path you have taken so far is Linux.
            

TOOL CALL: 
Tool = get_content, Args =

In [37]:
# WikiGameReAct and WikiAgentReAct (reason-then-act loop), at most 40 agent loops.
# Note: agent_loop_ReAct takes (game, agent, num_loops) - game first, unlike agent_loop.
game = WikiGameReAct("Linux", "September 11 attacks", tools=wiki_game_tools)
agent = WikiAgentReAct(game, model="gpt-4o-mini", tools=wiki_game_tools)
agent_loop_ReAct(game, agent,40)


SYSTEM: 
You are a wikipedia-racing AI. Your goal is to reach September 11 attacks by accessing links from wikipedia pages. Your current page is Linux. You have access to 2 tools, which are:
get_content:Get all the content for the wikipedia page you are currently on. Anything which corresponds to a link you can select to move to will be wrapped in <link></link> tags.
move_page:Changes your current page to a specified new page which is accessible via a link from the current page. You can only call this function once at a time, as it will take you to a different page. 

USER: 
You are currently on page: Linux. Make sure you start by reasoning about what steps you should take to get to the article on September 11 attacks. When coming up with a strategy, make sure to pay attention to the path you have already taken, and if your current strategy doesn't seem to be working out, try something else. In case you're unsure, September 11 attacks has the following summary:

[Begin Summary]
The Se

#### Reflexion Agent

In [38]:
class TestPathTool():
    """
    Implements a tool that allows the agent to test paths from the current state of the game.

    Attributes:
        name (str): The name of the tool

    Methods:
        execute(task: Any, path: str) -> str: Test if a given path is valid.

        description -> dict: Provides the description of the TestPathTool tool for the API
    """

    name = "test_path"

    def execute(self, task: Any, path: str) -> str:
        """
        Test if a given path is valid.

        The first node must equal the current page title exactly; every later hop is
        checked case-insensitively against the permitted links of the node before it.

        Args:
            task (Any): The current task object. It must expose `current_page.title` and `get_permitted_links(title)`.
            path (str): A string representing a path, e.g., "Barack Obama -> Indonesia -> India"

        Returns:
            str: A message indicating whether the path is valid or where it fails.
        """
        # "".split("->") yields [""], never an empty list, so emptiness has to be tested on the raw string.
        if not path.strip():
            return "ERROR: Empty path provided."

        path_nodes = [node.strip() for node in path.split("->")]

        if path_nodes[0] != task.current_page.title:
            return f"ERROR: The path should start with the current page: {task.current_page.title}"

        # Walk consecutive (from, to) pairs of the path.
        for current_node, next_node in zip(path_nodes, path_nodes[1:]):
            permitted_links = {link.lower() for link in task.get_permitted_links(current_node)}

            if next_node.lower() not in permitted_links:
                return f"This path works only until {next_node}, which is not accessible from {current_node}"
        return "This path is valid."

    @property
    def description(self) -> dict:
        """
        Provides the description of the test_path tool

        Returns:
            dict: The description of the tool for the API
        """
        return {
            "type": "function",
            "function": {
                "name": "test_path",
                "description": "Accepts a test path string in the form \"current_page -> page1 -> page2 -> ... -> pageN\" and if the path does not work, then it returns where the path goes wrong, if the path does work it returns \"success.\" Be careful that path titles can be sensitive to plurals or rephrasings. This tool is especially useful to check longer plans.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "The path you want to test, formatted as \" current_page -> page1 -> page2 -> ... -> pageN\"."
                        },
                    },
                    "required" : ["path"]
                }
            }
        }

# Rebind wiki_game_tools so it also contains the path-testing tool; code that reads
# wiki_game_tools after this point gets all three tools.
TestPathTool_inst = TestPathTool()
wiki_game_tools = [get_content_tool_inst, move_page_tool_inst, TestPathTool_inst]

In [39]:
# ReAct game and agent again, this time with the three-tool list (including test_path), at most 40 loops.
game = WikiGameReAct("Linux", "September 11 attacks", tools = wiki_game_tools)
agent = WikiAgentReAct(game, model="gpt-4o-mini", tools = wiki_game_tools)
agent_loop_ReAct(game,agent, 40)


SYSTEM: 
You are a wikipedia-racing AI. Your goal is to reach September 11 attacks by accessing links from wikipedia pages. Your current page is Linux. You have access to 3 tools, which are:
get_content:Get all the content for the wikipedia page you are currently on. Anything which corresponds to a link you can select to move to will be wrapped in <link></link> tags.
move_page:Changes your current page to a specified new page which is accessible via a link from the current page. You can only call this function once at a time, as it will take you to a different page.
test_path:Accepts a test path string in the form "current_page -> page1 -> page2 -> ... -> pageN" and if the path does not work, then it returns where the path goes wrong, if the path does work it returns "success." Be careful that path titles can be sensitive to plurals or rephrasings. This tool is especially useful to check longer plans. 

USER: 
You are currently on page: Linux. Make sure you start by reasoning about wh

In [None]:
name = "test_path"

def execute(self, task: Any, path: str) -> str:

    path_nodes = [node.strip() for node in path.split("->")]

    if not path_nodes:
        return "ERROR: Empty path provided."

    if path_nodes[0] != task.current_page.title:
        return f"ERROR: The path should start with the current page: {task.current_page.title}"

    for i in range(len(path_nodes) - 1):
        current_node = path_nodes[i]
        next_node = path_nodes[i + 1]

        permitted_links = set(link.lower() for link in task.get_permitted_links(current_node))

        if next_node.lower() not in permitted_links:
            return f"This path works until {next_node}, which is not accessible from {current_node}"

    return "This path is valid."

@property
def description(self):
    """
    Describe the test_path tool in the function-calling format sent to the API.

    Returns:
        dict: The description of the tool, with a single required string parameter "path".
    """
    path_schema = {
        "type": "string",
        "description": "The path you want to test, formatted as \" current_page -> page1 -> page2 -> ... -> pageN\".",
    }
    function_spec = {
        "name": "test_path",
        "description": "Accepts a test path string in the form \"current_page -> page1 -> page2 -> ... -> pageN\" and if the path does not work, then it returns where the path goes wrong, if the path does work it returns \"success.\" Be careful that path titles can be sensitive to plurals or rephrasings. This tool is especially useful to check longer plans.",
        "parameters": {
            "type": "object",
            "properties": {"path": path_schema},
            "required": ["path"],
        },
    }
    return {"type": "function", "function": function_spec}

#### Agent with access to entire chat history

In [40]:
class WikiAgentChatHistory(WikiAgentReAct):
    """
    Inherits from WikiAgentReAct and keeps the chat history across page moves,
    blanking out only the bulky get_content tool outputs.

    Attributes:
        model (str): The model used for generating responses (inherited)
        tools (List[Any]): List of tools (inherited)
        client (OpenAI): OpenAI client for API calls (inherited)
        task (Any): The current task being executed (inherited)
        chat_history (List[dict]): History of interactions (inherited)
        full_chat_history (List[dict]): Full history of interactions (not set in this class)

    Methods:
        - get_response(use_tool: bool = True) -> ChatCompletionMessage: Get response from the model (inherited)
        - execute_tool_calls(message: ChatCompletionMessage) -> List[str]: Execute tool calls from the model's response (inherited)
        - update_history(message : str | ChatCompletionMessage | List[str | ChatCompletionMessage]): Update self.chat_history and self.full_chat_history with a message or list of messages. (inherited)
        - reset_history(): Replace get_content tool outputs in self.chat_history with a short placeholder. (overridden below)
        - handle_tool_calls(response: ChatCompletionMessage): Handles tool_calls in the wikipedia game context. (inherited)
        - handle_refusal(response: ChatCompletionMessage): Handles refusals in the wikipedia game context. (inherited)
        - start(): A function to put the starting instructions in agent.chat_history when the agent starts a new page or starts the game. (inherited)
        - run(): This function runs 1 loop of the agent in the wikipedia game. (inherited)

    NOTE(review): earlier documentation also listed store_chat_history() and
    retrieve_chat_history(); neither is defined in this class body. Presumably
    they are expected from a parent class or were never implemented - confirm.
    """
    def reset_history(self):
        """
        Replace the output of the get_content tool responses with "Wikipedia Content was output here." when the agent moves to a new page.

        This function should only be called if the agent moved to a new page. It:
            - Looks through the messages in the chat history
            - Determines if a message is a get_content tool call response
            - Replaces the content of that response with the placeholder, in place

        Messages that are not dicts, or that lack a "role" or "name" key, are left untouched.
        """
        text_placeholder = "Wikipedia Content was output here."

        for message in self.chat_history:
            # Only dict-shaped messages can be edited in place; anything else is skipped.
            if not isinstance(message, dict):
                continue

            # .get() rather than indexing: a message without "role" or "name"
            # is simply not a get_content response and must not raise KeyError.
            if message.get("role") == "tool" and message.get("name") == "get_content":
                # Re-assigning an already-replaced message is a harmless no-op.
                message["content"] = text_placeholder

In [46]:
# Set up a game that starts on the "Linux" page with "September 11 attacks" as the goal page.
game = WikiGameReAct("Linux", "September 11 attacks", tools=wiki_game_tools)
# Alternative start/goal pair (disabled):
# game = WikiGameReAct("September 11 attacks", "Anal Sex", tools=wiki_game_tools)
# Use the agent whose reset_history() keeps the chat history and only blanks get_content outputs.
agent = WikiAgentChatHistory(game, model="gpt-4o-mini", tools = wiki_game_tools)
# NOTE(review): the third argument (40) is presumably the maximum number of agent loops - confirm against agent_loop_ReAct.
agent_loop_ReAct(game, agent, 40)


SYSTEM: 
You are a wikipedia-racing AI. Your goal is to reach September 11 attacks by accessing links from wikipedia pages. Your current page is Linux. You have access to 3 tools, which are:
get_content:Get all the content for the wikipedia page you are currently on. Anything which corresponds to a link you can select to move to will be wrapped in <link></link> tags.
move_page:Changes your current page to a specified new page which is accessible via a link from the current page. You can only call this function once at a time, as it will take you to a different page.
test_path:Accepts a test path string in the form "current_page -> page1 -> page2 -> ... -> pageN" and if the path does not work, then it returns where the path goes wrong, if the path does work it returns "success." Be careful that path titles can be sensitive to plurals or rephrasings. This tool is especially useful to check longer plans. 

USER: 
You are currently on page: Linux. Make sure you start by reasoning about wh