# Prompto-Agent

In this notebook a Zero Shot ReAct Agent with a custom template is used. This could be extended upon with a clever prompting strategy to query multiple times the retriever for better responses.

In [3]:
!pip install langchain openai GitPython chromadb unstructured markdown sentence_transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
from dotenv import load_dotenv
load_dotenv()

True

In [5]:
from langchain.document_loaders import GitLoader

code_loader = GitLoader(
    clone_url="https://github.com/hpi-swa-lab/godot-pronto",
    repo_path="./pronto",
    branch="master",
    file_filter=lambda file_path: file_path.endswith(".gd")
)
code_documents = code_loader.load()

In [6]:
from langchain.document_loaders import UnstructuredMarkdownLoader

readme_loader = UnstructuredMarkdownLoader("pronto/README.md")
readme_document = readme_loader.load()

In [7]:
separators = [
    # First, try to split along class definitions
    "\nclass ",
    "\nfunc ",
    "\n\tfunc ",
    # Now split by the normal type of lines
    "\n\n",
    "\n",
    " ",
    "",
]

In [8]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language

In [9]:
code_splitter = RecursiveCharacterTextSplitter(separators=separators, chunk_size=512, chunk_overlap=200)
code_splits = code_splitter.split_documents(code_documents)

In [27]:
readme_splitter = RecursiveCharacterTextSplitter.from_language(language=Language.MARKDOWN, chunk_size=200, chunk_overlap=20)
readme_splits = readme_splitter.split_documents(readme_document)

In [11]:
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.embeddings import OpenAIEmbeddings

In [12]:
embeddings = OpenAIEmbeddings()

In [13]:
from langchain.vectorstores import Chroma

In [14]:
code_db = Chroma.from_documents(documents=code_splits, embedding=embeddings)
code_retriever = code_db.as_retriever(
    search_type="mmr",  # Also test "similarity"
    search_kwargs={"k": 4},
)

In [28]:
readme_db = Chroma.from_documents(documents=readme_splits, embedding=embeddings)
readme_retriever = readme_db.as_retriever(
    search_type="mmr",  # Also test "similarity",
    search_kwargs={"k": 8}
)

In [29]:
from langchain.agents.agent_toolkits import create_retriever_tool

code_tool = create_retriever_tool(
    code_retriever,
    "search_codebase",
    "Searches and returns code.",
)
readme_tool = create_retriever_tool(
    readme_retriever,
    "search_readme",
    "Searches and returns documentation"
)
tools = [code_tool, readme_tool]

In [17]:
# Set up the base template
template = """Complete the objective as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

These were previous tasks you completed:



Begin!

Question: {input}
{agent_scratchpad}"""

In [18]:
from typing import List
from langchain.schema import HumanMessage
from langchain.prompts import BaseChatPromptTemplate
from langchain.agents import Tool


# Set up a prompt template
class CustomPromptTemplate(BaseChatPromptTemplate):
    # The template to use
    template: str
    # The list of tools available
    tools: List[Tool]

    def format_messages(self, **kwargs) -> str:
        # Get the intermediate steps (AgentAction, Observation tuples)
        # Format them in a particular way
        intermediate_steps = kwargs.pop("intermediate_steps")
        thoughts = ""
        for action, observation in intermediate_steps:
            thoughts += action.log
            thoughts += f"\nObservation: {observation}\nThought: "
        # Set the agent_scratchpad variable to that value
        kwargs["agent_scratchpad"] = thoughts
        # Create a tools variable from the list of tools provided
        kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
        # Create a list of tool names for the tools provided
        kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
        formatted = self.template.format(**kwargs)
        return [HumanMessage(content=formatted)]

In [19]:
prompt = CustomPromptTemplate(
    template=template,
    tools=tools,
    # This omits the `agent_scratchpad`, `tools`, and `tool_names` variables because those are generated dynamically
    # This includes the `intermediate_steps` variable because that is needed
    input_variables=["input", "intermediate_steps"]
)

In [20]:
from typing import Union
import re
from langchain.schema import AgentAction, AgentFinish
from langchain.agents import AgentOutputParser


class CustomOutputParser(AgentOutputParser):

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        # Check if agent should finish
        if "Final Answer:" in llm_output:
            return AgentFinish(
                # Return values is generally always a dictionary with a single `output` key
                # It is not recommended to try anything else at the moment :)
                return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
                log=llm_output,
            )
        # Parse out the action and action input
        regex = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if not match:
            raise ValueError(f"Could not parse LLM output: `{llm_output}`")
        action = match.group(1).strip()
        action_input = match.group(2)
        # Return the action and action input
        return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)

In [21]:
output_parser = CustomOutputParser()

In [22]:
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0, )

In [23]:
from langchain.chains import LLMChain

# LLM chain consisting of the LLM and a prompt
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [30]:
from langchain.agents import LLMSingleActionAgent

tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    llm_chain=llm_chain,
    output_parser=output_parser,
    stop=["\nObservation:"],
    allowed_tools=tool_names
)

In [37]:
from langchain.agents import AgentExecutor

agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, reduce_k_below_max_tokens=True)

In [38]:
agent_executor.run("Describe me how do I connect two behaviours using the pronto framework?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find documentation on how to connect two behaviors using the Pronto framework.
Action: search_readme
Action Input: "connect behaviors Pronto framework"[0m

Observation:[33;1m[1;3m[Document(page_content='primary means to assemble your game by wiring Behaviors together.', metadata={'source': 'pronto/README.md'}), Document(page_content='Play prototypes created in pronto here.\n\nFunction', metadata={'source': 'pronto/README.md'}), Document(page_content='Pronto consists of a set of Godot Nodes called Behavior that can be added to a Godot scene. These behaviors are aspects that, when combined, result in the expression of complex behavior in the Godot', metadata={'source': 'pronto/README.md'}), Document(page_content='Re-enable the plugin (go to Project>Settings>Plugins and toggle Pronto off and on)\n\nWhen creating a completely new Behavior file.\n\nRe-open the scene to update existing Behaviors', metadata={'s

BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 4097 tokens. However, your messages resulted in 4349 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

In [39]:
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.llms import OpenAI

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

In [40]:
adversarial_prompt = """
For this new prompt, you only have access to the tool 'search_readme' and 'search_codebase'.

Question: How do I connect two behaviors using the pronto framework."""

In [41]:
agent.run(adversarial_prompt)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find documentation on the pronto framework to understand how to connect two behaviors.
Action: search_readme
Action Input: pronto framework[0m
Observation: [33;1m[1;3m[Document(page_content='Pronto', metadata={'source': 'pronto/README.md'}), Document(page_content='Pronto is a framework for Godot to make prototyping game mechanics faster. It is not a framework', metadata={'source': 'pronto/README.md'}), Document(page_content='used to consolidate changes in the framework.', metadata={'source': 'pronto/README.md'}), Document(page_content='prepared some for you to use.', metadata={'source': 'pronto/README.md'}), Document(page_content='Pronto scatters code throughout the scene to be as close to the place where it is relevant. Since there is limited space on the screen, we provide a set of utility functions that make formulating', metadata={'source': 'pronto/README.md'}), Document(page_content='super._ready() and super

ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `I have searched both the readme and codebase for information on connecting two behaviors using the pronto framework, but I couldn't find any specific instructions. It's possible that this information is not documented or available in the codebase.`