#### KG Agent with Langchain

- Determining how accurately LLMs can navigate a knowledge graph (KG) similar to that of a website
- Determining what inputs are necessary for that KG

Testing langchain’s ability to have agents as tools.

In [1]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.schema import AgentAction, AgentFinish, HumanMessage
from langchain.prompts import BaseChatPromptTemplate
from langchain import SerpAPIWrapper, LLMChain
from langchain.chat_models import ChatOpenAI

from typing import List, Union, Callable
import pprint
import pandas as pd
import re

from KG_tools import *

This will be replaced by a real web-page parsing function. Each node's description also contains the descriptions of its child nodes. It can be seen as a Markov decision process (MDP).

In [None]:
def _kg_cell(df, node_name, column, filepath):
    """Return ``column`` from the first row of ``df`` whose ``node`` equals ``node_name``.

    Raises:
        IndexError: If no row matches. The exception type is the one the
            original ``.tolist()[0]`` lookup produced; only the message is new.
    """
    values = df.loc[df['node'] == node_name, column].tolist()
    if not values:
        raise IndexError(f"node {node_name!r} has no row in {filepath!r}")
    return values[0]


def _kg_children(df, node_name, filepath):
    """Return the names listed in ``node_name``'s ``next_node`` cell (``[]`` if the cell is empty)."""
    cell = _kg_cell(df, node_name, 'next_node', filepath)
    # An empty spreadsheet cell is read back by pandas as NaN.
    return [] if pd.isna(cell) else cell.split(', ')


def get_node(current_node, filepath="data/KG.xlsx"):
    """Build one ``Tool`` per child of ``current_node`` in the KG spreadsheet.

    The spreadsheet is re-read on every call. It must provide the columns
    ``node``, ``description`` and ``next_node``; ``next_node`` holds the child
    names joined by ``', '``.

    Each tool's description is the child's own description followed by the
    name and description of every grandchild (or ``"Nothing."`` when the child
    has none), so the model can look one step ahead.

    Args:
        current_node: Name of the node whose children are wanted.
        filepath: Path of the Excel file describing the graph.

    Returns:
        A list of ``Tool`` objects in the order the children are listed in
        ``next_node``. The list is empty when ``current_node`` has no children
        or is not in the spreadsheet; in the latter case ``NO CURRENT NODE``
        is printed.

    Raises:
        IndexError: If a child or grandchild name has no row of its own.
    """
    df = pd.read_excel(filepath)

    if current_node not in df['node'].tolist():
        print("NO CURRENT NODE")
        return []

    child_node_list = []
    for child_node in _kg_children(df, current_node, filepath):
        description = (
            _kg_cell(df, child_node, 'description', filepath)
            + " If you select this element, then the element interactable in the next webpage is: "
        )

        grandchildren = _kg_children(df, child_node, filepath)
        if grandchildren:
            # The prompt text has always used a different capitalisation for the
            # single-element and multi-element cases; both are kept byte-for-byte
            # so that prompts produced from an existing graph do not change.
            if len(grandchildren) > 1:
                label = ' Element Description: '
            else:
                label = ' Element description: '
            for grandchild_node in grandchildren:
                description += (
                    '\n  Element Name: ' + grandchild_node + label
                    + _kg_cell(df, grandchild_node, 'description', filepath)
                )
        else:
            description += "Nothing."

        # SECURITY NOTE(review): eval() resolves the node name to a callable that
        # is expected to be in scope (e.g. via `from KG_tools import *`). It will
        # execute whatever expression the spreadsheet contains, so only use
        # trusted KG files; an explicit name -> function registry would be safer.
        child_node_list.append(
            Tool(name=child_node, func=eval(child_node), description=description, return_direct=True)
        )

    return child_node_list

Test this state transition function.

In [None]:
# Smoke test: build the tools for the children of the 'disable' node,
# reading the graph from the default filepath ("data/KG.xlsx").
get_node('disable')

Our main prompt for the agent.

In [None]:
# Prompt for a single element-selection step. It is a str.format template with
# three placeholders:
#   {input}   - the user goal,
#   {history} - the elements selected so far,
#   {tools}   - the elements available on the current page.
# The model is asked to answer with "ELEMENT NAME:" / "ELEMENT INPUT:" lines,
# or with "FINISH" once it considers the goal achieved.
MAIN_PROMPT = """You are a web interaction AI and you have all interactable elements of the current web page. Each element has a name and a description.
HISTORY is a time series of all the elements you have selected from the initial page to the current page so far.
USER GOAL is the web interaction that the user wants to achieve, and you have to achieve it step by step by selecting web elements.
Given USER GOAL and HISTORY, you need to choose the element from ELEMENTS that are most likely to achieve the USER GOAL.

The USER GOAL is: 
{input}

The HISTORY is: {history}

The current webpage ELEMENTS:
{tools}

When you determine that an element is selected, if the element is a clickable element (a button for example), then the input to that element should be None. 
If it is an input box element, then the input to the tool should be the text content you want to enter.

If you think the USER GOAL is finally done at current webpage (comparing the HISTORY and USER GOAL to make sure of that), then you must choose nothing and return "FINISH" directly.
You should only output selected element name, and the element's input if needed. Your Output:
ELEMENT NAME:
ELEMENT INPUT:
"""

Prompt template used to format the main prompt at every step.

In [None]:
class OneStepPromptTemplate(BaseChatPromptTemplate):
    """Chat prompt that renders ``template`` for one element-selection step.

    ``format_messages`` fills two variables itself (``history`` and ``tools``)
    and forwards every other keyword argument to ``template.format``
    unchanged.
    """

    # str.format template; receives `history`, `tools` and the caller's kwargs.
    template: str
    # Elements selectable at this step; each is rendered as "name: description".
    tools: List[Tool]

    def format_messages(self, **kwargs) -> List[HumanMessage]:
        """Render the template into a single human message.

        Args:
            **kwargs: Must contain ``intermediate_steps``, an iterable of
                ``(action, observation)`` pairs. Only the observations are
                used for the history (``action.log`` would be the
                alternative). Remaining entries are passed to the template.

        Returns:
            A one-element list holding the formatted ``HumanMessage``.

        Raises:
            KeyError: If ``intermediate_steps`` is missing, or the template
                references a variable that was not supplied.
        """
        steps = kwargs.pop("intermediate_steps")
        # One "OBSERVATION" line per step already taken, oldest first.
        kwargs["history"] = "".join(
            f"\nOBSERVATION: {observation}" for _, observation in steps
        )
        kwargs["tools"] = "\n".join(
            f"{tool.name}: {tool.description}" for tool in self.tools
        )
        return [HumanMessage(content=self.template.format(**kwargs))]

The output parser converts the LLM's output into an `AgentAction`.

In [None]:
class OneStepOutputParser(AgentOutputParser):
    """Turn the model's ``ELEMENT NAME`` / ``ELEMENT INPUT`` reply into an ``AgentAction``."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish, None]:
        """Parse one model reply.

        Args:
            llm_output: Raw text returned by the model.

        Returns:
            ``None`` when the reply contains ``FINISH`` or does not match the
            expected ``ELEMENT NAME: ... / ELEMENT INPUT: ...`` layout;
            otherwise an ``AgentAction`` whose ``tool`` is the element name
            and whose ``tool_input`` is the element input with surrounding
            whitespace and double quotes removed.
        """
        # None (rather than AgentFinish or an exception) is the "stop" signal;
        # the driver loop that calls agent.plan() tests for it explicitly.
        if "FINISH" in llm_output:
            return None

        # Optional digits are tolerated around the keywords (e.g. "ELEMENT 1 NAME:").
        regex = r"ELEMENT\s*\d*\s*NAME\s*\d*\s*:(.*?)\nELEMENT\s*\d*\s*INPUT\s*\d*\s*:[\s]*(.*)"
        match = re.search(regex, llm_output, re.DOTALL)
        if match is None:
            # NOTE(review): an unparseable reply is indistinguishable from
            # FINISH for the caller; kept as-is because the caller relies on it.
            return None

        element_name = match.group(1).strip()
        # Strip all surrounding whitespace first: with DOTALL the capture can end
        # in a newline, which previously kept the closing quote from being removed.
        element_input = match.group(2).strip().strip('"')
        return AgentAction(
            tool=element_name,
            tool_input=element_input,
            log=llm_output.replace('\n', ' '),
        )

Configuration for the GPT model (API key, temperature, and model name).

In [None]:
# `os` is imported here explicitly; it is not imported anywhere else in this
# notebook, so `os.environ` below would otherwise depend on a star-import
# happening to expose it.
import os

import yaml

# Read the run configuration. The file must define OPENAI_API_KEY,
# TEMPERATURE and MODEL_NAME; a missing key raises KeyError below.
with open("config/config.yaml", 'r', encoding="utf-8") as stream:
    config = yaml.safe_load(stream)

# The API key is exported through the environment rather than passed to the
# model constructor directly.
os.environ["OPENAI_API_KEY"] = config['OPENAI_API_KEY']
TEMPERATURE = config['TEMPERATURE']
MODEL_NAME = config['MODEL_NAME']

Now test our method with the dataset in `data/dataset.xlsx`.

In [None]:
# Run the one-step agent over every query in the dataset, walking the graph
# from the "start" node until the model stops or a node has no children.
df = pd.read_excel('data/dataset.xlsx')
llm = ChatOpenAI(model_name=MODEL_NAME, temperature=TEMPERATURE)
# The parser holds no state, so a single instance serves every step.
output_parser = OneStepOutputParser()

for _, row in df.iterrows():

    # Initialization: every walk begins at the synthetic "start" node.
    # `node` is always an (AgentAction, observation) pair.
    node = (AgentAction(tool="start", tool_input="None", log=""), "Start.")
    node_history = [node]
    user_goal = row['user_query']
    print("USER GOAL:", user_goal)

    while True:
        # The tools offered at this step are the children of the last selected node.
        node_list = get_node(node[0].tool)
        if not node_list:
            print("ARRIVED END STATE")
            break

        # The prompt and agent are rebuilt each step because the tool set changes.
        prompt = OneStepPromptTemplate(
            template=MAIN_PROMPT,
            tools=node_list,
            input_variables=["input", "intermediate_steps"],
        )
        llm_chain = LLMChain(llm=llm, prompt=prompt)
        agent = LLMSingleActionAgent(
            llm_chain=llm_chain,
            stop=["FINISH"],
            output_parser=output_parser,
            allowed_tools=[tool.name for tool in node_list],
        )

        action = agent.plan(input=user_goal, intermediate_steps=node_history)

        # The parser returns None for FINISH (and for replies it cannot parse).
        if action is None:
            print("DONE")
            break

        print("ACTION:", action)
        node = (action, f"You selected ELEMENT {action.tool} from {[n.name for n in node_list]}.")
        node_history.append(node)

To do
- web parser + selenium tool functions - See `building_KG.ipynb`
- (state, action) pair or using GPT for summarization  `DONE`
- Adding thoughts  `DONE`
- stop word problem `DONE`