In [None]:
import ast
import os

import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.chains import GraphCypherQAChain
from langchain.graphs import Neo4jGraph
from langchain.tools import Tool
from langchain.agents import AgentType,initialize_agent
from langchain.embeddings import OpenAIEmbeddings

from sqlalchemy import create_engine


# Pull settings from a local .env file into the process environment.
load_dotenv()

# Required settings: indexing os.environ makes a missing variable fail right
# here with a KeyError that names it.
URI = os.environ["NEO4J_INSTANCE01_URI"]
USER = os.environ["NEO4J_INSTANCE01_USER"]
PWD = os.environ["NEO4J_INSTANCE01_KEY"]
# Read the same way as the Neo4j settings so that a missing DATABASE_URL is
# reported by name instead of handing None to create_engine.
DATABASE_URL = os.environ["DATABASE_URL"]

# Shared clients used by the cells below.
graph = Neo4jGraph(url=URI, username=USER, password=PWD)
llm = ChatOpenAI(temperature=0, model='gpt-4')
embedding = OpenAIEmbeddings(model='text-embedding-ada-002')
connection = create_engine(DATABASE_URL)



In [None]:
def _normalize_concepts(text_list):
    """Turn the tool input into a plain list of concept strings.

    A list (or other non-string iterable) is returned as a list unchanged.
    A string is first parsed as a Python literal (e.g. "['a', 'b']"); when it
    is not a list/tuple literal it is split on commas after removing
    surrounding brackets and quotes. None and blank strings give [].
    """
    if text_list is None:
        return []
    if not isinstance(text_list, str):
        return list(text_list)

    raw = text_list.strip()
    if not raw:
        return []

    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]
    if isinstance(parsed, str):
        return [parsed] if parsed.strip() else []

    # Not a list literal (e.g. `a, b` or `[Introduction, Abstract]`):
    # fall back to a comma split. A concept that itself contains a comma is
    # therefore split in two.
    pieces = (piece.strip().strip("'\"").strip() for piece in raw.strip("[]()").split(","))
    return [piece for piece in pieces if piece]


def search_similar_nodes(text_list: "list[str] | str"):
    """Return the stored node contents closest to each given concept.

    Args:
        text_list: The concepts to look up. Either a list of strings or a
            single string. An agent tool call delivers its input as one
            string, and iterating that string directly would treat every
            character as a separate concept, so strings are normalized first
            (see _normalize_concepts).

    Returns:
        A flat list with the `content` values of up to two rows of
        `nodes_embeddings` per concept, in concept order. Empty input gives
        an empty list.
    """
    nodes = []
    for text in _normalize_concepts(text_list):
        text_embedding = embedding.embed_query(text)
        # The interpolated value is the str() of the embedding returned by
        # embed_query, not the caller's text.
        vector_string = str(text_embedding)
        # `<->` orders rows by distance to the vector (presumably pgvector's
        # distance operator - confirm against the database setup).
        sql_query = "SELECT content FROM nodes_embeddings ORDER BY embedding <-> '" + vector_string + "' LIMIT 2;"
        query_result = pd.read_sql(sql_query, connection)
        nodes.extend(query_result.content.to_list())
    return nodes

    

In [None]:
# Chain that lets the LLM write a Cypher query for a natural-language request
# and runs it against `graph`.
neo4j_search_chain = GraphCypherQAChain.from_llm(
    llm,
    return_direct=True,  # return the graph query result directly
    top_k=100,           # cap on the number of query results
    verbose=True,
    graph=graph,
)

In [None]:
# Schema text with every brace doubled. Nothing in the visible code of this
# cell references it.
graph_schema = str(graph.get_schema).translate(str.maketrans({"{": "{{", "}": "}}"}))


def _extract_neighborhood(nodes):
    """Ask the Cypher chain for the size-2 neighborhood subgraph of `nodes`."""
    request = "Return the neighborhood subgraph of size 2 of nodes " + str(nodes)
    return neo4j_search_chain.run(request)


# Tool descriptions as shown to the agent.
_SIMILAR_NODES_DESCRIPTION = """
## Purpose
This function is useful to retreive similar nodes to a list of concepts.

## Input
The input is a list of strings. Each string is a concept.

## Output 
You will receive the list of the interesting nodes for the given concepts
"""

_NEIGHBORHOOD_DESCRIPTION = """
## Purpose
This function is useful to retreive the the neighborhood subgraph of a given list of nodes

## Input
The input has to be a list of nodes. For example : ['1. Introduction','Abstract']

## Output 
The neighborhood subgraph of the nodes
"""

# The two tools handed to the agent: similarity lookup, then graph expansion.
tools = [
    Tool.from_function(
        func=search_similar_nodes,
        name="Search similar nodes",
        description=_SIMILAR_NODES_DESCRIPTION,
    ),
    Tool.from_function(
        func=_extract_neighborhood,
        name="Extract the neighborhood subgraph",
        description=_NEIGHBORHOOD_DESCRIPTION,
    ),
]

In [None]:
# Zero-shot ReAct agent wired to the tools above; stops after at most
# 10 iterations.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    max_iterations=10,
    verbose=True,
)


In [None]:
# Prompt that replaces the agent's default template.
# - The only placeholders are {input} and {agent_scratchpad}; any other brace
#   added to this text must be doubled.
# - The "Action:" line must name the tools exactly as they are registered
#   ("Search similar nodes", "Extract the neighborhood subgraph"): the agent
#   uses the text after "Action:" as the tool name to look up.
# - "Action Input:" asks for a list so that it agrees with steps 2 and 3.
sys_message = """
You are an assistant specialized in question answering using a knowledge graph database of a scientific paper.
You will be asked a question about the paper. Your job is to build the answer step by step using the tools.

You have access to exactly two tools:
- Search similar nodes
- Extract the neighborhood subgraph

1. Identify in the question the list of concepts targeted. There can be multiple concepts.
2. Use the "Search similar nodes" tool to get the interesting nodes. You have to pass the list of concepts from the previous step.
3. Use the "Extract the neighborhood subgraph" tool, passing the list of nodes, to retrieve the information.
4. Use this information to answer the initial question.


Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the name of the tool to use, written exactly and without quotes: either Search similar nodes or Extract the neighborhood subgraph
Action Input: the input to the action, a list of strings such as ['concept 1', 'concept 2']
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Question: {input}
Thought:{agent_scratchpad}
"""
agent.agent.llm_chain.prompt.template = sys_message

In [None]:
# Ask the agent a sample question about the paper; this call is the only
# statement in the cell.
agent.run('What is the article about?')