# 05_02: SOLUTION - Walkthrough of the project solution

In [1]:
import os

# Connection settings and API key. Each value is read from the environment
# when the variable is set, so real credentials never have to be saved inside
# the notebook; otherwise the original literal is used (replace the "..."
# placeholders for a quick local run). A set environment variable takes
# precedence over the literal.
URI = os.environ.get("NEO4J_URI", "...")
USER = os.environ.get("NEO4J_USERNAME", "neo4j")
PWD = os.environ.get("NEO4J_PASSWORD", "...")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "...")

In [None]:
from langchain_neo4j import Neo4jGraph
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import GraphDocument
import os

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_neo4j import GraphCypherQAChain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.evaluation.qa.eval_chain import QAEvalChain
from langchain.document_loaders import WikipediaLoader

TODO: Change the next cell to output only a short snippet of the loaded text instead of the full documents.

In [None]:
# Load Wikipedia content for the query "Portugal" into `text`.
text = WikipediaLoader(query="Portugal").load()
# Echo everything that was loaded as the cell output.
text

In [None]:
# Number of items the Wikipedia loader returned.
len(text)

In [None]:
# List the title of every loaded Wikipedia page.
for page in text:
    page_title = page.metadata['title']
    print(page_title)

In [None]:
# Chat model shared by graph extraction, Cypher generation and grading.
llm = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
    model_name="gpt-4o",
)

# Let the LLM turn the Wikipedia documents into graph documents.
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(text)

# Preview what was extracted from the first document only.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

In [None]:
# Connect to Neo4j and write the Wikipedia-derived graph documents to it.
# include_source=True asks the store to keep the source documents as well.
graph = Neo4jGraph(url=URI, username=USER, password=PWD)
graph.add_graph_documents(graph_documents, include_source=True)

Check this import

In [None]:
# Relative path to the course PDF; note the space in the file name.
pdf_path = '../data/portugal text.pdf'
loader = PyPDFLoader(pdf_path)
# Read the PDF into `pages`, which the next cell splits into chunks.
pages = loader.load()

In [None]:
# Break the PDF pages into smaller overlapping pieces (size 500, overlap 50)
# before sending them to the LLM for graph extraction.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(pages)

# Show how many chunks were produced.
len(chunks)

In [None]:
# Extract graph documents from the PDF chunks; this rebinds `graph_documents`,
# replacing the Wikipedia-derived list.
graph_documents = llm_transformer.convert_to_graph_documents(chunks)

# Preview what was extracted from the first chunk only.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

In [None]:
# Write the PDF-derived graph documents to the same Neo4j database.
# NOTE(review): unlike the Wikipedia import, include_source is not passed
# here - confirm the difference is intentional.
graph.add_graph_documents(graph_documents)

In [None]:
# Second handle on the same database, opened with enhanced_schema=True; this
# is the graph object the Cypher QA chains are built on.
# NOTE(review): presumably this enriches the schema text handed to the LLM -
# confirm against the Neo4jGraph documentation.
enhanced_graph = Neo4jGraph(url=URI, username=USER, password=PWD, enhanced_schema=True)

In [None]:
# First version of the Cypher-generation prompt: role, task, and a rule about
# capitalising entity ids. {schema} and {question} are filled in at run time.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to
answer questions about Portugal.
Convert the user's question based on the schema.

When you are presented with query properties such as id's like "rock pools",
be sure to convert the first letter to capital case, such as "Rock Pools"
before you run the Cypher query.

Schema: {schema}
Question: {question}
"""

# Wrap the template so the chain can substitute the two placeholders.
cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

# Question -> Cypher -> answer chain over the enhanced-schema graph.
# verbose=True is passed so the run can be inspected while tuning the prompt;
# allow_dangerous_requests=True is the explicit opt-in the chain asks for.
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=enhanced_graph,
    cypher_prompt=cypher_generation_prompt,
    verbose=True,
    allow_dangerous_requests=True
)



In [None]:
# Baseline check: ask about a named entity using the first prompt version.
cypher_chain.invoke({"query": "Tell me about the African union?"})

In [None]:
# Second version of the Cypher-generation prompt. On top of the capitalisation
# rule it tells the model to drop leading articles from entity names and gives
# one worked query that returns an entity together with its neighbours.
# {schema} and {question} are filled in at run time; the doubled braces keep
# the literal Cypher map syntax from being treated as a placeholder.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to
answer questions about Portugal.
Convert the user's question based on the schema.

When you are presented with query properties such as id's like "rock pools",
be sure to convert the first letter to capital case, such as "Rock Pools"
before you run the Cypher query.

You also will want to remove words like "the" or "an" in front of entities.  For
example, if I asked "Tell me about the schengen area", the entity is "Schengen Area"
and NOT "The Schengen Area".

For example, if I were to ask "Tell me about the schengen area", you should create
a Cypher query that finds all nodes with the id "Schengen Area" and then find
all nodes connected to those nodes and use those to formulate your answer, like this:

MATCH (a {{id: "Schengen Area"}})-[r]-(b)
RETURN a, r, b

Schema: {schema}
Question: {question}
"""

# Wrap the template so the chain can substitute the two placeholders.
cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

# Rebuild the chain so it picks up the refined prompt.
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=enhanced_graph,
    cypher_prompt=cypher_generation_prompt,
    verbose=True,
    allow_dangerous_requests=True
)


In [None]:
# Repeat the earlier question against the chain rebuilt with the refined prompt.
cypher_chain.invoke({"query": "Tell me about the African union?"})

In [None]:
# Probe a different question shape: an activity rather than a named entity.
cypher_chain.invoke({"query": "Where can I go canyoning?"})

In [None]:
# Third version of the Cypher-generation prompt. It keeps the capitalisation
# and article-stripping rules plus the entity example, and adds guidance not
# to pin the relationship type, with a worked "where can I ..." query.
# {schema} and {question} are filled in at run time; the doubled braces keep
# the literal Cypher map syntax from being treated as a placeholder.
CYPHER_GENERATION_TEMPLATE = """
You are an expert Neo4j Developer translating user questions into Cypher to
answer questions about Portugal.
Convert the user's question based on the schema.

When you are presented with query properties such as id's like "rock pools",
be sure to convert the first letter to capital case, such as "Rock Pools"
before you run the Cypher query.

You also will want to remove words like "the" or "an" in front of entities.  For
example, if I asked "Tell me about the schengen area", the entity is "Schengen Area"
and NOT "The Schengen Area".

For example, if I were to ask "Tell me about the schengen area", you should create
a Cypher query that finds all nodes with the id "Schengen Area" and then find
all nodes connected to those nodes and use those to formulate your answer, like this:

MATCH (a {{id: "Schengen Area"}})-[r]-(b)
RETURN a, r, b

Do NOT be very restrictive on the types of relationships you specify.  For example,
if I ask "Where can I go canyoning", you should NOT specify the relationship type.
Instead, use a Cypher query like this:

MATCH (a {{id: "Canyoning"}})-[r]-(b:Place)
RETURN a, r, b

Schema: {schema}
Question: {question}
"""

# Wrap the template so the chain can substitute the two placeholders.
cypher_generation_prompt = PromptTemplate(
    template=CYPHER_GENERATION_TEMPLATE,
    input_variables=["schema", "question"],
)

# Rebuild the chain so it picks up the final prompt; this is the chain that
# gets evaluated at the end of the notebook.
cypher_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=enhanced_graph,
    cypher_prompt=cypher_generation_prompt,
    verbose=True,
    allow_dangerous_requests=True
)

In [None]:
# Try a question that is not one of the prompt's worked examples.
cypher_chain.invoke({"query": "Where can I go on a cable car?"})

In [None]:
# Hand-written question / gold-answer pairs used to grade the graph QA chain.
examples = [
    {
        "query": "Where can I go kayaking?",
        "answer": "Portugal, Mondego, or Zêzere",
    },
    {
        "query": "How is Luis related to Isabella of Portugal?",
        "answer": "They are siblings",
    },
    {
        "query": "Where is Braga?",
        "answer": "In the Minho Region",
    },
]

In [None]:
# Build the grading chain, reusing the same chat model the QA chain uses.
eval_chain = QAEvalChain.from_llm(llm)

In [None]:
def _is_correct_grade(grade):
    """Return True when the grader's raw verdict text means CORRECT."""
    verdict = str(grade).strip().upper()
    # The grader may echo a "GRADE:" label in front of the verdict.
    if verdict.startswith("GRADE:"):
        verdict = verdict[len("GRADE:"):]
    words = verdict.split()
    # Compare the whole first word so that "INCORRECT" can never match.
    return bool(words) and words[0].strip(".,;:!*") == "CORRECT"


def evaluate_graph_rag(cypher_chain, eval_chain, examples):
    """Answer each example with the graph chain, grade it, and report accuracy.

    Args:
        cypher_chain: Object whose ``invoke({"query": ...})`` returns a mapping
            with a string under ``"result"``.
        eval_chain: Object whose ``evaluate(examples, predictions)`` returns one
            mapping per example with the grader's verdict under ``"results"``.
        examples: Sequence of dicts with ``"query"`` and ``"answer"`` keys.

    Returns:
        The fraction of examples graded CORRECT, or ``0.0`` when ``examples``
        is empty. A per-example report and the accuracy are also printed.
    """
    # Nothing to grade: avoid dividing by zero and skip the LLM calls.
    if not examples:
        accuracy = 0.0
        print(f"Graph QA Accuracy: {accuracy:.2f}")
        return accuracy

    # Generate predictions by querying the graph, one example at a time.
    predictions = [
        {"result": cypher_chain.invoke({"query": example["query"]})["result"].strip()}
        for example in examples
    ]

    # Run evaluation
    results = eval_chain.evaluate(examples, predictions)

    # Print output
    correct = 0
    for example, prediction, graded in zip(examples, predictions, results):
        print(f"Query: {example['query']}")
        print(f"Prediction from graph: {prediction['result']}")
        print(f"Gold answer: {example['answer']}")
        print(f"Grade: {graded['results']}")
        print("---")
        # The verdict is free text from the LLM, so normalise before counting.
        if _is_correct_grade(graded["results"]):
            correct += 1

    accuracy = correct / len(examples)
    print(f"Graph QA Accuracy: {accuracy:.2f}")
    return accuracy

In [None]:
# Grade the current Cypher chain against the hand-written examples.
evaluate_graph_rag(cypher_chain, eval_chain, examples)