In [None]:
import os
from dotenv import load_dotenv

from neo4j import GraphDatabase
from neo4j import  Driver
from pydantic import BaseModel, Field
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import  RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from yfiles_jupyter_graphs import GraphWidget

# Load settings from a local .env file; later cells read NEO4J_URI,
# NEO4J_USERNAME and NEO4J_PASSWORD from os.environ.
load_dotenv()

True

In [4]:
# LangChain handle on the Neo4j database, used below for writing graph
# documents and running Cypher queries.
# NOTE(review): no connection arguments are passed, so this presumably picks up
# the NEO4J_* variables loaded by load_dotenv() — confirm.
graph = Neo4jGraph()

In [6]:
# Read the source text and cut it into small overlapping chunks so that each
# chunk can be sent to the LLM for graph extraction on its own.
loader = TextLoader(file_path="dummytext.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=256,
    chunk_overlap=32,
)
documents = text_splitter.split_documents(documents=docs)

In [12]:
# Chat model shared by the graph extraction, entity extraction and final
# answer chains. temperature=0 keeps the extraction as repeatable as possible.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash",
                             temperature=0,
                             timeout=None)

# Ask the LLM to turn every text chunk into nodes and relationships.
# This issues LLM calls for the chunks, so it is the slow cell of the notebook.
llm_transformer = LLMGraphTransformer(llm=llm)
graph_documents = llm_transformer.convert_to_graph_documents(documents)

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised DeadlineExceeded: 504 Deadline Exceeded.


In [13]:
# Inspect the first extracted graph document (nodes, relationships, source chunk).
graph_documents[0]

GraphDocument(nodes=[Node(id="Amico'S Family", type='Family', properties={}), Node(id='Legacy', type='Concept', properties={}), Node(id='Love', type='Concept', properties={}), Node(id='Tradition', type='Concept', properties={})], relationships=[Relationship(source=Node(id="Amico'S Family", type='Family', properties={}), target=Node(id='Legacy', type='Concept', properties={}), type='HAS', properties={}), Relationship(source=Node(id='Legacy', type='Concept', properties={}), target=Node(id='Love', type='Concept', properties={}), type='INCLUDES', properties={}), Relationship(source=Node(id='Legacy', type='Concept', properties={}), target=Node(id='Tradition', type='Concept', properties={}), type='INCLUDES', properties={})], source=Document(metadata={'source': 'dummytext.txt'}, page_content='1. The Story of Amico’s Family: A Legacy of Love and Tradition'))

In [14]:
# Persist the extracted nodes and relationships in Neo4j.
# NOTE(review): baseEntityLabel=True is presumably what tags nodes with the
# `__Entity__` label that the fulltext index below targets, and
# include_source=True presumably stores the source chunks linked through
# MENTIONS relationships (the later queries exclude MENTIONS) — confirm.
graph.add_graph_documents(
    graph_documents=graph_documents,
    include_source=True,
    baseEntityLabel=True,
)

In [None]:
def showGraph() -> GraphWidget:
    """Build a graph widget showing the entity relationships stored in Neo4j.

    Connects with the NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD environment
    variables, fetches every relationship whose type is not MENTIONS, and wraps
    the result in a ``GraphWidget`` whose node labels are taken from the ``id``
    property.

    Returns:
        The configured widget, ready to be displayed as a cell output.

    Raises:
        KeyError: If one of the NEO4J_* environment variables is missing.
    """
    # The driver and the session are both context managers; closing them here
    # keeps repeated calls from leaking a connection each time.
    with GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
    ) as driver, driver.session() as session:
        # `!MENTIONS` excludes the MENTIONS relationships, so only the other
        # relationship types are returned.
        result_graph = session.run(
            "MATCH (s)-[r:!MENTIONS]->(t) RETURN s,r,t"
        ).graph()
        # Build the widget while the session is still open so the result is
        # fully read before the connection is released.
        widget = GraphWidget(graph=result_graph)
    widget.node_label_mapping = "id"
    return widget

# Display the extracted knowledge graph (the widget is the cell's output).
showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [17]:
# Build a vector index over the nodes labelled `Document`: their `text`
# property is embedded and the vector is stored in the `embedding` property.
# NOTE(review): search_type="hybrid" presumably combines vector similarity with
# keyword search — confirm against the Neo4jVector documentation.
vector_index = Neo4jVector.from_existing_graph(
    GoogleGenerativeAIEmbeddings(model="embedding-001"),
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

# Retriever interface over the index, used by the RAG chain further down.
vector_retriever = vector_index.as_retriever()

In [48]:
# Sanity check: query the vector retriever on its own.
vector_retriever.invoke("Who are Nonna Lucia and Giovanni Caruso?")

[Document(metadata={'source': 'dummytext.txt'}, page_content='\ntext: Giovanni Caruso and Maria: The Founding Generation'),
 Document(metadata={'source': 'dummytext.txt'}, page_content='\ntext: Nonna Lucia: The Matriarch and Mentor'),
 Document(metadata={'source': 'dummytext.txt'}, page_content='\ntext: 5. Bella Vita - Los Angeles: Owned by Lucia Caruso'),
 Document(metadata={'source': 'dummytext.txt'}, page_content='\ntext: 1. La Dolce Vita - Rome: Owned by Antonio Caruso')]

# RAG

In [40]:
# Raw Neo4j driver used by create_index() below; credentials come from the
# environment.
driver = GraphDatabase.driver(
    uri=os.environ["NEO4J_URI"],
    auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"]),
)
    

def create_fulltext_index(tx):
    """Create the `fulltext_entity_id` fulltext index on `__Entity__.id`.

    Intended to be run as a transaction function (it receives the transaction
    as ``tx``). `IF NOT EXISTS` makes the statement a no-op when the index is
    already present, so the cell can be re-run without raising.

    Args:
        tx: Transaction object exposing ``run(query)``.
    """
    query = '''
    CREATE FULLTEXT INDEX `fulltext_entity_id` IF NOT EXISTS
    FOR (n:__Entity__) 
    ON EACH [n.id];
    '''
    tx.run(query)

def create_index():
    """Run ``create_fulltext_index`` in a write transaction and report success.

    Uses the module-level ``driver``; the session is closed when the ``with``
    block exits. The confirmation is printed only if the write did not raise.
    """
    with driver.session() as db_session:
        db_session.execute_write(create_fulltext_index)
        print("Fulltext index created successfully.")

# Index creation is best effort (for example the index may already exist), so a
# failure must not stop the notebook. It is reported rather than silently
# swallowed, which keeps real problems such as bad credentials or an
# unreachable database visible.
try:
    create_index()
except Exception as exc:
    print(f"Fulltext index was not created: {exc!r}")
finally:
    # Always release the driver, whether or not the index was created.
    driver.close()

Fulltext index created successfully.


In [41]:
# Schema for the structured output of the entity-extraction chain: the model is
# asked to fill `names` with the entities it finds in the question.
# NOTE(review): the class docstring and the field description are presumably
# forwarded to the LLM as part of the schema, so treat them as prompt text
# rather than as ordinary documentation — confirm before rewording.
class Entities(BaseModel):
    """Identifying information about entities."""

    # Required field (`...` means no default): list of extracted entity names.
    names: list[str] = Field(
        ...,
        description="All the person, organization, or business entities that appear in the text"
    )

# Two-message prompt: a system instruction plus the user question, which is
# substituted for {question}.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are extracting organization and person entities from the text."),
    ("human", "Use the given format to extract information from the following input: {question}"),
])

# Prompt piped into the LLM, whose output is constrained to the Entities schema.
entity_chain = prompt | llm.with_structured_output(Entities)

In [42]:
# Sanity check: extract the entity names from a sample question.
entity_chain.invoke({"question":"Who are Nonna Lucia and Giovanni Caruso?"}).names

['Nonna Lucia', 'Giovanni Caruso']

In [43]:
def generate_full_text_query(input: str) -> str:
    """Turn free text into a fuzzy fulltext query string.

    The text is cleaned with ``remove_lucene_chars`` and split on whitespace.
    Each word gets a ``~2`` suffix and the words are combined with ``AND``.
    The generated query is also printed.

    Args:
        input: Free text, e.g. an entity name.

    Returns:
        The query string, or ``""`` when no words remain after cleaning.
    """
    tokens = remove_lucene_chars(input).split()
    if len(tokens) == 0:
        return ""

    fuzzy_terms = []
    for token in tokens:
        fuzzy_terms.append(token + "~2")
    full_text_query = " AND ".join(fuzzy_terms)

    print(f"Generated Query: {full_text_query}")
    return full_text_query


# Fulltext index query
def graph_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question.

    Entity names are extracted with ``entity_chain``. Each name is looked up in
    the ``fulltext_entity_id`` index (at most 2 hits), and every non-MENTIONS
    relationship touching a hit, in either direction, is rendered as
    ``source - TYPE -> target`` (at most 50 rows per entity).

    Args:
        question: Natural-language question to extract entities from.

    Returns:
        One relationship per line, in entity extraction order; an empty string
        when no entity was extracted or nothing matched.
    """
    lines: list[str] = []
    entities = entity_chain.invoke(question)
    for entity in entities.names:
        # The name comes from LLM output over user text. Strip the fulltext
        # query operators so a stray ':' or '(' cannot break the index query.
        search_text = remove_lucene_chars(entity)
        if not search_text:
            # Nothing searchable is left; skip rather than send an empty query.
            continue
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('fulltext_entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": search_text},
        )
        lines.extend(el["output"] for el in response)
    # Join once at the end so the last row of one entity and the first row of
    # the next stay on separate lines.
    return "\n".join(lines)

In [45]:
# Sanity check: show the relationships retrieved for a single-entity question.
print(graph_retriever("Who is Nonna Lucia?"))

Nonna Lucia - TAUGHT -> Amico
Nonna Lucia - TAUGHT -> Sicilian Cooking
Lucia - TAUGHT -> Aspiring Chefs
Lucia - LOCATED_IN -> Los Angeles
Lucia - FOCUSED_ON -> Sustainable Cooking Practices
Lucia - SUPPORTS -> Local Artists
Lucia - HAS_COMMITMENT_TO -> Sustainability
Lucia - IS_ASSOCIATED_WITH -> Bella Vita
Lucia - INHERITED_FROM -> Grandmother
Lucia - FOUNDED -> Bella Vita
Lucia - NAMED_AFTER -> Grandmother
Lucia - USED_RECIPES_FROM -> Grandmother
Lucia - GRANDMOTHER_OF -> Amico
Lucia - MATRIARCH_OF -> Caruso Family
Lucia - SISTER_OF -> Antonio


In [46]:
def full_retriever(question: str):
    """Build the combined context string for the answer prompt.

    Calls ``graph_retriever`` first, then ``vector_retriever``, and formats
    both results into one text block: a "Graph data:" section followed by a
    "vector data:" section in which the retrieved page contents are joined
    with "#Document ".

    Args:
        question: The user question, passed unchanged to both retrievers.

    Returns:
        The formatted context string.
    """
    graph_data = graph_retriever(question)
    vector_hits = vector_retriever.invoke(question)
    vector_text = "#Document ".join(hit.page_content for hit in vector_hits)
    # The four-space indents and the trailing indent are part of the emitted text.
    return (
        "Graph data:\n"
        f"    {graph_data}\n"
        "    vector data:\n"
        f"    {vector_text}\n"
        "    "
    )

In [49]:
# Answer prompt: the model may only use the retrieved context.
template = """
Answer the question based only on the following context:
{context}
Question: {question}
Use natural language and be concise.
Answer:"""
# Note: this rebinds `prompt`, which an earlier cell used for the
# entity-extraction prompt.
prompt = ChatPromptTemplate.from_template(template)

# RAG pipeline: the question feeds full_retriever (as context) and is also
# passed through unchanged; the filled prompt goes to the LLM and the reply is
# reduced to a plain string.
chain = (
    {"context": full_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [50]:
# End-to-end check: ask a question through the full graph + vector RAG chain.
chain.invoke(input="Who is Nonna Lucia? Did she teach anyone about restaurants or cooking?")

"Nonna Lucia is Lucia, Antonio's sister and Amico's grandmother, and the matriarch of the Caruso family. Yes, she taught Amico and her grandchildren the art of Sicilian cooking, and also taught aspiring chefs about sustainable cooking practices."