### Setup and Installation

In [25]:
# !pip install langchain -q
# !pip install langchain-community -q
# !pip install langchain-experimental -q
# !pip install neo4j -q
# !pip install langchain-chroma -qU
# !pip install langchain-google-genai -qU

In [26]:
# from google.colab import userdata
# import os

# import warnings
# warnings.filterwarnings('ignore')

### Initialize OpenAI LLM

In [27]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os

os.environ["GOOGLE_API_KEY"] = "AIzaSyBcUsfH8V9z9ES0SVlYRAZAY_Lp2AdO800"

from langchain_google_genai import GoogleGenerativeAI
llm=GoogleGenerativeAI(
    model="gemini-2.5-flash", temperature=0.1
    )

### Initialize Embedding Model

In [28]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001")

### Load Data

In [29]:
from langchain.schema import Document

text = Document(page_content="""
Three students, A, B, and C, are tackling two subjects, Mathematics(X) and Literature(Y). Each has a unique perspective, weaving their experiences into a shared academic journey. A, gifted in Mathematics, thrives on solving equations but struggles with the abstract world of poetry and storytelling. On the other hand, B shines in Literature, captivating others with a flair for creative writing, yet finds numbers daunting and formulas perplexing.
C, a generalist, performs decently in both subjects but often bridges gaps between A and B. While A helps B understand mathematical concepts, B guides A through essay writing. Meanwhile, C organizes group study sessions, offering real-world examples to connect ideas from X and Y, making both subjects more relatable. Their collaboration not only enhances their learning but fosters a sense of camaraderie, demonstrating the power of teamwork in overcoming challenges.
""")

### Split Documents into Chunks

In [30]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=250,chunk_overlap=30)

chunks = splitter.split_documents([text])

### Graph Initialization and Transformation

In [32]:
from langchain_community.graphs import Neo4jGraph

os.environ["NEO4J_URI"] = "neo4j+s://d7d4b5b1.databases.neo4j.io"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "POPst623dzIc8xW777nPvf6MJl91QhlGcAzXhOpOa-w"

# initialize Neo4j graph database
graph = Neo4jGraph()

In [33]:
from langchain_experimental.graph_transformers import LLMGraphTransformer

graph_transformer = LLMGraphTransformer(llm = llm)

In [34]:
# convert text chunks to graph nodes
graph_documents = graph_transformer.convert_to_graph_documents(chunks)

In [35]:
graph_documents[0]

GraphDocument(nodes=[Node(id='C', type='Person', properties={}), Node(id='A', type='Person', properties={}), Node(id='gifted in Mathematics', type='Characteristic', properties={}), Node(id='Mathematics', type='Subject', properties={}), Node(id='Literature', type='Subject', properties={}), Node(id='B', type='Person', properties={}), Node(id='thrives on solving equations', type='Characteristic', properties={})], relationships=[Relationship(source=Node(id='A', type='Person', properties={}), target=Node(id='Mathematics', type='Subject', properties={}), type='TACKLES', properties={}), Relationship(source=Node(id='A', type='Person', properties={}), target=Node(id='Literature', type='Subject', properties={}), type='TACKLES', properties={}), Relationship(source=Node(id='B', type='Person', properties={}), target=Node(id='Mathematics', type='Subject', properties={}), type='TACKLES', properties={}), Relationship(source=Node(id='B', type='Person', properties={}), target=Node(id='Literature', type=

In [36]:
for node in graph_documents[0].nodes:
  print(node)

id='C' type='Person' properties={}
id='A' type='Person' properties={}
id='gifted in Mathematics' type='Characteristic' properties={}
id='Mathematics' type='Subject' properties={}
id='Literature' type='Subject' properties={}
id='B' type='Person' properties={}
id='thrives on solving equations' type='Characteristic' properties={}


In [37]:
for relationship in graph_documents[0].relationships:
  print(relationship)

source=Node(id='A', type='Person', properties={}) target=Node(id='Mathematics', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='A', type='Person', properties={}) target=Node(id='Literature', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='B', type='Person', properties={}) target=Node(id='Mathematics', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='B', type='Person', properties={}) target=Node(id='Literature', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='C', type='Person', properties={}) target=Node(id='Mathematics', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='C', type='Person', properties={}) target=Node(id='Literature', type='Subject', properties={}) type='TACKLES' properties={}
source=Node(id='A', type='Person', properties={}) target=Node(id='gifted in Mathematics', type='Characteristic', properties={}) type='HAS_CHARACTERISTIC' propertie

In [38]:
# add nodes and relationships to graph
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [39]:
# indexing enables fast searches within text-based properties
def create_fulltext_index(g):
  cypher = "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]"
  g.query(cypher)

create_fulltext_index(graph)

### Querying the Graph and Entity Retrieval

In [40]:
# from langchain_core.prompts import ChatPromptTemplate

# entity_prompt = ChatPromptTemplate.from_messages(
#     [
#         (
#             "system","You are extracting entities from the text.",
#         ),
#         (
#             "human","Use the following information to extract entities"
#             "input: {question}",
#         ),
#     ]
# )



In [41]:
from pydantic import BaseModel
from typing import List

class Entity(BaseModel):
    type: str
    value: str

class Entities(BaseModel):
    entities: List[Entity]

In [42]:
from langchain_core.prompts import PromptTemplate

entity_prompt = PromptTemplate.from_template("""
Extract named entities from the text below.
Return the result **as JSON** with this format(types can be anithing you want, not only person,location, subject):

{{
  "entities": [
    {{"type": "PERSON", "value": "John"}},
    {{"type": "LOCATION", "value": "Paris"}}
  ]
}}

Text: "{question}"
""")


In [43]:
# from pydantic import BaseModel, Field
# from typing import List

# class Entities(BaseModel):
#     names: List[str] = Field(
#         ...,
#         description="All the entities that appear in the text",
#     )

In [44]:
from langchain.output_parsers import PydanticOutputParser
parser = PydanticOutputParser(pydantic_object=Entities)

In [45]:
entity_chain = entity_prompt | llm | parser

In [47]:
entity_chain.invoke({"question": "who learn both Mathematics(X) and Literature(Y) subjects"})

Entities(entities=[Entity(type='Subject', value='Mathematics(X)'), Entity(type='Subject', value='Literature(Y)')])

### Graph Retriever

Lucene chars: https://api.python.langchain.com/en/latest/_modules/langchain_community/vectorstores/neo4j_vector.html#remove_lucene_chars

In [48]:
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

def generate_full_text_query(input):
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

def graph_retriever(question: str) -> str:
    result = ""
    entities = entity_chain.invoke({"question": question})
    for entity in entities.entities:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node,score
            CALL {
            WITH node
            MATCH (node)-[r:!MENTIONS]->(neighbor)
            RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
            UNION ALL
            WITH node
            MATCH (node)<-[r:!MENTIONS]-(neighbor)
            RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 20
            """,
            {"query": generate_full_text_query(entity.value)},  # <-- FIXED
        )
        result += "\n".join([el['output'] for el in response])

    return result

In [49]:
print(graph_retriever("who learn both Maths and Literature subjects"))



C - TACKLES -> Literature
A - TACKLES -> Literature
B - TACKLES -> Literature
B - EXCELS_IN -> Literature
B - HAS_CHARACTERISTIC -> excels in Literature


### Semantic Search Retriever

In [51]:
from langchain_community.vectorstores import Neo4jVector

vector_index = Neo4jVector.from_existing_graph(
    embedding,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

In [52]:
vector_index.similarity_search("who learn both X and Y subjects", k=2)



[Document(metadata={}, page_content='\ntext: offering real-world examples to connect ideas from X and Y, making both subjects more relatable. Their collaboration not only enhances their learning but fosters a sense of camaraderie, demonstrating the power of teamwork in overcoming challenges.'),
 Document(metadata={}, page_content='\ntext: C, a generalist, performs decently in both subjects but often bridges gaps between A and B. While A helps B understand mathematical concepts, B guides A through essay writing. Meanwhile, C organizes group study sessions, offering real-world examples')]

In [53]:
def retriever(question):
    graph_search_result = graph_retriever(question)
    semantic_search_result = [data.page_content for data in vector_index.similarity_search(question, k=2)]
    final_data = f"Graph data:{graph_search_result}\nText data:{' '. join(semantic_search_result)}"
    return final_data

### Define Prompt Template for RAG

In [55]:
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system","Answer this question using the provided context only.",
        ),
        (
            "human","Context: {context}"
            "Question: {question}",
        ),
    ]
)

### Create RAG Chain

In [None]:
from langchain_core.runnables import RunnablePassthrough

chain = (
    {
      "context": retriever,
      "question": RunnablePassthrough()
    }
    | prompt
    | llm
)


### Invoke RAG Chain with Example Questions

In [57]:
response = chain.invoke("who learn both X and Y subjects")

print(response.content)



AttributeError: 'str' object has no attribute 'content'

In [None]:
response = chain.invoke("who are the students in that school")

print(response.content)



The students in that school are A, B, and C.


In [62]:
import sys
print(sys.version)

3.11.0 (main, Oct 24 2022, 18:26:48) [MSC v.1933 64 bit (AMD64)]
