LangChain, Ollama & Neo4j

In [1]:
#%pip install langchain-ollama langchain-experimental neo4j tiktoken yfiles_jupyter_graphs python-dotenv json-repair langchain_core

In [29]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_core.output_parsers import StrOutputParser

#from langchain_community.chat_models import ChatOllama
from langchain_ollama import ChatOllama
from langchain_experimental.llms.ollama_functions import OllamaFunctions
from langchain_ollama import OllamaEmbeddings


from neo4j import GraphDatabase
from neo4j import  Driver
#from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector



import os
from dotenv import load_dotenv
load_dotenv()

#graphı jup te gösterebilmek için
from yfiles_jupyter_graphs import GraphWidget

#RunnablePassthrough, LangChain işlem hatlarında bir veri üzerinde herhangi bir değişiklik yapmadan 
# onu olduğu gibi iletmek için kullanılır. 
# Veri akışı içinde bir adımı atlamak veya test etmek istediğinizde faydalıdır.
from langchain_core.runnables import  RunnablePassthrough


#LLMGraphTransformer, büyük dil modelleriyle (LLM) çalışırken grafik tabanlı bir dönüşüm gerçekleştirmek için kullanılır. 
# Örneğin, bir bilgi grafiğini analiz etmek, dönüştürmek veya zenginleştirmek için kullanılabilir.
from langchain_experimental.graph_transformers import LLMGraphTransformer

#remove_lucene_chars, Neo4j veritabanıyla çalışırken, sorgulamalarda Lucene özel karakterlerini temizlemek için kullanılır.
#  Lucene, Neo4j'nin sorgulama motorlarından biridir ve bazı karakterlerin doğru çalışabilmesi için kaçış yapılması veya kaldırılması gerekebilir.
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars


from pydantic import BaseModel, Field



In [2]:
os.getenv("NEO4J_PASSWORD")

'123456789'

In [4]:
graph = Neo4jGraph()

In [5]:
graph._check_driver_state()

In [6]:
#text loading
loader = TextLoader(file_path="Amico’s Family.txt", encoding="utf-8")
docs = loader.load()
docs

[Document(metadata={'source': 'Amico’s Family.txt'}, page_content='1. The Story of Amico’s Family: A Legacy of Love and Tradition\nIn the idyllic village of Santa Caterina, amidst the rolling hills and sun-kissed landscapes of Sicily, lies the genesis of the Caruso family, a lineage intertwined with the island\'s rich culinary tapestry. The Carusos were not mere inhabitants of the land; they were the keepers of a culinary heritage that spanned generations. Each family member contributed their unique flair, crafting a narrative of flavors that reflected their diverse experiences and deep-seated love for food.\n\nGiovanni Caruso and Maria: The Founding Generation\n\nGiovanni Caruso, Amico\'s great-grandfather, was a man of the earth. His calloused hands spoke of years spent cultivating the fertile soils of Santa Caterina, producing olives and grapes that were the pride of the region. Giovanni was not just a farmer but an alchemist of flavors, blending the fruits of his labor into exquisi

In [7]:
#chunking
textSplitter = RecursiveCharacterTextSplitter(
    chunk_size= 250,
    chunk_overlap= 25,
)
documents = textSplitter.split_documents(documents=docs) #split_text(docs[0].page_content)
print(len(documents))
documents[0]

73


Document(metadata={'source': 'Amico’s Family.txt'}, page_content='1. The Story of Amico’s Family: A Legacy of Love and Tradition')

In [8]:
#llm = ChatOllama(model="llama3.1", temperature=0)
#llm.invoke("9+8").content

In [8]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-4o-mini")

In [9]:
llm.invoke("9+8").content

'9 + 8 = 17.'

In [10]:
llmTransformer = LLMGraphTransformer(llm=llm)

In [11]:
#Convert a sequence of documents into graph documents.
graphDocs = llmTransformer.convert_to_graph_documents(documents=documents)

In [None]:
print(len(graphDocs))
graphDocs[0]

In [12]:

graphDocs[5].nodes

[Node(id='Giovanni Caruso', type='Person', properties={}),
 Node(id='Amico', type='Person', properties={}),
 Node(id='Santa Caterina', type='Location', properties={})]

In [13]:
graphDocs[5].relationships

[Relationship(source=Node(id='Giovanni Caruso', type='Person', properties={}), target=Node(id='Amico', type='Person', properties={}), type='GREAT_GRANDFATHER', properties={}),
 Relationship(source=Node(id='Giovanni Caruso', type='Person', properties={}), target=Node(id='Santa Caterina', type='Location', properties={}), type='CULTIVATED_IN', properties={})]

In [14]:
#graph veritabanına veri ekleme
graph.add_graph_documents(
    graphDocs,
    baseEntityLabel=True,
    include_source=True
)

In [16]:
def showGraph():
    driver = GraphDatabase.driver(
        uri = os.getenv("NEO4J_URI"),
        auth=(os.getenv("NEO4J_USERNAME"),os.getenv("NEO4J_PASSWORD"))
        )
    
    session = driver.session()
    widget = GraphWidget(graph=session.run('MATCH (s)-[r]->(t) WHERE NOT type(r) = "MENTIONS" RETURN s, r, t;').graph())
    #BU SORGU TÜM DÜĞÜMLERİ GETİRİR ANCAK ARADAKI BAĞLANTI MENTIONS DEĞİLSE: MENTİONS NODELARIN HANGİ DÖKÜMANDA BULUNDUGUNU İFADE EDER.
    widget.node_label_mapping = "id"
    return widget

showGraph()

GraphWidget(layout=Layout(height='800px', width='100%'))

In [18]:
#VECTOR STORE YARATMA
from langchain_openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

In [36]:
#Neo4j veritabanındaki düğümler üzerinde vektör tabanlı arama yapılabilmesini sağlayan bir yapı oluşturuyor
vector_index = Neo4jVector.from_existing_graph(
    embedding=embedding,
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)

vectorRetriever = vector_index.as_retriever()

In [53]:
# Bir sorgu ile en alakalı belgeleri getir
question = "Who are Nonna Lucia and Giovanni Caruso?"
results = vectorRetriever.get_relevant_documents(question)
results



[Document(metadata={'source': 'Amico’s Family.txt'}, page_content="\ntext: Lucia, Antonio's sister and Amico's grandmother, was the matriarch of the Caruso family. A culinary sage, Nonna Lucia was the custodian of the family's recipes, a role she took very seriously. Her kitchen was a sacred space, where she taught her"),
 Document(metadata={'source': 'Amico’s Family.txt'}, page_content='\ntext: Nonna Lucia: The Matriarch and Mentor'),
 Document(metadata={'source': 'Amico’s Family.txt'}, page_content="\ntext: where she taught her grandchildren the art of Sicilian cooking. Under her watchful eye, Amico learned the delicate balance of flavors in a Caponata and the intricate process of making fresh pasta. Nonna Lucia's influence extended beyond the kitchen;"),
 Document(metadata={'source': 'Amico’s Family.txt'}, page_content='\ntext: Each member of the Caruso family played a role in nurturing the next generation of chefs. Antonio hosted workshops in Rome, sharing his innovative techniques

In [65]:
results[0].page_content

"\ntext: Lucia, Antonio's sister and Amico's grandmother, was the matriarch of the Caruso family. A culinary sage, Nonna Lucia was the custodian of the family's recipes, a role she took very seriously. Her kitchen was a sacred space, where she taught her"

In [33]:
class Entities(BaseModel):
    names: list[str]=Field(...,
                           description="All the person, organization or business entities that appear in the text")
    
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are extracting organization and person entities from the text."),
    ("human", "Use the given format to extract information from the following: Input: {question}")
])

#Model wrapper that returns outputs formatted to match the given schema.
'''
The output schema. Can be passed in as:

- an OpenAI function/tool schema,
- a JSON Schema,
- a TypedDict class (support added in 0.1.20),
- or a Pydantic class.
''' 
entityChain = prompt | llm.with_structured_output(Entities)

In [54]:
#verdiğimiz şemayı döndürdü yani Entities
entityChain.invoke({"question":"Who are Nonna Lucia and Giovanni Caruso?"}).names

['Nonna Lucia', 'Giovanni Caruso']

In [55]:
entity = entityChain.invoke({"question":question}).names[0]
entity

'Nonna Lucia'

In [48]:
response = graph.query(
            """CALL db.index.fulltext.queryNodes('FTS_Entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )



In [49]:
response

[{'output': 'Nonna Lucia - INFLUENCE -> Amico'},
 {'output': 'Nonna Lucia - TAUGHT -> Sicilian Cooking'},
 {'output': 'Lucia - SISTER -> Antonio'},
 {'output': 'Lucia - GRANDMOTHER -> Amico'},
 {'output': 'Lucia - MATRIARCH -> Caruso Family'},
 {'output': 'Lucia - CUSTODIAN_OF_RECIPES -> Lucia'},
 {'output': 'Lucia - LOCATED_IN -> Los Angeles'},
 {'output': 'Lucia - NAMED_AFTER -> Grandmother'},
 {'output': 'Lucia - INHERITED_TALENT -> Grandmother'},
 {'output': 'Lucia - FOUNDED -> Bella Vita'},
 {'output': 'Lucia - SUPPORTS -> Local Artists'},
 {'output': 'Lucia - FOCUSED_ON -> Sustainable Cooking Practices'},
 {'output': 'Lucia - CONDUCTED -> Workshops'},
 {'output': 'Lucia - COMMITTED_TO -> Sustainability'},
 {'output': 'Lucia - BROUGHT_TO -> Los Angeles'},
 {'output': 'Lucia - BROUGHT -> Sicily'},
 {'output': 'Lucia - CUSTODIAN_OF_RECIPES -> Lucia'},
 {'output': 'Bella Vita - MANAGED_BY -> Lucia'}]

In [51]:
response[0]["output"]

'Nonna Lucia - INFLUENCE -> Amico'

In [59]:
# bu fonksiyon ilgili questiondaki entitileri bulacak. daha sonra bu entitiyler için srt 3lülerini bulacak.
def graphRetriever(question:str):
    results=""
    entities = entityChain.invoke({"question":"Who are Nonna Lucia and Giovanni Caruso?"}).names
    for entity in entities:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('FTS_Entity_id', $query, {limit:2})
            YIELD node,score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS output
            }
            RETURN output LIMIT 50
            """,
            {"query": entity},
        )

        results += "\n".join(r["output"] for r in response)
        return results

        

In [61]:
print(graphRetriever(question=question))



Nonna Lucia - INFLUENCE -> Amico
Nonna Lucia - TAUGHT -> Sicilian Cooking
Lucia - SISTER -> Antonio
Lucia - GRANDMOTHER -> Amico
Lucia - MATRIARCH -> Caruso Family
Lucia - CUSTODIAN_OF_RECIPES -> Lucia
Lucia - LOCATED_IN -> Los Angeles
Lucia - NAMED_AFTER -> Grandmother
Lucia - INHERITED_TALENT -> Grandmother
Lucia - FOUNDED -> Bella Vita
Lucia - SUPPORTS -> Local Artists
Lucia - FOCUSED_ON -> Sustainable Cooking Practices
Lucia - CONDUCTED -> Workshops
Lucia - COMMITTED_TO -> Sustainability
Lucia - BROUGHT_TO -> Los Angeles
Lucia - BROUGHT -> Sicily
Lucia - CUSTODIAN_OF_RECIPES -> Lucia
Bella Vita - MANAGED_BY -> Lucia


In [73]:
def fullRetriever(question: str):
    graphRetrieverResult = graphRetriever(question)
    vectorRetrieverDocs = vectorRetriever.get_relevant_documents(question)
    vectorRetrieverResult = "".join(d.page_content for d in vectorRetrieverDocs)
    
    return f"""Graph data:
                {graphRetrieverResult}
                vector data:
                {vectorRetrieverResult}
            """

In [74]:
fullRetriever(question)



"Graph data:\n                Nonna Lucia - INFLUENCE -> Amico\nNonna Lucia - TAUGHT -> Sicilian Cooking\nLucia - SISTER -> Antonio\nLucia - GRANDMOTHER -> Amico\nLucia - MATRIARCH -> Caruso Family\nLucia - CUSTODIAN_OF_RECIPES -> Lucia\nLucia - LOCATED_IN -> Los Angeles\nLucia - NAMED_AFTER -> Grandmother\nLucia - INHERITED_TALENT -> Grandmother\nLucia - FOUNDED -> Bella Vita\nLucia - SUPPORTS -> Local Artists\nLucia - FOCUSED_ON -> Sustainable Cooking Practices\nLucia - CONDUCTED -> Workshops\nLucia - COMMITTED_TO -> Sustainability\nLucia - BROUGHT_TO -> Los Angeles\nLucia - BROUGHT -> Sicily\nLucia - CUSTODIAN_OF_RECIPES -> Lucia\nBella Vita - MANAGED_BY -> Lucia\n                vector data:\n                \ntext: Lucia, Antonio's sister and Amico's grandmother, was the matriarch of the Caruso family. A culinary sage, Nonna Lucia was the custodian of the family's recipes, a role she took very seriously. Her kitchen was a sacred space, where she taught her\ntext: Nonna Lucia: The 

In [70]:
template = """ 
###
CONTEXT
###
'''
{context}
'''

###
RULES:
Learn from the CONTEXT and answer the following question.
Use natural language and be concise.
###

QUESTION:
{question}

"""

In [71]:
ragPromt = ChatPromptTemplate.from_template(template)

In [80]:
ragChain = (
    {
        "context": fullRetriever,
        "question": RunnablePassthrough(),
    }
    |ragPromt
    |llm
    |StrOutputParser()
)

In [82]:
question

'Who are Nonna Lucia and Giovanni Caruso?'

In [81]:
ragChain.invoke(input=question)



"Nonna Lucia is the matriarch of the Caruso family and a culinary expert known for her role as the custodian of the family's Sicilian recipes. She taught her grandchildren, including Amico, the art of cooking. Giovanni Caruso is not mentioned in the provided context, so there is no information available about him."