# Knowledge Graphs with Neo4j
- ref: https://neo4j.com/labs/genai-ecosystem/llamaindex/
- ref: https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/

In [1]:
from llama_index.llms.litellm import LiteLLM

In [None]:

import os
from dotenv import load_dotenv
load_dotenv()

import sys
sys.path.append('..')

from llama_index.core import SimpleDirectoryReader
from llama_index.llms.litellm import LiteLLM
from llama_index.llms.ollama import Ollama
from llama_index.llms.litellm import LiteLLM
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.core import Settings
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from google.genai.types import EmbedContentConfig
from core.custom_components.custom_parsers import CustomMarkdownNodeParser
from core.custom_components.custom_google_genai import CustomGoogleGenAI
from core.utilities import GoogleGenAIDummyTokensizer
# pip install graspologic
from llama_index.core import Settings


## Initialize Models

In [3]:

# llm = CustomGoogleGenAI(
#             model="gemini-2.5-flash", #"gemini-2.0-flash",
#             api_key=os.environ['GEMINI_API_KEY'], 
#             max_retries=2,  # Number of retry attempts
#             retry_on_rate_limit=True,
#             additional_kwargs={"stream_options": {"include_usage": True}}
#         )
# llm = OpenAI(model="gemini-2.0-flash",
#             api_key=os.environ['GEMINI_API_KEY'],
#             api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
#             max_retries=2,  # Number of retry attempts
#             retry_on_rate_limit=True,
#             additional_kwargs={"stream_options": {"include_usage": True}})
# from llama_index.llms.ollama import Ollama
# llm = Ollama(model="gemma3:12b", request_timeout=120.0, context_window=8000)

# Chat model: Gemini 2.5 Flash addressed through the LiteLLM wrapper, then
# assigned to the global llama_index Settings.
llm = LiteLLM(
    model="gemini/gemini-2.5-flash",
    max_tokens=8192,
    max_retries=6,
)
Settings.llm = llm

# Embedding model: Google GenAI text embeddings, likewise assigned to Settings.
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding

embed_model = GoogleGenAIEmbedding(
    model_name="models/text-embedding-004",
    api_key=os.environ['GEMINI_API_KEY'],
)
Settings.embed_model = embed_model


# Keep the default tiktoken tokenizer: some types of input are only supported by tiktoken.
# tokenizer = GoogleGenAIDummyTokensizer(llm).encode
# import tiktoken
# tokenizer = tiktoken.encoding_for_model('gpt-4o').encode
# Settings.tokenizer = tokenizer


In [4]:
import nest_asyncio
nest_asyncio.apply()

## Define Input

In [5]:

INPUT_DIR = '../data/langchain/docs/docs'
FILE_TYPES = ['.md', '.mdx']

# Walk INPUT_DIR recursively and load only the Markdown/MDX files; with
# filename_as_id=True each document's id is derived from its file path.
reader = SimpleDirectoryReader(
    input_dir=INPUT_DIR,
    required_exts=FILE_TYPES,
    recursive=True,
    exclude=[],
    filename_as_id=True,
)
documents = reader.load_data()
len(documents)

423

In [6]:

# Half-open window [doc_start, doc_end) of the loaded documents; the same
# bounds are reused further down when the documents are parsed into nodes.
doc_start = 25
doc_end = 28

# Show which files fall inside the window.
for selected_doc in documents[doc_start:doc_end]:
    print(selected_doc.id_)
# Concatenate documents with filename and content
# concatenated_docs = ""
# for doc in documents[doc_start:doc_end]:
#     file_path = doc.metadata['file_path']
#     path_parts = file_path.split('\\')
#     file_name = '\\'.join(path_parts[-2:])
#     concatenated_docs += f"\n\n=== File: {file_name} ===\n\n"
#     concatenated_docs += doc.text
#     concatenated_docs += "\n\n=== End File ===\n"
# print(concatenated_docs)

# len(concatenated_docs)


h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\rag.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\retrieval.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\retrievers.mdx


## Build Graph Index

In [7]:

md_node_parser = CustomMarkdownNodeParser(max_tokens=2000, max_header_level=2, tokenizer=Settings.tokenizer)


In [8]:
md_nodes = md_node_parser.get_nodes_from_documents(documents[doc_start:doc_end])
len(md_nodes)

16

In [None]:
# batch_size = 10
# total_batches = math.ceil(len(nodes) / batch_size)
# for batch_idx in range(total_batches):
    #     start_idx = batch_idx * batch_size
    # end_idx = min(start_idx + batch_size, len(md_nodes)) 
    # batch_nodes = md_nodes[start_idx:end_idx]


In [9]:
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

### Make sure Neo4j Desktop (or the CLI) is started and the database is running

In [11]:
import os

# Connection settings are read from the environment (for example from the .env
# file loaded at the top of the notebook) so credentials do not have to be
# committed with the notebook. The fallbacks reproduce the previous hard-coded
# local-development values, so the cell behaves as before when nothing is set.
graph_store = Neo4jPropertyGraphStore(
    username=os.environ.get("NEO4J_USERNAME", "neo4j"),
    password=os.environ.get("NEO4J_PASSWORD", "pineapple"),
    url=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    database=os.environ.get("NEO4J_DATABASE", "doc2agent"),
)

In [12]:
from llama_index.core import PropertyGraphIndex
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor, DynamicLLMPathExtractor
import math


In [13]:
# Extractor that uses the LLM to pull triplets out of each node's text.
path_extractor = DynamicLLMPathExtractor(llm=llm)

# Create the index with no nodes, backed by the Neo4j graph store; the parsed
# Markdown nodes are inserted in a later cell.
gindex = PropertyGraphIndex(
    nodes=[],
    llm=llm,
    embed_model=embed_model,
    property_graph_store=graph_store,
    kg_extractors=[path_extractor],
    use_async=True,
    show_progress=True,
)

In [14]:
# Observability: connect to Langfuse and, when credentials check out,
# instrument LlamaIndex so later calls are traced.
from langfuse import get_client
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor

langfuse = get_client()

# Later cells branch on this flag to decide whether to open a Langfuse span.
langfuse_available = False
if not langfuse.auth_check():
    print("Authentication failed. Please check your credentials and host.")
else:
    langfuse_available = True
    LlamaIndexInstrumentor().instrument()
    print("Langfuse client is authenticated and ready!")

Langfuse client is authenticated and ready!


In [17]:
# Insert the parsed Markdown nodes into the graph index. Per the original note,
# insert_nodes takes care of existing nodes and relationships, so re-running
# this cell is not an issue.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the indexing call is written only once.
span = (
    langfuse.start_as_current_span(name="Building graph index")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        gindex.insert_nodes(md_nodes)
finally:
    # Flush even when indexing raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()

Extracting and inferring knowledge graph from text: 100%|██████████| 30/30 [02:49<00:00,  5.66s/it]
Trace ID is not set. Creating generation client with new trace id.
Generating embeddings: 100%|██████████| 3/3 [00:01<00:00,  1.52it/s]
Trace ID is not set. Creating generation client with new trace id.
Generating embeddings: 100%|██████████| 38/38 [00:02<00:00, 16.54it/s]
Trace ID is not set. Creating generation client with new trace id.


## Explore graph

In [15]:
# check hash of nodes (llama nodes, not kg node)
# from hashlib import sha256

# node = md_nodes[0]
# doc_identity = str(node.text) + str(node.metadata)
# print(node.hash)
# print(str(sha256(doc_identity.encode("utf-8", "surrogatepass")).hexdigest()))

In [16]:
# Look up what the index's docstore holds: the reference-document info, and
# the entries matching the ids of the parsed Markdown nodes.
docstore = gindex.docstore
docinfo = docstore.get_all_ref_doc_info()
md_node_ids = [node.id_ for node in md_nodes]
docstore_nodes = docstore.get_nodes(node_ids=md_node_ids)

In [17]:
len(docinfo), len(docstore_nodes)

0

In [21]:
# get triplets
trips = gindex.property_graph_store.get_triplets()
len(trips)

2342

In [28]:
trips[0]

[EntityNode(label='AI_MODEL', embedding=None, properties={'header_path': '/', 'id': 'language models', 'creation_date': '2025-02-21', 'last_modified_date': '2025-02-21', 'file_size': 1695, 'file_path': 'h:\\Coding\\ml\\llm\\agents\\documentation_agent\\graph_rag\\..\\data\\langchain\\docs\\docs\\concepts\\agents.mdx', 'file_name': 'agents.mdx', 'triplet_source_id': 'ccf913ef-c17b-4f7d-9f51-876277bb10cc'}, name='language models'),
 Relation(label='CANNOT', source_id='language models', target_id='take actions', properties={'header_path': '/', 'creation_date': '2025-02-21', 'last_modified_date': '2025-02-21', 'file_size': 1695, 'file_path': 'h:\\Coding\\ml\\llm\\agents\\documentation_agent\\graph_rag\\..\\data\\langchain\\docs\\docs\\concepts\\agents.mdx', 'file_name': 'agents.mdx', 'triplet_source_id': 'ccf913ef-c17b-4f7d-9f51-876277bb10cc'}),
 EntityNode(label='ACTION', embedding=None, properties={'header_path': '/', 'id': 'take actions', 'creation_date': '2025-02-21', 'last_modified_da

In [33]:
ln = gindex.property_graph_store.get_llama_nodes(node_ids=['ccf913ef-c17b-4f7d-9f51-876277bb10cc'])
ln[0]

TextNode(id_='ccf913ef-c17b-4f7d-9f51-876277bb10cc', embedding=None, metadata={'file_path': 'h:\\Coding\\ml\\llm\\agents\\documentation_agent\\graph_rag\\..\\data\\langchain\\docs\\docs\\concepts\\agents.mdx', 'file_name': 'agents.mdx', 'file_size': 1695, 'creation_date': '2025-02-21', 'last_modified_date': '2025-02-21', 'header_path': '/'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='h:\\Coding\\ml\\llm\\agents\\documentation_agent\\graph_rag\\..\\data\\langchain\\docs\\docs\\concepts\\agents.mdx', node_type='4', metadata={'file_path': 'h:\\Coding\\ml\\llm\\agents\\documentation_agent\\graph_rag\\..\\data\\langchain\\docs\\docs\\concepts\\agents.mdx', 'file_name': 'agents.mdx', 'file_size': 1695, 'crea

In [23]:
# Knowledge Graph Nodes
kgn = gindex.property_graph_store.get()
len(kgn)

1329

In [37]:
str(kgn[0])

"+ operator ({'header_path': '/Messages/LangChain Messages/AIMessageChunk/', 'creation_date': '2025-02-21', 'last_modified_date': '2025-02-21', 'file_size': 15252, 'file_path': 'h:\\\\Coding\\\\ml\\\\llm\\\\agents\\\\documentation_agent\\\\graph_rag\\\\..\\\\data\\\\langchain\\\\docs\\\\docs\\\\concepts\\\\messages.mdx', 'name': '+ operator', 'file_name': 'messages.mdx', 'triplet_source_id': '7c3a0e08-7b05-4c88-b4fd-d28920f23926'})"

In [76]:
# Fetch the graph schema once and report the size of three of its sections.
graph_schema = gindex.property_graph_store.get_schema()
tuple(
    len(graph_schema[section])
    for section in ('node_props', 'rel_props', 'relationships')
)

(127, 200, 692)

In [28]:
gindex.property_graph_store.get_schema().keys()

dict_keys(['node_props', 'rel_props', 'relationships', 'metadata'])

## Default Retrieval and Query

In [24]:
for doc in documents[7:28]:
    print(doc.id_)

h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\agents.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\architecture.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\async.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\callbacks.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\chat_history.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\chat_models.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\document_loaders.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\embedding_models.mdx
h:\Coding\ml\llm\agents\documentation_agent\notebooks\..\data\langchain\docs\docs\concepts\evaluation.mdx
h:\Coding\ml\llm\agents\documentation_a

In [25]:
# Build one string holding every selected document, each wrapped in
# "=== File: ... ===" / "=== End File ===" markers.
sections = []
for doc in documents[7:28]:
    # Keep the last two backslash-separated path components (folder\file);
    # this split assumes Windows-style paths.
    file_name = '\\'.join(doc.metadata['file_path'].split('\\')[-2:])
    sections.extend((
        f"\n\n=== File: {file_name} ===\n\n",
        doc.text,
        "\n\n=== End File ===\n",
    ))
concatenated_docs = "".join(sections)
# Presumably a rough token estimate at ~4 characters per token.
len(concatenated_docs) / 4

35805.25

In [26]:
queries = [
    "what are the functions in BaseStores in langchain?",
    "what are the different types of retreivers supported by langchain?",
]

In [27]:

# Define retriever
retriever = gindex.as_retriever(
    include_text=True,  # include source text in returned nodes, default True
)

In [28]:
# Run the default graph retriever for the second sample query.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the retrieval call is written only once.
span = (
    langfuse.start_as_current_span(name="retieve graph index")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        results = retriever.retrieve(queries[1])
finally:
    # Flush even when retrieval raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()

In [30]:
len(results)

6

In [29]:

for record in results:
    print(record.text)
    print('*'*100)

Here are some facts extracted from the provided text:

Retrieval -> REQUIRES -> Retrievers

## Information retrieval
****************************************************************************************************
Here are some facts extracted from the provided text:

Retrievers -> TYPE_OF -> Common types
Retrievers -> RETURN -> list of Document objects
Retrievers -> DO_NOT_NEED_TO -> store documents
Retrievers -> CAN_BE_BUILT_ON -> search APIs
Retrievers -> BUILT_ON -> graph databases
Retrievers -> BUILT_ON -> relational database
Retrievers -> UTILIZE -> index

### Source document retention 

Many retrievers utilize some kind of index to make documents easily searchable.
The process of indexing can include a transformation step (e.g., vectorstores often use document splitting). 
Whatever transformation is used, can be very useful to retain a link between the *transformed document* and the original, giving the retriever the ability to return the *original* document.

![Retrieval wi

In [31]:
query_engine = gindex.as_query_engine(llm = llm, include_text=True)#, response_synthesizer=response_synthesizer)
# query_engine = gindex.as_query_engine(
#     include_text=False, response_mode="tree_summarize"
# )

In [32]:
# Answer the second sample query with the graph query engine.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the query call is written only once.
span = (
    langfuse.start_as_current_span(name="query gindex")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        response = query_engine.query(queries[1])
finally:
    # Flush even when the query raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()

# if langfuse_available:
#     instrumentor.start()
#     with instrumentor.observe(trace_name="query gindex", user_id="sparsh") as abc:
#         response = query_engine.query(queries[1])
#     instrumentor.flush()
#     instrumentor.stop()
# else:
#     print('langfuse not setup')
#     response = query_engine.query(queries[1])


In [33]:

print(response)

LangChain supports various types of retrievers, including those built on:

*   **Lexical search algorithms/engines:** Such as BM25, TF-IDF, and Elasticsearch.
*   **Search APIs:** Examples include integrations with Amazon Kendra and Wikipedia Search.

The framework also provides a uniform interface for interacting with different retrieval systems like vectorstores, graph databases, and relational databases.


In [34]:
print(response.get_formatted_sources())

> Source (Doc id: e507fe1c-4f7f-40e6-b32b-29b01fe2309d): Here are some facts extracted from the provided text:

LangChain's retriever class -> REQUIRES_IM...

> Source (Doc id: cca2bfa1-311e-46ac-b7b8-00a986d86a91): Here are some facts extracted from the provided text:

documents -> TYPE_OF -> LangChain Document...

> Source (Doc id: eb622ec4-e081-4d53-8911-ec764ff8daf4): Here are some facts extracted from the provided text:

search APIs -> RETURN -> search results
Am...

> Source (Doc id: dc0595ab-2a91-4804-a5ee-a8696ce45eeb): Here are some facts extracted from the provided text:

BM25 -> INTEGRATION -> retriever integrati...


## Advanced Retrieval

In [36]:
from llama_index.core.indices.property_graph import LLMSynonymRetriever
from llama_index.core.indices.property_graph import VectorContextRetriever
from llama_index.core.retrievers import TextToCypherRetriever
from llama_index.core.indices.property_graph import CypherTemplateRetriever

In [37]:
def validate_cypher(cypher):
    """Echo a Cypher query for inspection and hand it back unchanged.

    Prints the string form of ``cypher`` and returns the original object; no
    validation or rewriting is performed. Meant as a candidate
    ``cypher_validator`` hook (see the commented-out argument on the
    retriever below).
    """
    rendered = str(cypher)
    print(rendered)
    return cypher

# Text-to-Cypher retriever working against the Neo4j store: the LLM writes a
# Cypher query for the question and the query's response comes back as text.
t2c_retriever = TextToCypherRetriever(
    llm=llm,
    graph_store=graph_store,
    include_text=True,
    # cypher_validator=validate_cypher,
)


In [None]:


# Run the text-to-Cypher retriever for the second sample query.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the retrieval call is written only once.
span = (
    langfuse.start_as_current_span(name="text to cypher retreive")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        res = t2c_retriever.retrieve(queries[1])
finally:
    # Flush even when retrieval raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()


In [40]:
res[0].node

TextNode(id_='9e7f05cc-9040-47aa-8d5f-5d086d52eae0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='Generated Cypher query:\nMATCH (f:FRAMEWORK {name: "langchain"})-[:HAS]->(r:RETRIEVER)\nRETURN r.name AS retriever_type\n\nCypher Response:\n[]', mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}')

In [41]:
# Vector-based graph retriever over the Neo4j store. Source text and node
# properties are switched off, so results carry only the triplet strings.
vector_retriever = VectorContextRetriever(
    graph_store=graph_store,
    embed_model=embed_model,
    llm=llm,
    similarity_top_k=20,
    path_depth=2,
    limit=30,
    include_text=False,
    include_properties=False,
)

In [46]:
query = queries[1]
query

'what are the different types of retreivers supported by langchain?'

In [44]:
# Run the vector-context retriever for the current query.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the retrieval call is written only once.
span = (
    langfuse.start_as_current_span(name="vector context retreive")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        retrieved_nodes = vector_retriever.retrieve(query)
finally:
    # Flush even when retrieval raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()

In [48]:
retrieved_nodes[:3]

[NodeWithScore(node=TextNode(id_='a92393df-9a75-49cc-8801-96fe071ac6b0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e507fe1c-4f7f-40e6-b32b-29b01fe2309d', node_type=None, metadata={}, hash=None)}, metadata_template='{key}: {value}', metadata_separator='\n', text="LangChain's retriever class -> REQUIRES_IMPLEMENTATION_OF -> _get_relevant_documents method", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.8507366180419922),
 NodeWithScore(node=TextNode(id_='c2245a8a-7905-4c32-886b-6c493926250a', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='cca2bfa1-311e-46ac-b7b8-00a986d86a91', node_type=None, metadata={}, hash=None)}, metadata_template='{key}: {value}', meta

In [45]:
print(retrieved_nodes[0].text)

LangChain's retriever class -> REQUIRES_IMPLEMENTATION_OF -> _get_relevant_documents method


In [49]:
from llama_index.core.response_synthesizers import (
    BaseSynthesizer,
    ResponseMode,
    get_response_synthesizer,
)

response_synthesizer = get_response_synthesizer(llm=llm)

In [50]:

print('query:', query)


query: what are the different types of retreivers supported by langchain?


In [51]:
# Synthesize an answer to the query from the nodes returned by the
# vector-context retriever.
from contextlib import nullcontext

# Open a Langfuse span when tracing is available, otherwise a no-op context,
# so the synthesis call is written only once.
span = (
    langfuse.start_as_current_span(name="response synthesizer")
    if langfuse_available
    else nullcontext()
)
try:
    with span:
        response = response_synthesizer.synthesize(query=query, nodes=retrieved_nodes)
finally:
    # Flush even when synthesis raises, so the span of the failed run is sent.
    if langfuse_available:
        langfuse.flush()

In [54]:
response.response


'LangChain supports various types of retrievers, including integrations such as Amazon Kendra, Wikipedia Search, BM25, and Elasticsearch. Additionally, retrievers can be built from vectorstores and through text-to-SQL conversion.'

In [58]:
print(response.get_formatted_sources())

> Source (Doc id: a92393df-9a75-49cc-8801-96fe071ac6b0): LangChain's retriever class -> REQUIRES_IMPLEMENTATION_OF -> _get_relevant_documents method

> Source (Doc id: c2245a8a-7905-4c32-886b-6c493926250a): documents -> TYPE_OF -> LangChain Document

> Source (Doc id: 0bd5c8c1-d45f-4161-ba2f-0e71532ef26f): search APIs -> RETURN -> search results

> Source (Doc id: 00b0ca2c-3a50-4cf3-b297-028df8c7be74): Amazon Kendra -> IS_A -> retriever integration

> Source (Doc id: 69e96ade-d529-4af8-ac9c-deab7d8650a9): Wikipedia Search -> IS_A -> retriever integration

> Source (Doc id: 35970c85-3764-47a8-8afc-5eb407d8e729): BM25 -> INTEGRATION -> retriever integration

> Source (Doc id: 39f1f453-ae26-4a47-a339-2fa9bdbd79bf): Elasticsearch -> INTEGRATION -> retriever integration

> Source (Doc id: db171c9c-5f12-423b-abfe-c86e5aa63f56): LangChain retriever -> IS_A -> runnable

> Source (Doc id: be8848d8-1fa9-4c64-9a25-258e184521a7): text-to-SQL conversion -> USED_FOR -> build a retriever

> Source (D

In [59]:
response.source_nodes[0]

NodeWithScore(node=TextNode(id_='a92393df-9a75-49cc-8801-96fe071ac6b0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='e507fe1c-4f7f-40e6-b32b-29b01fe2309d', node_type=None, metadata={}, hash=None)}, metadata_template='{key}: {value}', metadata_separator='\n', text="LangChain's retriever class -> REQUIRES_IMPLEMENTATION_OF -> _get_relevant_documents method", mimetype='text/plain', start_char_idx=None, end_char_idx=None, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.8507366180419922)

## Neo4j Cypher queries

#### 1. Find the node containing the specific text

```cypher
// 1. Find the node containing the specific text
MATCH (target_node)
WHERE target_node.text CONTAINS "This package contains third-party integrations that are maintained by the LangChain community." // Or target_node.description, target_node.name etc.

// 2. Get its immediate neighbors and the relationships
OPTIONAL MATCH (target_node)-[r]-(neighbor)

// 3. Return the target node, the relationships, and the neighbors
RETURN target_node, r, neighbor
```

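#### 2. Find Duplicate Nodes

TODO: the Cypher query for this step was lost and needs to be rewritten. The cell below holds what appears to be its saved result.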

In [83]:
# duplicates
dup_ent = [["/", "/Chat models/"], ["LangChain Framework", "LangChain framework"], ["LangChain Framework", "LangChain framework"], ["appropriately lightweight", "lightweight", "lightweight dependencies"], ["all integrations", "integrations", "third-party integrations"], ["langchain package", "package", "packages"], ["all integrations", "integrations", "third-party integrations"], ["langchain package", "package", "packages"], ["appropriately lightweight", "lightweight", "lightweight dependencies"], ["all integrations", "integrations", "third-party integrations"], ["appropriately lightweight", "lightweight", "lightweight dependencies"], ["debug", "test"], ["debug", "test"], ["LangServe documentation", "LangSmith documentation"], ["Asynchronous programming", "asynchronous programming"], ["Asynchronous programming", "asynchronous programming"], ["async methods", "method"], ["async methods", "method"], ["event loop", "tasks on the event loop"], ["event loop", "tasks on the event loop"], ["curio", "trio"], ["RunnableConfig", "RunnableConfig propagation"], ["Python 3.10", "Python 3.11", "Python 3.9"], ["Python 3.10", "Python 3.11", "Python 3.9"], ["Python 3.10", "Python 3.11", "Python 3.9"], ["RunnableConfig", "RunnableConfig propagation"], ["IPython", "IPython REPL"], ["IPython", "IPython REPL"], ["`on_llm_end`", "`on_llm_error`"], ["LLM ends", "LLM errors"], ["llm ends", "llm errors"], ["`on_llm_end`", "`on_llm_error`"], ["Async callback handlers", "Callback handlers", "Sync callback handlers"], ["async", "sync"], ["Async callback handlers", "Callback handlers", "Sync callback handlers"], ["AsyncCallbackHandler interface", "BaseCallbackHandler interface"], ["Async callback handlers", "Callback handlers", "Sync callback handlers"], ["AsyncCallbackHandler interface", "BaseCallbackHandler interface"], ["AsyncCallbackManager", "CallbackManager"], ["AsyncCallbackManager", "CallbackManager"], ["Request time callbacks", "request time callbacks"], ["all children", "any 
children"], ["all children", "any children"], ["Request time callbacks", "request time callbacks"], ["system message", "tool message", "user message"], ["Conversation", "conversation", "conversation structure"], ["system message", "tool message", "user message"], ["specific task", "specific tasks"], ["specific task", "specific tasks"], ["first message", "last message"], ["first message", "last message"], ["system message", "tool message", "user message"], ["Conversation", "conversation", "conversation structure"], ["long-term memory", "short-term memory"], ["long-term memory", "short-term memory"], ["BaseChatModel", "BaseChatModel interface"], ["BaseChatModel", "BaseChatModel interface"], ["BaseLLM", "BaseLLM interface"], ["BaseLLM", "BaseLLM interface"], ["processing data", "processing text"], ["processing data", "processing text"], ["audio", "video"], ["audio", "video"], ["multimodal inputs", "multimodal outputs"], ["multimodal inputs", "multimodal outputs"], ["context window", "large context windows"], ["entire input", "input", "input sequence", "meaning of the input"], ["context window", "large context windows"], ["entire input", "input", "input sequence", "meaning of the input"], ["entire input", "input", "input sequence", "meaning of the input"], ["/", "/Chat models/"], ["Rate limit", "Rate limits"], ["Rate limit error", "rate limit error"], ["Spacing out requests", "spacing out requests"], ["Rate limit error", "rate limit error"], ["Chat model", "ChatModel", "all chat models", "another chat model", "chat model", "different chat models"], ["Chat model", "ChatModel", "all chat models", "another chat model", "chat model", "different chat models"], ["Rate limit", "Rate limits"], ["Spacing out requests", "spacing out requests"], ["Caching", "Caching chat model responses"], ["Caching", "Caching chat model responses"], ["entire input", "input", "input sequence", "meaning of the input"], ["How-to guides", "how-to guides"], ["Common retrieval systems", "Retrieval 
systems", "retrieval systems"], ["Embedding", "embedding", "embedding space"], ["BERT", "BERT architecture", "SBERT"], ["BERT", "BERT architecture", "SBERT"], ["BERT", "BERT architecture", "SBERT"], ["Cosine Similarity", "cosine similarity"], ["Embedding", "embedding", "embedding space"], ["Embedding", "embedding", "embedding space"], ["Cosine Similarity", "cosine similarity"], ["langchain package", "package", "packages"], ["LangServe documentation", "LangSmith documentation"], ["Evaluation", "evaluation"], ["Evaluation", "evaluation"], ["Example selectors", "example selectors"], ["Few-shot prompting", "prompting"], ["Example selectors", "example selectors"], ["LLM feedback", "User feedback"], ["LLM feedback", "User feedback"], ["latency", "latency constraints"], ["latency", "latency constraints"], ["keyword-based similarity", "similarity"], ["keyword-based similarity", "similarity"], ["AI application", "AI applications", "LLM applications"], ["API Reference", "API reference", "Document API reference"], ["BaseStore", "BaseStore[str, bytes]", "BaseStores"], ["BaseStore", "BaseStore[str, bytes]", "BaseStores"], ["BaseStore", "BaseStore[str, bytes]", "BaseStores"], ["mdelete", "mget", "mset"], ["mdelete", "mget", "mset"], ["mdelete", "mget", "mset"], ["key", "key_value_pairs"], ["key", "key_value_pairs"], ["LCEL chains", "LLMChain"], ["final_output", "output", "output1"], ["runnable1", "runnable2"], ["runnable1", "runnable2"], ["runnable1.invoke", "runnable2.invoke"], ["final_output", "output", "output1"], ["runnable1.invoke", "runnable2.invoke"], ["final_output", "output", "output1"], ["asynchronous execution", "synchronous execution"], ["asynchronous execution", "synchronous execution"], ["`|` operator", "| operator"], ["`|` operator", "| operator"], ["automatic type coercion", "type coercion"], ["automatic type coercion", "type coercion"], ["LCEL chains", "LLMChain"], ["Chat model", "ChatModel", "all chat models", "another chat model", "chat model", "different chat 
models"], ["ID", "Name"], ["ID", "Name"], ["Name property", "name property"], ["Name property", "name property"], ["Conversation", "conversation", "conversation structure"], ["Chat model", "ChatModel", "all chat models", "another chat model", "chat model", "different chat models"], ["system role", "tool role", "user role"], ["system role", "tool role", "user role"], ["system role", "tool role", "user role"], ["langchain_core.messages", "langchain_core.messages.utils"], ["single string", "string"], ["invalid_tool_calls", "tool_calls"], ["image_url", "type", "url"], ["astream", "astream_events"], ["astream", "astream_events"], ["langchain_core.messages", "langchain_core.messages.utils"], ["text, images, audio", "text, images, audio, video"], ["text, images, audio", "text, images, audio, video"], ["image_url", "type", "url"], ["image_url", "type", "url"], ["multimodal retrieval", "retrieval", "retrieval tasks"], ["multimodal retrieval", "retrieval", "retrieval tasks"], ["multimodal retrieval", "retrieval", "retrieval tasks"], ["CSV", "Str"], ["Prompt Templates", "prompt templates"], ["Chat model", "ChatModel", "all chat models", "another chat model", "chat model", "different chat models"], ["single string", "string"], ["Prompt Templates", "prompt templates"], ["Retrievers", "retrievers"], ["Common retrieval systems", "Retrieval systems", "retrieval systems"], ["Data", "Structured data", "Unstructured text", "all types of data", "unstructured data"], ["Relational databases", "relational database", "relational databases"], ["Graph databases", "graph database", "graph databases"], ["Data", "Structured data", "Unstructured text", "all types of data", "unstructured data"], ["Translation", "translation"], ["Search queries", "search query"], ["Translation", "translation"], ["Text to SQL", "Text-to-SQL", "text-to-SQL", "text-to-SQL conversion"], ["Natural Language", "natural language to Cypher", "natural language to SQL"], ["Text-to-Cypher", "text-to-Cypher"], ["Natural 
Language", "natural language to Cypher", "natural language to SQL"], ["Metadata Filters", "metadata filter"], ["Metadata Filters", "metadata filter"], ["Text to SQL", "Text-to-SQL", "text-to-SQL", "text-to-SQL conversion"], ["/Retrieval/", "/Retrieval/Information retrieval"], ["/Retrieval/", "/Retrieval/Information retrieval"], ["Common retrieval systems", "Retrieval systems", "retrieval systems"], ["index", "inverted index"], ["Data", "Structured data", "Unstructured text", "all types of data", "unstructured data"], ["documents", "list of documents", "store documents"], ["Relational databases", "relational database", "relational databases"], ["data into tables", "table", "tables"], ["data into tables", "table", "tables"], ["data into tables", "table", "tables"], ["Data", "Structured data", "Unstructured text", "all types of data", "unstructured data"], ["Graph databases", "graph database", "graph databases"], ["documents", "list of documents", "store documents"], ["Document objects", "list of Document objects"], ["query analysis", "query analysis techniques"], ["Text to SQL", "Text-to-SQL", "text-to-SQL", "text-to-SQL conversion"], ["Retrievers", "retrievers"], ["Document objects", "list of Document objects"], ["documents", "list of documents", "store documents"], ["Relational databases", "relational database", "relational databases"], ["Text to SQL", "Text-to-SQL", "text-to-SQL", "text-to-SQL conversion"], ["Graph databases", "graph database", "graph databases"], ["Text-to-Cypher", "text-to-Cypher"], ["Search queries", "search query"], ["ParentDocument", "ParentDocument retriever"], ["ParentDocument", "ParentDocument retriever"]]
dup_ent

[['/', '/Chat models/'],
 ['LangChain Framework', 'LangChain framework'],
 ['LangChain Framework', 'LangChain framework'],
 ['appropriately lightweight', 'lightweight', 'lightweight dependencies'],
 ['all integrations', 'integrations', 'third-party integrations'],
 ['langchain package', 'package', 'packages'],
 ['all integrations', 'integrations', 'third-party integrations'],
 ['langchain package', 'package', 'packages'],
 ['appropriately lightweight', 'lightweight', 'lightweight dependencies'],
 ['all integrations', 'integrations', 'third-party integrations'],
 ['appropriately lightweight', 'lightweight', 'lightweight dependencies'],
 ['debug', 'test'],
 ['debug', 'test'],
 ['LangServe documentation', 'LangSmith documentation'],
 ['Asynchronous programming', 'asynchronous programming'],
 ['Asynchronous programming', 'asynchronous programming'],
 ['async methods', 'method'],
 ['async methods', 'method'],
 ['event loop', 'tasks on the event loop'],
 ['event loop', 'tasks on the event lo