#  Graph RAG using LangChain and Kuzu 

Taken from https://pypi.org/project/langchain-kuzu/

In [1]:
# A default setup cell.
# It imports environment variables, define 'devtools.debug" as a buildins, set PYTHONPATH, and code auto-reload
# Copy it in other Notebooks

import builtins

from devtools import debug
from dotenv import load_dotenv

setattr(builtins, "debug", debug)
load_dotenv(verbose=True)
%load_ext autoreload
%autoreload 2
%reset -f
!export PYTHONPATH=":./python"
# cSpell: disable

In [2]:
from pathlib import Path

import kuzu
from ipycytoscape import CytoscapeWidget
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_kuzu.chains.graph_qa.kuzu import KuzuQAChain
from langchain_kuzu.graphs.kuzu_graph import KuzuGraph
from langchain_openai import ChatOpenAI


In [3]:
# Path of the on-disk Kuzu database used throughout the notebook.
DB = "test_db"

# Build the graph only when the database does not exist yet: the extraction step
# calls the LLM, so it is skipped on later runs and the existing database is reused.
if not Path(DB).exists():
    text = "Tim Cook is the CEO of Apple. Apple has its headquarters in California."

    # Schema the LLM is allowed to extract: node labels and (source, relation, target) triples
    allowed_nodes = ["Person", "Company", "Location"]
    allowed_relationships = [
        ("Person", "IS_CEO_OF", "Company"),
        ("Company", "HAS_HEADQUARTERS_IN", "Location"),
    ]
    # Define the LLMGraphTransformer, restricted to the schema above
    llm_transformer = LLMGraphTransformer(
        llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
        allowed_nodes=allowed_nodes,
        allowed_relationships=allowed_relationships,
    )

    # Convert the given text into graph documents
    documents = [Document(page_content=text)]
    graph_documents = llm_transformer.convert_to_graph_documents(documents)

    # The database is created only after extraction has succeeded, so a failed
    # LLM call does not leave a database behind that would skip this branch next time.
    db = kuzu.Database(DB)
    graph = KuzuGraph(db, allow_dangerous_requests=True)

    # Add the graph documents to the graph; include_source=True is passed so the
    # source document is stored as well
    graph.add_graph_documents(
        graph_documents,
        include_source=True,
    )
else:
    # f-string so the actual database name is printed rather than the literal "{DB}"
    print(f"load {DB}")


load {DB}


In [4]:
# Open the on-disk Kuzu database and wrap it for LangChain
db = kuzu.Database(DB)
graph = KuzuGraph(db, allow_dangerous_requests=True)

# Question-answering chain over the graph; verbose=True prints the generated Cypher
qa_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
chain = KuzuQAChain.from_llm(
    llm=qa_llm,
    graph=graph,
    verbose=True,
    allow_dangerous_requests=True,
)

# Natural-language questions to ask the graph
queries = ["Who is the CEO of Apple?", "Where is Apple headquartered?"]

# Run each question through the chain and show the raw result dict
for query in queries:
    result = chain.invoke(query)
    print(f"Query: {query}\nResult: {result}\n")



[1m> Entering new KuzuQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Person)-[:IS_CEO_OF]->(c:Company) WHERE LOWER(c.id) = 'apple' RETURN p[0m
Full Context:
[32;1m[1;3m[{'p': {'_id': {'offset': 0, 'table': 3}, '_label': 'Person', 'id': 'Tim Cook', 'type': 'entity'}}][0m

[1m> Finished chain.[0m
Query: Who is the CEO of Apple?
Result: {'query': 'Who is the CEO of Apple?', 'result': 'Tim Cook is the CEO of Apple.'}



[1m> Entering new KuzuQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (c:Company {id: LOWER('Apple')})-[:HAS_HEADQUARTERS_IN]->(l:Location) RETURN l[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m
Query: Where is Apple headquartered?
Result: {'query': 'Where is Apple headquartered?', 'result': "I don't know the answer."}



In [7]:
# Export the graph to Cytoscape JSON (entity nodes and the edges between them)
# and render it in an interactive widget.
cyto_data = {"nodes": [], "edges": []}

# --- Nodes: fetch everything, keep all but the text_chunk nodes ---
nodes_query = "MATCH (n) RETURN n"
nodes_result = graph.query(nodes_query)

cyto_data["nodes"].extend(
    {"data": {"id": props["id"], "label": props["_label"], "type": props["type"]}}
    for props in (row["n"] for row in nodes_result)
    if props["type"] != "text_chunk"
)

# --- Edges: fetch everything, drop any edge touching a text_chunk node ---
rels_query = "MATCH (a)-[r]->(b) RETURN a, r, b"
rels_result = graph.query(rels_query)

for row in rels_result:
    src, link, dst = row["a"], row["r"], row["b"]
    if src["type"] == "text_chunk" or dst["type"] == "text_chunk":
        continue
    edge = {
        # Edge id is built from source id, relationship label and target id
        "id": f"{src['id']}-{link['_label']}-{dst['id']}",
        "source": src["id"],
        "target": dst["id"],
        "label": link["_label"],
    }
    cyto_data["edges"].append({"data": edge})

# --- Widget ---
cyto = CytoscapeWidget()
cyto.graph.add_graph_from_json(cyto_data)
cyto.set_layout(animate=True)

# Optional custom styling, currently disabled:
# cyto.set_style(
#     [
#         {
#             "selector": "node",
#             "css": {
#                 "content": "data(label)",
#                 "text-valign": "center",
#                 "color": "white",
#                 "text-outline-width": 2,
#                 "text-outline-color": "#888",
#                 "background-color": "#666",
#             },
#         },
#         {
#             "selector": "edge",
#             "css": {
#                 "content": "data(label)",
#                 "curve-style": "bezier",
#                 "target-arrow-shape": "triangle",
#                 "line-color": "#999",
#                 "target-arrow-color": "#999",
#             },
#         },
#     ]
# )

# Last expression of the cell: displays the widget
cyto

CytoscapeWidget(cytoscape_layout={'name': 'cola', 'animate': True}, cytoscape_style=[{'selector': 'node', 'css…

In [22]:
# Display the Cytoscape JSON payload (nodes and edges) for inspection.
# NOTE(review): the saved output of this cell lists text_chunk nodes and MENTIONS_*
# edges, although the cell that builds cyto_data filters those out; its execution
# count is also out of sequence. The output looks stale - re-run the notebook from
# a fresh kernel to confirm.
cyto_data

{'nodes': [{'data': {'id': '9cb60013a7fbc1d4ad44a7ddaef20cbc',
    'label': 'Chunk',
    'type': 'text_chunk'}},
  {'data': {'id': 'Apple', 'label': 'Company', 'type': 'entity'}},
  {'data': {'id': 'California', 'label': 'Location', 'type': 'entity'}},
  {'data': {'id': 'Tim Cook', 'label': 'Person', 'type': 'entity'}}],
 'edges': [{'data': {'id': '9cb60013a7fbc1d4ad44a7ddaef20cbc-MENTIONS_Chunk_Person-Tim Cook',
    'source': '9cb60013a7fbc1d4ad44a7ddaef20cbc',
    'target': 'Tim Cook',
    'label': 'MENTIONS_Chunk_Person'}},
  {'data': {'id': '9cb60013a7fbc1d4ad44a7ddaef20cbc-MENTIONS_Chunk_Company-Apple',
    'source': '9cb60013a7fbc1d4ad44a7ddaef20cbc',
    'target': 'Apple',
    'label': 'MENTIONS_Chunk_Company'}},
  {'data': {'id': '9cb60013a7fbc1d4ad44a7ddaef20cbc-MENTIONS_Chunk_Location-California',
    'source': '9cb60013a7fbc1d4ad44a7ddaef20cbc',
    'target': 'California',
    'label': 'MENTIONS_Chunk_Location'}},
  {'data': {'id': 'Apple-HAS_HEADQUARTERS_IN-California',
   