# Building a local Knowledge Graph RAG with Neo4j, LangChain, and Ollama

## Preliminary: 
### Installs

In [1]:
!python -V
!pip install -U -r requirements.txt



Python 3.12.3
Collecting langchain-community==0.4.0 (from -r requirements.txt (line 7))
  Using cached langchain_community-0.4-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-text-splitters==1.0.0 (from -r requirements.txt (line 8))
  Using cached langchain_text_splitters-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-ollama==1.0.1 (from -r requirements.txt (line 11))
  Using cached langchain_ollama-1.0.1-py3-none-any.whl.metadata (2.5 kB)
Collecting neo4j==5.25.0 (from -r requirements.txt (line 14))
  Using cached neo4j-5.25.0-py3-none-any.whl.metadata (5.7 kB)
Collecting python-dotenv==1.0.1 (from -r requirements.txt (line 16))
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting ollama<1.0.0,>=0.6.0 (from langchain-ollama==1.0.1->-r requirements.txt (line 11))
  Using cached ollama-0.6.1-py3-none-any.whl.metadata (4.3 kB)
Using cached langchain_text_splitters-1.0.0-py3-none-any.whl (33 kB)
Using cached langchain_community-0.4-py3-non

### Environment variables

In [3]:
from dotenv import load_dotenv
load_dotenv()  


True

In [14]:
import os

assert os.getenv("DIFFBOT_API_KEY")
assert os.getenv("NEO4J_URI") == "bolt://localhost:7687"
assert os.getenv("NEO4J_DATABASE") == "shop"

DIFFBOT_API_KEY = os.getenv("DIFFBOT_API_KEY")
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")

print("DIFFBOT_API_KEY:", bool(DIFFBOT_API_KEY))
print("NEO4J_URI:", NEO4J_URI)
print("NEO4J_DATABASE:", NEO4J_DATABASE)
print("NEO4J_PASSWORD:", NEO4J_PASSWORD)
print("NEO4J_PASSWORD:", NEO4J_USERNAME)


print(".env loaded correctly")



DIFFBOT_API_KEY: True
NEO4J_URI: bolt://localhost:7687
NEO4J_DATABASE: shop
NEO4J_PASSWORD: password
NEO4J_PASSWORD: neo4j
.env loaded correctly


## Extract graph content
### Load wikipedia documents

In [10]:
from langchain_community.document_loaders import WikipediaLoader

TOPIC = "Stray Kids"   
docs = WikipediaLoader(query=TOPIC, load_max_docs=2).load()

len(docs), docs[0].metadata, docs[0].page_content[:500]


(2,
 {'title': 'Stray Kids',
  'summary': 'Stray Kids (often abbreviated to SKZ; Korean: 스트레이 키즈; RR: Seuteurei Kijeu) is a South Korean boy band formed by JYP Entertainment. The band has eight members: Bang Chan, Lee Know, Changbin, Hyunjin, Han, Felix, Seungmin, and I.N. Former member Woojin left the band in 2019. Stray Kids primarily self-produces its recordings; the main production team is named 3Racha and consists of Bang Chan, Changbin, and Han, and the other members frequently participate in songwriting.\nThe leader, Bang Chan, personally selected each member to be a part of the band before filming the eponymous 2017 reality television show, which is unusual in K-pop, where that authority is usually held by the agency\'s executives and creative directors. The band released their unofficial debut extended play (EP) Mixtape in January 2018 and officially debuted on March 25 with the EP I Am Not, which was followed by the EPs I Am Who and I Am You, completing the I Am EP series. Th

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
split_docs = splitter.split_documents(docs)
len(split_docs), split_docs[0].page_content[:300]


(9,
 'Stray Kids (often abbreviated to SKZ; Korean: 스트레이 키즈; RR: Seuteurei Kijeu) is a South Korean boy band formed by JYP Entertainment. The band has eight members: Bang Chan, Lee Know, Changbin, Hyunjin, Han, Felix, Seungmin, and I.N. Former member Woojin left the band in 2019. Stray Kids primarily self')

### Extract graphs

In [12]:
from langchain_experimental.graph_transformers import DiffbotGraphTransformer

if not DIFFBOT_API_KEY:
    raise ValueError("Missing DIFFBOT_API_KEY env var. Set it locally; do not commit it.")

transformer = DiffbotGraphTransformer(diffbot_api_key=DIFFBOT_API_KEY)

graph_docs = transformer.convert_to_graph_documents(split_docs)
len(graph_docs), type(graph_docs[0])


(9, langchain_community.graphs.graph_document.GraphDocument)

In [13]:
#Display the graph extracted
gd = graph_docs[0]
gd.nodes[:5], gd.relationships[:5]


([Node(id='http://www.wikidata.org/entity/Q59670293', type='Person', properties={'name': 'Bang Chan'}),
  Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={'name': 'Stray Kids'}),
  Node(id='Woojin', type='Person', properties={'name': 'Woojin'})],
 [Relationship(source=Node(id='http://www.wikidata.org/entity/Q59670293', type='Person', properties={}), target=Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={}), type='EMPLOYEE_OR_MEMBER_OF', properties={'evidence': 'Stray Kids primarily self-produces its recordings; the main production team is named 3Racha and consists of Bang Chan, Changbin, and Han, and the other members frequently participate in songwriting.', 'isCurrent': 'true'}),
  Relationship(source=Node(id='Woojin', type='Person', properties={}), target=Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={}), type='EMPLOYEE_OR_MEMBER_OF', properties={'evidence': 'Stray Kids prim

## Neo4j

### Connect and build the graphs

In [16]:
from neo4j import GraphDatabase

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

def run_cypher(q, params=None):
    params = params or {}
    with driver.session(database=NEO4J_DATABASE) as s:
        return list(s.run(q, params))

# ⚠️ Clean DB
run_cypher("MATCH (n) DETACH DELETE n")
run_cypher("SHOW DATABASES")


[<Record name='shop' type='standard' aliases=[] access='read-write' address='localhost:7687' role='primary' writer=True requestedStatus='online' currentStatus='online' statusMessage='' default=True home=True constituents=[]>,
 <Record name='system' type='system' aliases=[] access='read-write' address='localhost:7687' role='primary' writer=True requestedStatus='online' currentStatus='online' statusMessage='' default=False home=False constituents=[]>]

In [None]:
from langchain_neo4j import Neo4jGraph

graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE
)

graph.add_graph_documents(graph_docs)

graph.refresh_schema()
print(graph.schema)


Node properties:
Person {id: STRING, name: STRING}
Organization {id: STRING, name: STRING}
Location {id: STRING, name: STRING}
Relationship properties:
EMPLOYEE_OR_MEMBER_OF {evidence: STRING, isCurrent: STRING}
WORK_RELATIONSHIP {evidence: STRING}
FOUNDED_BY {evidence: STRING}
ORGANIZATION_LOCATIONS {evidence: STRING}
The relationships:
(:Person)-[:EMPLOYEE_OR_MEMBER_OF]->(:Organization)
(:Person)-[:WORK_RELATIONSHIP]->(:Person)
(:Organization)-[:ORGANIZATION_LOCATIONS]->(:Location)
(:Organization)-[:FOUNDED_BY]->(:Person)


### Inspect graphs with cypher

In [18]:
run_cypher("CALL db.labels() YIELD label RETURN label ORDER BY label")


[<Record label='Location'>,
 <Record label='Organization'>,
 <Record label='Person'>]

In [19]:
run_cypher("CALL db.relationshipTypes() YIELD relationshipType RETURN relationshipType ORDER BY relationshipType")


[<Record relationshipType='EMPLOYEE_OR_MEMBER_OF'>,
 <Record relationshipType='FOUNDED_BY'>,
 <Record relationshipType='ORGANIZATION_LOCATIONS'>,
 <Record relationshipType='WORK_RELATIONSHIP'>]

In [20]:
run_cypher("MATCH (n) RETURN labels(n) as labels, n.name as name, n.id as id LIMIT 25")


[<Record labels=['Person'] name='Bang Chan' id='http://www.wikidata.org/entity/Q59670293'>,
 <Record labels=['Organization'] name='Stray Kids' id='http://www.wikidata.org/entity/Q46134670'>,
 <Record labels=['Person'] name='Woojin' id='Woojin'>,
 <Record labels=['Person'] name='Felix' id='Felix'>,
 <Record labels=['Person'] name='Lee Know' id='http://www.wikidata.org/entity/Q59260597'>,
 <Record labels=['Location'] name='Japan' id='http://www.wikidata.org/entity/Q17'>]

In [21]:
run_cypher("""
MATCH (p)
WHERE (p.name IS NOT NULL AND toLower(p.name) CONTAINS toLower($name))
   OR (p.id   IS NOT NULL AND toLower(p.id)   CONTAINS toLower($name))
WITH p LIMIT 5
MATCH (p)-[r]-(o)
RETURN labels(p) AS pLabels, p.name AS pName,
       type(r) AS rel, labels(o) AS oLabels, o.name AS oName
LIMIT 50
""", {"name": "Stray Kids"})


[<Record pLabels=['Organization'] pName='Stray Kids' rel='ORGANIZATION_LOCATIONS' oLabels=['Location'] oName='Japan'>,
 <Record pLabels=['Organization'] pName='Stray Kids' rel='FOUNDED_BY' oLabels=['Person'] oName='Bang Chan'>,
 <Record pLabels=['Organization'] pName='Stray Kids' rel='EMPLOYEE_OR_MEMBER_OF' oLabels=['Person'] oName='Woojin'>,
 <Record pLabels=['Organization'] pName='Stray Kids' rel='EMPLOYEE_OR_MEMBER_OF' oLabels=['Person'] oName='Bang Chan'>]

In [23]:
run_cypher("""
MATCH (p)
WHERE p.name IS NOT NULL AND toLower(p.name) CONTAINS toLower($name)
WITH p, COUNT {(p)--()} AS degree
RETURN p.name AS name, labels(p) AS labels, p.id AS id, degree
ORDER BY degree DESC
LIMIT 10
""", {"name":"Stray Kids"})


[<Record name='Stray Kids' labels=['Organization'] id='http://www.wikidata.org/entity/Q46134670' degree=4>]

## Function to get all relationships for an entity

In [26]:
def get_person_relationships(person_name: str, limit_nodes: int = 1, limit_edges: int = 200):
    q = """
    MATCH (p)
    WHERE (p.name IS NOT NULL AND toLower(p.name) CONTAINS toLower($name))
    WITH p, COUNT {(p)--()} AS degree
    ORDER BY degree DESC
    LIMIT $limit_nodes
    MATCH (p)-[r]-(o)
    RETURN p.name AS person, labels(p) AS personLabels,
           type(r) AS rel, o.name AS other, labels(o) AS otherLabels,
           properties(r) AS relProps, properties(o) AS otherProps
    LIMIT $limit_edges
    """
    return run_cypher(q, {"name": person_name, "limit_nodes": limit_nodes, "limit_edges": limit_edges})

rows = get_person_relationships("Stray Kids")
rows[:3]


[<Record person='Stray Kids' personLabels=['Organization'] rel='ORGANIZATION_LOCATIONS' other='Japan' otherLabels=['Location'] relProps={'evidence': 'Stray Kids debuted in Japan with the Japanese version of "Levanter", "Double Knot", and "My Pace", as well as the re-recorded version of their songs from the previous album compiled in the compilation album SKZ2020.'} otherProps={'id': 'http://www.wikidata.org/entity/Q17', 'name': 'Japan'}>,
 <Record person='Stray Kids' personLabels=['Organization'] rel='FOUNDED_BY' other='Bang Chan' otherLabels=['Person'] relProps={'evidence': "It was later revealed that the Stray Kids' member line-up was formed unusually, with the leader Bang Chan handpicking each member from the list of JYPE trainees rather than the agency's executives and creative directors selecting them."} otherProps={'id': 'http://www.wikidata.org/entity/Q59670293', 'name': 'Bang Chan'}>,
 <Record person='Stray Kids' personLabels=['Organization'] rel='EMPLOYEE_OR_MEMBER_OF' other='

## Ollama

### Setting up the model

Create the llm

NameError: name 'OLLAMA_MODEL' is not defined

Write the cypher prompt

Useful function in our case

### Subgraph retrieval