# Building a local Knowledge Graph RAG with Neo4j, LangChain, and Ollama

## Preliminaries
### Installs

In [1]:
!python -V
!pip -q install -U \
  langchain langchain-community langchain-experimental langchain-openai \
  langchain-neo4j langchain-text-splitters \
  neo4j wikipedia


Python 3.12.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


### Environment variables

In [8]:
import os

# Diffbot API key: read from the environment only -- never hardcode or commit it.
#   export DIFFBOT_API_KEY="..."
# The fallback is an empty string, so a missing key is falsy and any later
# `if not DIFFBOT_API_KEY` check can actually detect it.
DIFFBOT_API_KEY = os.getenv("DIFFBOT_API_KEY", "")

# Neo4j (matches your docker-compose)
# NOTE(review): the "password" fallback looks like a local-development default;
# override NEO4J_PASSWORD for any shared or deployed instance.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE", "shop")

# Ollama
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3:8b")

# Report the effective configuration without printing any secret values:
# only whether the key is set, never the key or the Neo4j password.
print("DIFFBOT_API_KEY set?", bool(DIFFBOT_API_KEY))
print("Neo4j:", NEO4J_URI, NEO4J_DATABASE)
print("Ollama:", OLLAMA_BASE_URL, OLLAMA_MODEL)


DIFFBOT_API_KEY set? True
Neo4j: bolt://localhost:7687 shop
Ollama: http://localhost:11434 llama3:8b


## Extract graph content
### Load Wikipedia documents

In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Wikipedia search query; load_max_docs=2 limits how many pages are requested.
TOPIC = "Stray Kids"
wiki_loader = WikipediaLoader(query=TOPIC, load_max_docs=2)
docs = wiki_loader.load()

# Sanity check: document count, the first page's metadata, and a 500-character preview.
len(docs), docs[0].metadata, docs[0].page_content[:500]


(2,
 {'title': 'Stray Kids',
  'summary': 'Stray Kids (often abbreviated to SKZ; Korean: 스트레이 키즈; RR: Seuteurei Kijeu) is a South Korean boy band formed by JYP Entertainment. The band has eight members: Bang Chan, Lee Know, Changbin, Hyunjin, Han, Felix, Seungmin, and I.N. Former member Woojin left the band in 2019. Stray Kids primarily self-produces its recordings; the main production team is named 3Racha and consists of Bang Chan, Changbin, and Han, and the other members frequently participate in songwriting.\nThe leader, Bang Chan, personally selected each member to be a part of the band before filming the eponymous 2017 reality television show, which is unusual in K-pop, where that authority is usually held by the agency\'s executives and creative directors. The band released their unofficial debut extended play (EP) Mixtape in January 2018 and officially debuted on March 25 with the EP I Am Not, which was followed by the EPs I Am Who and I Am You, completing the I Am EP series. Th

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded pages into overlapping chunks before graph extraction.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
)
split_docs = splitter.split_documents(docs)

# Sanity check: number of chunks and the first 300 characters of the first chunk.
len(split_docs), split_docs[0].page_content[:300]


(9,
 'Stray Kids (often abbreviated to SKZ; Korean: 스트레이 키즈; RR: Seuteurei Kijeu) is a South Korean boy band formed by JYP Entertainment. The band has eight members: Bang Chan, Lee Know, Changbin, Hyunjin, Han, Felix, Seungmin, and I.N. Former member Woojin left the band in 2019. Stray Kids primarily self')

### Extract graphs

In [12]:
from langchain_experimental.graph_transformers import DiffbotGraphTransformer

# Fail fast with a clear message if no API key is configured, before the
# transformer is built or any documents are converted.
if not DIFFBOT_API_KEY:
    raise ValueError("Missing DIFFBOT_API_KEY env var. Set it locally; do not commit it.")

transformer = DiffbotGraphTransformer(diffbot_api_key=DIFFBOT_API_KEY)

# Convert the text chunks into graph documents.
# NOTE(review): presumably this sends each chunk to the Diffbot service, so it
# needs network access and may be slow or rate-limited -- confirm before re-running.
graph_docs = transformer.convert_to_graph_documents(split_docs)
# Sanity check: number of graph documents and the type of the first one.
len(graph_docs), type(graph_docs[0])


(9, langchain_community.graphs.graph_document.GraphDocument)

In [None]:
# Preview the first extracted graph document: up to five of its nodes and
# up to five of its relationships.
gd = graph_docs[0]
gd.nodes[:5], gd.relationships[:5]


([Node(id='http://www.wikidata.org/entity/Q59670293', type='Person', properties={'name': 'Bang Chan'}),
  Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={'name': 'Stray Kids'}),
  Node(id='Woojin', type='Person', properties={'name': 'Woojin'})],
 [Relationship(source=Node(id='http://www.wikidata.org/entity/Q59670293', type='Person', properties={}), target=Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={}), type='EMPLOYEE_OR_MEMBER_OF', properties={'evidence': 'Stray Kids primarily self-produces its recordings; the main production team is named 3Racha and consists of Bang Chan, Changbin, and Han, and the other members frequently participate in songwriting.', 'isCurrent': 'true'}),
  Relationship(source=Node(id='Woojin', type='Person', properties={}), target=Node(id='http://www.wikidata.org/entity/Q46134670', type='Organization', properties={}), type='EMPLOYEE_OR_MEMBER_OF', properties={'evidence': 'Stray Kids prim