### Neo4j setup

In [2]:

# from langchain_community.graphs import Neo4jGraph
from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv
import os
# from langchain.graphs.neo4j_graph import Neo4jGraph 
# from langchain.chains import GraphQAChain old
from langchain_neo4j import GraphCypherQAChain

from langchain_core.prompts import PromptTemplate
# from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain

# Read variables from a .env file into the process environment before looking them up.
load_dotenv()
neo_pass = os.environ.get("NEO4J_PASS")
neo_db_id = os.environ.get("DB_ID")

# Shared Neo4j handle used by the rest of the notebook. The password comes from the
# environment; the URL and username are fixed here.
graph = Neo4jGraph(
    url="neo4j+s://f5c81351.databases.neo4j.io",
    username="neo4j",
    password=neo_pass,
    enhanced_schema=True,
)

def clean_graph():
    """Wipe the graph: delete every node together with its relationships."""
    # DETACH DELETE drops each matched node's relationships along with the node itself.
    graph.query(
        """
    MATCH (n)
    DETACH DELETE n
    """
    )

### Llama setup

In [None]:
import os
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_ollama import ChatOllama  # using chatOllama

# Chat model served through Ollama; the model tag can be swapped for 'llama3' or others.
llm = ChatOllama(
    model="llama3.2:latest",
    temperature=0,
)
# Graph transformer backed by the chat model above; no extra options are passed.
llm_transformer_filtered = LLMGraphTransformer(llm=llm)

# additional_instructions = """
# When creating entities, add a "document_id" property to each node and set it to the document's unique ID.
# For example, if the document ID is "doc123", each created node should include `document_id: "doc123"`.
# Query example: 
# CREATE (n:NodeLabel) 
# SET n.document_id = "doc123" 
# RETURN n
# """

# Use LLMGraphTransformer with Ollama
# llm_transformer = LLMGraphTransformer(
#     llm=llm,
#     additional_instructions=additional_instructions,
#     ignore_tool_usage=True
# )


### Split document into chunks

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

pdf_path = "resume.pdf"
loader = PyPDFLoader(pdf_path)
# Load the pages once. Splitting is done explicitly by text_splitter below, so a
# separate load_and_split() pass over the PDF would only repeat the parsing work.
pages = loader.load()

# chunk_overlap is the text shared between neighbouring chunks; it lets context carry
# across chunk boundaries. Sizes are measured in characters (length_function=len).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)

splits = text_splitter.split_documents(pages)  # split the pages using LangChain's text_splitter

processed_chunks = []

for i, chunk in enumerate(splits):
    # Per-chunk metadata; the preview is capped at 100 characters and marked with "..."
    # only when the chunk text was actually truncated.
    metadata = {
        "chunk_id": i,
        "source": pdf_path,
        "page_number": chunk.metadata.get("page", None),
        "total_length": len(chunk.page_content),
        "text_preview": (
            chunk.page_content[:100] + "..."
            if len(chunk.page_content) > 100
            else chunk.page_content
        ),
    }
    # Keep the full text alongside its metadata for the later graph-building cells.
    processed_chunks.append({"text": chunk.page_content, "metadata": metadata})

print(f"{len(processed_chunks)} chunks processed")
# print(processed_chunks[6]['metadata']['text_preview'])
# print(processed_chunks[6]['metadata'])
# print(processed_chunks[0]['text_preview'])

    

7 chunks processed


In [57]:
from typing import List, Dict
# Create graph using the processed chunks
def create_graph(chunks: List[Dict]):
    """Upsert one ``Chunk`` node per processed chunk into the Neo4j graph.

    Args:
        chunks: Records shaped like the entries of ``processed_chunks``: a dict with a
            ``"text"`` key (the full chunk text) and a ``"metadata"`` dict holding
            ``chunk_id``, ``source``, ``page_number``, ``total_length`` and
            ``text_preview``.

    Raises:
        KeyError: If a record lacks ``"text"``, ``"metadata"`` or one of the metadata
            fields listed above.
    """
    # MERGE matches on chunk_id, so re-running does not duplicate nodes; the remaining
    # properties are written only when the node is first created (ON CREATE SET).
    create_chunk_query = """
    MERGE (chunk:Chunk {chunk_id: $chunk_id})
    ON CREATE SET
        chunk.source = $source,
        chunk.page_number = $page_number,
        chunk.total_length = $total_length,
        chunk.text_preview = $text_preview,
        chunk.full_text = $full_text
        RETURN chunk
    """

    for chunk in chunks:
        metadata = chunk["metadata"]
        # Execute the query with every $parameter it references bound explicitly.
        graph.query(
            create_chunk_query,
            params={
                "chunk_id": metadata["chunk_id"],
                "source": metadata["source"],
                "page_number": metadata["page_number"],
                "total_length": metadata["total_length"],
                "text_preview": metadata["text_preview"],
                "full_text": chunk["text"],
            },
        )

### Parse Document

In [7]:
from langchain_core.documents import Document

# Wrap each processed chunk in a LangChain Document (text + metadata) for the transformer
docs = [
    Document(page_content=entry['text'], metadata=entry['metadata'])
    for entry in processed_chunks
]

# Have the LLM transformer turn the documents into graph documents
graph_docs = llm_transformer_filtered.convert_to_graph_documents(docs)
print(graph_docs)

# Write the graph documents to Neo4j, keeping the source documents and the base entity label
graph.add_graph_documents(
    graph_docs,
    include_source=True,
    baseEntityLabel=True,
)


[GraphDocument(nodes=[Node(id='Abi Kakolla', type='Person', properties={}), Node(id='University Of Southern California', type='Organization', properties={}), Node(id='Los Angeles, Ca', type='Location', properties={}), Node(id='Usc Center For Neural Engineering', type='Organization', properties={})], relationships=[], source=Document(metadata={'chunk_id': 0, 'source': 'resume.pdf', 'page_number': 0, 'total_length': 949, 'text_preview': 'Abi Kakolla \nToronto, ON | (647) 957-7403 | kakolla@usc.edu| linkedin.com/in/kakolla|kakolla.com \n \n...'}, page_content='Abi Kakolla \nToronto, ON | (647) 957-7403 | kakolla@usc.edu| linkedin.com/in/kakolla|kakolla.com \n \nEDUCATION \nUniversity of Southern California                                                                                                                     Los Angeles, CA \nBachelor of Science in Computer Science (GPA: 3.71)                  Dec 2026 \nCoursework: Data Structures & Algorithms (C++), Embedded Systems (C), Sol