In [None]:
pip install neo4j


In [None]:
pip install neo4j-graphrag

In [None]:
pip install neo4j-graphrag[openai]

In [None]:
# pathlib ships with the Python standard library (it is imported below via
# `from pathlib import Path`), so nothing needs to be installed here.
# The PyPI package named "pathlib" is an unmaintained backport and should not be installed.

In [None]:
"""This example demonstrates how to use SimpleKGPipeline with automatic schema extraction
from a PDF file. When no schema is provided to SimpleKGPipeline, automatic schema extraction
is performed using the LLM.

Note: This example requires an OpenAI API key to be set in the .env file.
"""

import neo4j
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.generation.graphrag import GraphRAG
from neo4j import GraphDatabase
from neo4j_graphrag.embeddings import AzureOpenAIEmbeddings
from neo4j_graphrag.experimental.pipeline.kg_builder import SimpleKGPipeline
from neo4j_graphrag.llm import AzureOpenAILLM
from neo4j_graphrag.experimental.components.text_splitters.fixed_size_splitter import FixedSizeSplitter
from neo4j_graphrag.experimental.pipeline import Pipeline
from neo4j_graphrag.schema import get_schema
from pathlib import Path

import os



Reading all the PDF files


In [None]:

# Folder holding the input PDFs. Set the GRAPHRAG_INPUT_DIR environment variable
# to point somewhere else; the original location remains the default.
folder_path = Path(
    os.environ.get(
        "GRAPHRAG_INPUT_DIR",
        r"C:\Users\shant\OneDrive\Desktop\GraphRAGresume\input",
    )
)

# Collect all PDF files in that folder; sorted so the ingestion order is reproducible.
pdf_files = sorted(folder_path.glob("*.pdf"))
if not pdf_files:
    # An empty list would otherwise make the indexing loop silently do nothing.
    print(f"No PDF files found in {folder_path}")
print(pdf_files)



Connecting to the driver

In [None]:

async def run_kg_pipeline_with_auto_schema() -> None:
    """Placeholder coroutine for the SimpleKGPipeline auto-schema run.

    The function contains no statements of its own: the connection setup and
    pipeline steps are executed as top-level cell code, not inside this body.
    Awaiting it therefore returns None without doing any work.
    """


# Define Neo4j connection.
# Non-secret settings keep their previous values as defaults and can be
# overridden through environment variables of the same name.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+ssc://efea2c90.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")
AURA_INSTANCEID = os.environ.get("AURA_INSTANCEID", "efea2c90")
AURA_INSTANCENAME = os.environ.get("AURA_INSTANCENAME", "Instance01")

# The password is a secret and must never be stored in the notebook.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
if not NEO4J_PASSWORD:
    raise RuntimeError(
        "NEO4J_PASSWORD is not set. Export it (or load it from your .env file) "
        "before running this notebook."
    )

from neo4j import GraphDatabase

AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

# Connectivity probe only. The driver is closed when the `with` block exits, so it
# is bound to a throwaway name instead of `driver` to avoid leaving a closed
# driver behind under the name later cells use.
with GraphDatabase.driver(NEO4J_URI, auth=AUTH) as probe_driver:
    probe_driver.verify_connectivity()


Initialising the LLM, Embedder, and Driver


In [None]:

    # Define LLM parameters
# Parameters forwarded to the chat model on every call.
llm_model_params = {
    "max_tokens": 2000,
    "response_format": {"type": "json_object"},
    "temperature": 0,  # Lower temperature for more consistent output
}

# Initialize the Neo4j driver used by the indexing pipeline.
driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Create the LLM instance.
# The deployment name, endpoint and API version mirror the values used by the
# retriever cells further down in this notebook. The API key is read from the
# environment so that no secret is stored in the notebook.
llm = AzureOpenAILLM(
    model_name="gpt-4o-11-20",  # This should match your Azure deployment name for the LLM
    azure_endpoint="https://raginternaffine.openai.azure.com/",  # update with your endpoint
    api_version="2024-12-01-preview",  # update appropriate version
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    model_params=llm_model_params,  # previously defined but never passed to the LLM
)

# Create the embedder instance.
# NOTE(review): constructed without arguments, exactly as before. Presumably the
# Azure client picks up its endpoint/key/version from the environment - confirm,
# and pass the embedding deployment explicitly if it does not.
embedder = AzureOpenAIEmbeddings()


Indexing Pipeline

In [None]:


        # Create a SimpleKGPipeline instance without providing a schema
        # This will trigger automatic schema extraction
kg_builder = SimpleKGPipeline(
llm=llm,
driver=driver,
embedder=embedder,
from_pdf=True,
)
for pdf_file in pdf_files:
    print(f"Processing: {pdf_file}")
    pdf_result=await kg_builder.run_async(file_path=str(pdf_file))
    print(f"Result: {pdf_result}")
    # Close connections
#await llm.async_client.close()
#driver.close()



In [None]:

async def main() -> None:
    # Run the pipeline
    await run_kg_pipeline_with_auto_schema()

if __name__ == "__main__":
    
    await(main())

Creation of Indexes in Neo4j

In [None]:
from neo4j import GraphDatabase
from neo4j_graphrag.indexes import create_vector_index


INDEX_NAME = "vector-index-name"

# NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD come from the connection cell above;
# they are not redefined here so the password is not duplicated in the notebook.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Create the vector index over the `embedding` property of `Chunk` nodes.
# NOTE(review): dimensions=1536 must match the embedding model's output size - confirm.
create_vector_index(
    driver,
    INDEX_NAME,
    label="Chunk",
    embedding_property="embedding",
    dimensions=1536,
    similarity_fn="cosine",
)
# The call's return value is not informative, so report the index name instead.
print(f"Vector index ensured: {INDEX_NAME}")

# The driver is intentionally NOT closed here: the following cells
# (get_schema, create_vector_index, VectorRetriever) keep using `driver`,
# and a driver must not be used after it has been closed.


In [None]:
from neo4j import GraphDatabase
from neo4j_graphrag.schema import get_schema

# Fetch the schema description of the "neo4j" database and show it.
# `schema` is reused later as the `neo4j_schema` argument of Text2CypherRetriever.
schema = get_schema(driver=driver, database="neo4j")
print(schema)

In [None]:
# Plain-string prompt with {query_text} and {context} placeholders.
# NOTE(review): the name `rag_template` is rebound to a RagTemplate object in the
# "Instantiate the rag" cell further down, and this string is not referenced in
# between - confirm whether it is still needed.
rag_template = '''Answer the Question using the following Context. Only respond with information mentioned in the Context. Do not inject any speculative information not mentioned.

# Question:
{query_text}

# Context:
{context}

# Answer:
'''

vector retriever

In [None]:
from neo4j_graphrag.indexes import create_vector_index

# Same index definition as in the "Creation of Indexes in Neo4j" cell:
# cosine similarity over the 1536-dimensional `embedding` property of `Chunk` nodes.
create_vector_index(
    driver,
    name="vector-index-name",
    label="Chunk",
    embedding_property="embedding",
    dimensions=1536,
    similarity_fn="cosine",
)

In [None]:
from neo4j_graphrag.retrievers import VectorRetriever

# Similarity search against the "vector-index-name" index; only the `text`
# property of each matched node is returned.
vector_retriever = VectorRetriever(
    driver,
    index_name="vector-index-name",
    embedder=embedder,
    return_properties=["text"],
)


In [None]:
import json

# Run a raw vector search and pretty-print each returned record as JSON.
vector_res = vector_retriever.get_search_results(query_text = "CGPA of abhishek in Undergraduation ?",top_k=5)
for record in vector_res.records:
    # "====\n" separates records; the newline escape had lost its backslash ("====n").
    print("====\n" + json.dumps(record.data(), indent=4))

VectorCypherRetriever

In [None]:
from neo4j_graphrag.retrievers import VectorCypherRetriever

vc_retriever = VectorCypherRetriever(
    driver,
    index_name="vector-index-name",
    embedder=embedder,
    retrieval_query="""
//1) Go out 2-3 hops in the entity graph and get relationships
WITH node AS chunk
MATCH (chunk)<-[:FROM_CHUNK]-()-[relList:!FROM_CHUNK]-{1,2}()
UNWIND relList AS rel
 
//2) collect relationships and text chunks
WITH collect(DISTINCT chunk) AS chunks,
 collect(DISTINCT rel) AS rels
 
//3) format and return context
RETURN '=== text ===n' + apoc.text.join([c in chunks | c.text], 'n---n') + 'nn=== kg_rels ===n' +
 apoc.text.join([r in rels | startNode(r).name + ' - ' + type(r) + '(' + coalesce(r.details, '') + ')' +  ' -> ' + endNode(r).name ], 'n---n') AS info
"""
)
 


In [None]:
vc_res = vc_retriever.get_search_results(query_text = "summarise me the resume of ALOK MISHRA?", top_k=1)

# print output
kg_rel_pos = vc_res.records[0]['info'].find('nn=== kg_rels ===n')
print("# Text Chunk Context:")
print(vc_res.records[0]['info'][:kg_rel_pos])
print("# KG Context From Relationships:")
print(vc_res.records[0]['info'][kg_rel_pos:])

 

Text2Cypher Retriever

In [None]:
import neo4j
from neo4j_graphrag.retrievers import Text2CypherRetriever


# Plain-text responses for this LLM (the indexing LLM used JSON output instead).
llm_model_params = {
    "max_tokens": 2000,
    "response_format": {"type": "text"},
    "temperature": 0,  # Lower temperature for more consistent output
}

# Create the LLM instance. The API key is read from the environment so that
# no secret is stored in the notebook.
llm = AzureOpenAILLM(
    model_name="gpt-4o-11-20",  # This should match your Azure deployment name for the LLM
    azure_endpoint="https://raginternaffine.openai.azure.com/",  # update with your endpoint
    api_version="2024-12-01-preview",  # update appropriate version
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    model_params=llm_model_params,
)

# (Optional) Provide user input/query pairs for the LLM to use as examples
examples = [
    "USER INPUT: 'Who all are skilled in vector databases?' QUERY: MATCH (p:Person)-[:DEMONSTRATES_SKILL]->(s:Skill) WHERE s.name =~ '(?i).*vector.*' RETURN p.name"
]

# Initialize the Neo4j driver.
# The driver is deliberately NOT opened in a `with` block: the retriever is
# used after this point (here and in later cells), and a `with` block would
# close the driver before the first search runs.
driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Initialize the retriever
Text2Cypher_Retriever = Text2CypherRetriever(
    driver,
    llm=llm,
    neo4j_schema=schema,
    examples=examples,
    neo4j_database='neo4j',
)

query_text = "summarise me the resume of Abhishek?"
ret = Text2Cypher_Retriever.search(query_text=query_text)
# print output
print(ret)

    


In [None]:
# Pair the Text2Cypher retriever with the LLM to form a RAG pipeline.
Text2Cypher_Retriever_rag = GraphRAG(llm=llm, retriever=Text2Cypher_Retriever)

# Ask a question and show only the generated answer.
query_text = "give me information about work experience of Abhishek Nandgadkar?"
response = Text2Cypher_Retriever_rag.search(query_text=query_text)
print(response.answer)

Hybrid-Cypher Retriever

In [None]:
from neo4j_graphrag.indexes import create_fulltext_index

FULLTEXT_INDEX_NAME = "fulltext_index"

driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=AUTH)

# NOTE(review): the vector index and the retrievers in this notebook work on
# `Chunk` nodes and their `text` property, while this index covers
# `Document.textProperty` - confirm that label/property is the intended one.
create_fulltext_index(
    driver,
    FULLTEXT_INDEX_NAME,
    label="Document",
    node_properties=["textProperty"],
)

In [None]:
from neo4j_graphrag.retrievers import HybridCypherRetriever

# Plain-text responses for the answering LLM.
llm_model_params = {
    "max_tokens": 2000,
    "response_format": {"type": "text"},
    "temperature": 0,  # Lower temperature for more consistent output
}

# The API key is read from the environment so that no secret is stored in the notebook.
llm = AzureOpenAILLM(
    model_name="gpt-4o-11-20",  # This should match your Azure deployment name for the LLM
    azure_endpoint="https://raginternaffine.openai.azure.com/",  # update with your endpoint
    api_version="2024-12-01-preview",  # update appropriate version
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    model_params=llm_model_params,
)

# NOTE(review): this query does not reference the `node`/`score` values produced
# by the index search, so its result looks independent of the question asked -
# confirm that this is intended.
RETRIEVAL_QUERY = """
MATCH (p:Person)-[:DEMONSTRATES_SKILL]->(s:Skill)
RETURN s.name AS skill,
       count(DISTINCT p) AS person_count,
       collect(DISTINCT p.name) AS persons
ORDER BY person_count DESC;
"""

# The driver is deliberately NOT opened in a `with` block: the retriever is
# searched after this point (here and in later cells), and a `with` block would
# close the driver before the first search runs.
driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Initialize the retriever
HybridCypher_Retriever = HybridCypherRetriever(
    driver=driver,
    vector_index_name='vector-index-name',
    fulltext_index_name='fulltext_index',
    embedder=embedder,
    retrieval_query=RETRIEVAL_QUERY,
    result_formatter=None,
)

# Perform the similarity search for a text query
query_text = "CGPA of Abhishek Nandgadkar?"
response = HybridCypher_Retriever.search(query_text=query_text, top_k=1)
print(f"Hybrid Cypher Response: {response}")

In [None]:
# Initialize the RAG pipeline
# Build the RAG pipeline on top of the hybrid-Cypher retriever.
HybridCypher_Retriever_rag = GraphRAG(llm=llm, retriever=HybridCypher_Retriever)

# Ask a question, letting the retriever return up to 40 results.
query_text = "Work experience of Abhishek Nandgadkar?"
response = HybridCypher_Retriever_rag.search(
    query_text=query_text,
    retriever_config={'top_k': 40},
)
print(response.answer)

Hybrid Retriever

In [None]:
from neo4j_graphrag.retrievers import HybridRetriever

# No explicit response_format for this LLM.
llm_model_params = {
    "max_tokens": 2000,
    "temperature": 0,  # Lower temperature for more consistent output
}

# Create the LLM instance. The API key is read from the environment so that
# no secret is stored in the notebook.
llm = AzureOpenAILLM(
    model_name="gpt-4o-11-20",  # This should match your Azure deployment name for the LLM
    azure_endpoint="https://raginternaffine.openai.azure.com/",  # update with your endpoint
    api_version="2024-12-01-preview",  # update appropriate version
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    model_params=llm_model_params,
)

# The driver is deliberately NOT opened in a `with` block: the RAG search below
# (and later cells) run after this point, and a `with` block would close the
# driver before the first search runs.
driver = neo4j.GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

# Initialize the retriever (vector index + full-text index)
Hybrid_Retriever = HybridRetriever(
    driver=driver,
    vector_index_name="vector-index-name",
    fulltext_index_name="fulltext_index",
    embedder=embedder,
)

# To inspect raw retrieval results, call
# Hybrid_Retriever.search(query_text=..., top_k=5) directly.
Hybrid_Retriever_rag = GraphRAG(retriever=Hybrid_Retriever, llm=llm)

# Query the graph
query_text = "work experience of Abhishek Nandgadkar?"
response = Hybrid_Retriever_rag.search(query_text=query_text, retriever_config={'top_k': 44})
print(response.answer)

vector retriever

In [None]:
# Instantiate the RAG pipeline
# Instantiate the RAG pipeline on top of the plain vector retriever.
vector_retriever_rag = GraphRAG(retriever=vector_retriever, llm=llm)

# Query the graph
query_text = "work experience of Abhishek Nandgadkar?"
response = vector_retriever_rag.search(query_text=query_text, retriever_config={"top_k": 1})

print(response.answer)

# The driver is intentionally not closed here: the cells below still run
# searches through the retrievers. Close drivers once, at the very end of the
# notebook.

Instantiate the RAG pipelines

In [None]:
from neo4j_graphrag.retrievers import VectorRetriever
from neo4j_graphrag.generation.prompts import RagTemplate

# Prompt layout: retrieved context first, then the question, then the answer slot.
template_str = """
Context:
{context}

Question:
{query_text}

Answer:
"""

# Wrap the string in a RagTemplate; this rebinds the name `rag_template`.
rag_template = RagTemplate(
    expected_inputs=['query_text', 'context'],
    template=template_str,
)

# One GraphRAG pipeline per retriever. Only the vector and vector-cypher
# pipelines receive the custom prompt template.
vector_retriever_rag = GraphRAG(
    llm=llm,
    retriever=vector_retriever,
    prompt_template=rag_template,
)
vector_cypher_retriever_rag = GraphRAG(
    llm=llm,
    retriever=vc_retriever,
    prompt_template=rag_template,
)
Text2Cypher_Retriever_rag = GraphRAG(llm=llm, retriever=Text2Cypher_Retriever)
HybridCypher_Retriever_rag = GraphRAG(llm=llm, retriever=HybridCypher_Retriever)
Hybrid_Retriever_rag = GraphRAG(llm=llm, retriever=Hybrid_Retriever)

Different Query Outputs

query responses

In [None]:
# Ask the same question through the vector and the vector + Cypher pipelines.
q = "List of people who know NLP?"

# The "\n" escapes had lost their backslashes and were printing literal "n" characters.
vector_answer = vector_retriever_rag.search(q, retriever_config={'top_k': 5}).answer
print(f"Vector Response: \n{vector_answer}")
print("\n===========================\n")
vector_cypher_answer = vector_cypher_retriever_rag.search(q, retriever_config={'top_k': 5}).answer
print(f"Vector + Cypher Response: \n{vector_cypher_answer}")

vector-cypher

In [None]:
# Vector + Cypher pipeline built without a custom prompt template.
vector_cypher_retriever_rag = GraphRAG(llm=llm, retriever=vc_retriever)

query_text = "List of people who know NLP?"

# return_context=True is requested so the retrieved context travels with the result.
vc_rag_result = vector_cypher_retriever_rag.search(query_text=query_text, retriever_config={'top_k': 5}, return_context=True)

# "\n" restored: the escape had lost its backslash and printed a literal "n".
print(f"Vector + Cypher Response: \n{vc_rag_result.answer}")

vector

In [None]:
# Vector pipeline built without a custom prompt template.
vector_retriever_rag = GraphRAG(llm=llm, retriever=vector_retriever)

query_text = "List of people who know NLP?"
# return_context=True is requested so the retrieved context travels with the result.
v_rag_result = vector_retriever_rag.search(query_text=query_text, retriever_config={'top_k': 5}, return_context=True)
# "\n" restored: the escapes had lost their backslashes and printed literal "n" characters.
print("\n===========================\n")
print(f"Vector Response: {v_rag_result.answer}")

hybrid-retriever rag

In [None]:
# Hybrid (vector + full-text) pipeline.
Hybrid_Retriever_rag = GraphRAG(llm=llm, retriever=Hybrid_Retriever)

query_text = "List of people who know NLP?"
response = Hybrid_Retriever_rag.search(
    query_text=query_text,
    retriever_config={'top_k': 44},
)
print(response.answer)

hybrid-cypher

In [None]:
# Hybrid-Cypher pipeline, queried with the retriever's default configuration.
HybridCypher_Retriever_rag = GraphRAG(llm=llm, retriever=HybridCypher_Retriever)

query_text = "List of people who know NLP?"
response = HybridCypher_Retriever_rag.search(query_text=query_text)
print(response.answer)

text2cypher

In [None]:
# Text2Cypher pipeline, queried with the retriever's default configuration.
Text2Cypher_Retriever_rag = GraphRAG(llm=llm, retriever=Text2Cypher_Retriever)

query_text = "List of people who know NLP?"
response = Text2Cypher_Retriever_rag.search(query_text=query_text)
# Show only the generated answer.
print(response.answer)