In [1]:
# %pip install langchain_community langchain neo4j langchain-huggingface langchain-chroma langchain-ollama python-dotenv ipywidgets einops pypdf tiktoken

In [2]:
import os

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_core.output_parsers import JsonOutputParser
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import ChatOllama

In [3]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Endpoint later handed to ChatOllama as base_url.
# An unset or empty BASE_URL resolves to None.
ollama_base_url = os.getenv("BASE_URL") or None

In [None]:
# Initialize the HuggingFace embeddings model.
# The device still defaults to "cuda"; set EMBEDDING_DEVICE (for example to
# "cpu") in the environment or the .env file to run without a GPU.
EMBEDDING_DEVICE = os.getenv("EMBEDDING_DEVICE", "cuda")

embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1.5",
    # NOTE(review): trust_remote_code=True allows code shipped with the model
    # repository to run locally - confirm the model source is trusted.
    model_kwargs={"device": EMBEDDING_DEVICE, "trust_remote_code": True},
)

In [5]:
# Neo4j connection parameters.
# Values are read from the environment (load_dotenv() has already loaded any
# .env file) so real credentials do not have to live in the notebook. The
# fallbacks are the previous local-development placeholders.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "yourpassword")

# Graph handle used by later cells to run Cypher queries
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
)

# Vector store built on the existing graph: the "summary" property of
# GovernmentDocument nodes is the embedded text, and vectors are kept in the
# "embedding" node property under the "vecFromGraph" index.
vecFromGraphDB = Neo4jVector.from_existing_graph(
    embedding=embeddings,
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    index_name="vecFromGraph",
    node_label="GovernmentDocument",
    text_node_properties=["summary"],
    embedding_node_property="embedding",
)

# Chroma collection persisted under ./chromaVDB, using the same embedding model
vector_db = Chroma(
    collection_name="rag-chroma",
    persist_directory="./chromaVDB",
    embedding_function=embeddings,
)

In [None]:
# Question reused by the later retrieval cells
question = "What is the maximum percentage of any corporate bond issue that a single FPI can invest in?"

# Score the graph-backed vector store against the question; later cells read
# the top hit as simResults[0][0].
simResults = vecFromGraphDB.similarity_search_with_relevance_scores(query=question)
print(simResults)

In [None]:
# Collect the top similarity hit together with every node that reaches it
# through incoming relationships, followed transitively.
start_doc = simResults[0][0].metadata["name"]

# The node name is supplied as a query parameter instead of being interpolated
# into the Cypher text, so quotes or other special characters in a name cannot
# break or alter the query.
CONNECTED_QUERY = "MATCH (start {name: $name})<-[r]-(connected) RETURN connected"

visited = set()  # names already expanded; also stops cycles from looping forever
connected_docs = [start_doc]  # names still waiting to be expanded

while connected_docs:
    current = connected_docs.pop()
    if current in visited:
        continue
    visited.add(current)

    for record in graph.query(CONNECTED_QUERY, params={"name": current}):
        neighbour = record["connected"]["name"]
        if neighbour not in visited:
            connected_docs.append(neighbour)

# Unique names, in no particular order (as before)
matched_docs = list(visited)
print(matched_docs)

In [None]:
# Alternative traversal: expand the top similarity hit level by level along
# incoming relationships and gather every node name reached, plus the hit itself.
start_doc = simResults[0][0].metadata["name"]

# $name is bound through query parameters, so special characters in a node
# name cannot break or alter the Cypher statement.
query = "MATCH (start {name: $name})<-[r]-(connected) RETURN connected"

matched_docs = [start_doc]
seen = {start_doc}  # each name is expanded once, so cycles terminate
frontier = [start_doc]

while frontier:
    next_frontier = []
    for node_name in frontier:
        query_results = graph.query(query, params={"name": node_name})
        for record in query_results:
            connected_name = record["connected"]["name"]
            if connected_name not in seen:
                seen.add(connected_name)
                matched_docs.append(connected_name)
                next_frontier.append(connected_name)
    frontier = next_frontier

# matched_docs already holds unique names only
print(matched_docs)

In [9]:
# Local Ollama chat model constrained to JSON output; temperature 0 is used
# for the grading calls.
llm = ChatOllama(
    base_url=ollama_base_url,
    model="llama3.1",
    format="json",
    temperature=0,
)

# Grading prompt: takes the retrieved document text and the user question and
# asks for {"score": "yes" | "no"}. (Typo "premable" corrected to "preamble".)
prompt = PromptTemplate(
    template="""You are a grader assessing relevance of a retrieved document to a user question.
     
    <Retrieved Document> \n
    {document} 
    </Retrieved Document> \n
    

    <User Question> \n
    {question}
    </User Question> \n
    
    If the document contains keywords related to the user question, grade it as relevant. Use logic and understand the context of the question and document to make decisions. Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.""",
    input_variables=["question", "document"],
)

# prompt -> model -> JSON parsing
retrieval_grader = prompt | llm | JsonOutputParser()

In [None]:
# Peek at a few stored entries instead of dumping the whole collection into the
# notebook output; raise or drop `limit` to inspect more.
vector_db.get(limit=5)

In [None]:
# For every graph-matched name, fetch the single best-matching Chroma entry
# whose "name" metadata equals that name and print its text.
for doc in matched_docs:
    ret_content = vector_db.similarity_search(question, filter={"name": doc}, k=1)
    if not ret_content:
        # A matched graph node may have no entry stored in Chroma; skip it
        # instead of failing on ret_content[0].
        print(f"No stored content found for {doc!r}")
        continue
    print(ret_content[0].page_content)
    # Grade the retrieved text (not the node name) once the grader is enabled:
    # response = retrieval_grader.invoke({"question": question, "document": ret_content[0].page_content})
    # print(response)