In [1]:
import os
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
from langchain.docstore.document import Document
from langchain.chains import GraphQAChain
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from pydantic import Field
from langchain.chains.base import Chain
from typing import Dict, Any, List

In [3]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["NEO4J_URI"] = os.getenv("NEO4J_URI")
os.environ["NEO4J_USERNAME"] = os.getenv("NEO4J_USERNAME")
os.environ["NEO4J_PASSWORD"] = os.getenv("NEO4J_PASSWORD")

In [None]:
loader = UnstructuredFileLoader("/Users/siddharthdileep/extracter/01471587/shortened-filings/AA_MzE1ODIxMjk1NmFkaXF6a2N4.pdf")
docs = loader.load()

KeyboardInterrupt: 

In [None]:
type(docs)

list

In [None]:
llm = ChatOpenAI(temperature=0, model_name="gpt-4o-mini")
llm_transformer = LLMGraphTransformer(llm=llm, node_properties=True)


In [None]:
graph_documents = llm_transformer.convert_to_graph_documents(docs)

In [None]:
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD
)

In [None]:
graph.add_graph_documents(graph_documents=graph_documents, baseEntityLabel=True, include_source=True)

In [None]:
graph.refresh_schema()


In [None]:
from langchain_neo4j import Neo4jVector
from langchain.embeddings import OpenAIEmbeddings

vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
)

# Now embed and store vectors
vector_index.add_documents(docs)

['088bed6eae7091c7d07b10ff75853bf1']

In [None]:
results = vector_index.similarity_search(
    "What is the name of the company", 
    k=3
)

for i, doc in enumerate(results, 1):
    print(f"\n--- Result {i} ---")
    print(doc.page_content)
    print(doc.metadata)


--- Result 1 ---

text: COMPANIES FORM No. 123

r. Notice of increase in nominal capital CHFP055 Please do not Pursuant to section 123 of the Companies Act 1985 write in this margin

To the Registrar of Companies Please complete

legibly, preferably (Address overleaf) For official use Company Number in black type, or

bold block ~ | Ietlering | | 1471587

Name of Company

insertfullname —|* YODAFONE LIMITED

of company

gives notice in accordance with section 123 of the above Act that by resolution of the company

Sthe copy must dated 31 August 2000 the nominal capital of the company has been increased by coe othe torn £9;999,999,000.000 beyond the registered capital of £1,000.000. approved bythe — A copy of the resolution authorising the increase is attached. § registrar The conditions (e.g. voting rights, dividend rights, winding-up rights etc.) subject to which the

new shares have been or are to be issued are as follows:

Out of the profits available for distribution and resolved 

In [None]:
results

[]

In [None]:
from pyvis.network import Network
import networkx as nx

def showGraph(cypher: str = default_cypher):
    driver = GraphDatabase.driver(
        uri=os.environ["NEO4J_URI"],
        auth=(os.environ["NEO4J_USERNAME"], os.environ["NEO4J_PASSWORD"])
    )
    with driver.session() as session:
        results = session.run(cypher)
        G = nx.Graph()
        for record in results:
            s = record["s"]
            t = record["t"]
            G.add_node(s.id)
            G.add_node(t.id)
            G.add_edge(s.id, t.id)
        net = Network(notebook=True)
        net.from_nx(G)
        net.show("graph.html")
    driver.close()

In [None]:
graph.refresh_schema()
from langchain.prompts import PromptTemplate
from langchain.chains import GraphCypherQAChain
CYPHER_GENERATION_TEMPLATE = """
You are an expert in Cypher query language for Neo4j.
Given the following schema:

{schema}

Please write a valid Cypher query to answer this question:
{question}

Return ONLY the Cypher query without any explanations or text before or after.
"""
CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers.
...
"""
CYPHER_QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"], template=CYPHER_QA_TEMPLATE
)

graph_chain = GraphCypherQAChain.from_llm(
    ChatOpenAI(temperature=0, model_name="gpt-4o-mini"), 
    graph=graph, 
    cypher_prompt=CYPHER_GENERATION_PROMPT, 
    qa_prompt=CYPHER_QA_PROMPT,
    verbose=True,
    allow_dangerous_requests=True
)

In [72]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from pydantic import Field
from langchain.chains.base import Chain
from typing import Dict, Any, List

# This custom chain class combines the results from both retrievers
class HybridRetrieval(Chain):
    """Chain that combines graph and vector search results."""
    
    graph_chain: Chain = Field(...)
    vector_index: Any = Field(...)
    
    @property
    def input_keys(self) -> List[str]:
        return ["query"]
    
    @property
    def output_keys(self) -> List[str]:
        return ["result"]
    
    def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        question = inputs["query"]
        
        try:
            # Get results from the Cypher QA chain
            graph_result = self.graph_chain.invoke(question)
            graph_text = graph_result.get('result', "No graph results found")
        except Exception as e:
            graph_text = f"Graph query error: {str(e)[:100]}"
        
        # Get results from the vector retriever
        vector_docs = self.vector_index.similarity_search(question, k=3)
        vector_result = "\n\n".join([doc.page_content for doc in vector_docs])
        
        # Combine the results into a single string
        combined_result = f"Graph Result:\n{graph_text}\n\nVector Results:\n{vector_result}"
        
        return {"result": combined_result}

# Initialize the hybrid retrieval chain
hybrid_retriever = HybridRetrieval(
    graph_chain=graph_chain,  # Your existing GraphCypherQAChain
    vector_index=vector_index  # Your existing Neo4jVector
)

# Create the final question-answering chain using the hybrid retriever
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
final_rag_chain = {
    "context": lambda x: hybrid_retriever.run(x),
    "question": lambda x: x
}
final_rag_chain = {
    "context": lambda x: hybrid_retriever.invoke(x)["result"],
    "question": lambda x: x
} | prompt | ChatOpenAI(temperature=0, model="gpt-4o-mini") | StrOutputParser()

# Example usage
query = "What is this company about?"
result = final_rag_chain.invoke(query)





[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (c:Company)-[:REGISTERED_WITH]->(o:Organization)<-[:MENTIONS]-(d:Document)
WHERE c.id = 'your_company_id' // replace 'your_company_id' with the actual company ID
RETURN d.text AS document_text
[0m
Full Context:
[32;1m[1;3m[][0m

[1m> Finished chain.[0m


In [73]:
result

"The document pertains to Vodafone Limited, specifically regarding a notice of an increase in its nominal capital. It outlines the company's resolution to increase its nominal capital from £1,000,000 to £9,999,999,000. The notice details the rights associated with the new Preference Shares, including preferential dividend rights and entitlements in the event of winding-up or return of capital. Essentially, it provides information about the company's financial structure and the rights of its shareholders."