In [None]:
import gradio as gr
from langchain_groq import ChatGroq
from langchain_community.vectorstores import Neo4jVector
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph as LangChainNeo4jGraph
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os

In [9]:
# Merge variables from a local .env file (if present) into the process
# environment, then snapshot the connection settings used by later cells.
# Each value is None when the corresponding variable is not set.
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
NEO4J_DATABASE = os.getenv("NEO4J_DATABASE")

In [11]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Embedding model configuration: BGE small (English), run on the CPU, with
# normalization of the produced embeddings switched on.
model_name = "BAAI/bge-small-en"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

# Shared embedding object; create_hybrid_rag_chain hands it to the Neo4j
# vector store as its `embedding` argument.
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [14]:
def create_hybrid_rag_chain():
    """Build the graph chain, vector retriever and LLM used for hybrid RAG.

    Reads the module-level settings ``GROQ_API_KEY``, ``NEO4J_URI``,
    ``NEO4J_USERNAME``, ``NEO4J_PASSWORD`` and ``NEO4J_DATABASE`` as well as
    the module-level ``hf_embeddings`` object.

    Returns:
        tuple: ``(graph_chain, vector_retriever, llm)`` where ``graph_chain``
        is a ``GraphCypherQAChain`` bound to the Neo4j graph,
        ``vector_retriever`` is the retriever of the ``monograph_chunks``
        vector index, and ``llm`` is the ``ChatGroq`` model shared by both.

    Raises:
        ValueError: if any required setting is missing or empty. The message
            lists every missing variable so the problem can be fixed in one go.
    """
    # Validate everything up front so a missing URI/username is reported here
    # rather than as a less obvious error from inside the Neo4j client.
    # NEO4J_DATABASE is deliberately not required: it is passed through as-is.
    required_settings = {
        "GROQ_API_KEY": GROQ_API_KEY,
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
    missing = [name for name, value in required_settings.items() if not value]
    if missing:
        raise ValueError(
            "API key or password not found; missing environment variable(s): "
            + ", ".join(missing)
        )

    graph = LangChainNeo4jGraph(
        url=NEO4J_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
        database=NEO4J_DATABASE,
    )
    graph.refresh_schema()

    # temperature=0 keeps generation as deterministic as the backend allows.
    llm = ChatGroq(temperature=0, model="meta-llama/llama-4-maverick-17b-128e-instruct")

    vector_store = Neo4jVector(
        url=NEO4J_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
        database=NEO4J_DATABASE,
        embedding=hf_embeddings,
        # Must match the index name used by the ingestion and vectorization notebook.
        index_name="monograph_chunks",
        node_label="Chunk",
        text_node_property="text",
        embedding_node_property="embedding",
    )

    # Graph Cypher QA chain for structured queries.
    # NOTE(review): allow_dangerous_requests=True acknowledges that
    # LLM-generated Cypher is executed against the database; prefer
    # credentials with the narrowest permissions that still work.
    graph_chain = GraphCypherQAChain.from_llm(
        llm=llm,
        graph=graph,
        verbose=True,
        # Needed so callers can show the generated Cypher query.
        return_intermediate_steps=True,
        allow_dangerous_requests=True,
    )

    vector_retriever = vector_store.as_retriever()

    return graph_chain, vector_retriever, llm

In [None]:


def query_hybrid_rag(question, history):
    """Answer ``question`` by combining vector search and graph search.

    Relies on the module-level globals ``vector_retreiver`` (sic),
    ``graph_chain`` and ``llm``, which the launcher block assigns from
    ``create_hybrid_rag_chain()`` before the UI starts.

    Args:
        question: The user's question text.
        history: Unused here; present because this function is registered as
            the ``fn`` of ``gr.ChatInterface``.

    Returns:
        str: Markdown containing the synthesized answer, the knowledge-graph
        answer, the generated Cypher query and the retrieved text chunks as a
        block quote. If anything raises, a short error message is returned
        instead (the exception is not propagated).
    """
    no_cypher_placeholder = "could not generate cypher query"

    try:
        # Vector search to find relevant text chunks
        retrieved_docs = vector_retreiver.invoke(question)
        retrieved_context = "\n\n".join(doc.page_content for doc in retrieved_docs)

        # Graph search to find structured data
        graph_result = graph_chain.invoke({"query": question})
        graph_answer = graph_result["result"]

        # The chain may report no intermediate steps; fall back to the
        # placeholder instead of failing the whole request on an IndexError.
        intermediate_steps = graph_result.get("intermediate_steps") or []
        if intermediate_steps:
            cypher_query = intermediate_steps[0].get("query", no_cypher_placeholder)
        else:
            cypher_query = no_cypher_placeholder

        # Synthesize the final answer using both contexts
        synthesis_prompt_template = """
        You are an expert Q&A assistant. Use the following context from a document and a knowledge graph to answer the question.
        Provide a comprehensive answer that combines insights from both sources. If the contexts are empty or irrelevant, say you don't have enough information.

        Vector Search Context (Unstructured text from the document):
        {vector_context}

        Knowledge Graph Context (Structured data from the graph):
        {graph_context}

        Question:
        {question}

        Comprehensive Answer:
        """

        synthesis_prompt = PromptTemplate(
            template=synthesis_prompt_template,
            input_variables=["vector_context", "graph_context", "question"]
        )

        synthesis_chain = synthesis_prompt | llm

        final_answer_obj = synthesis_chain.invoke({
            "vector_context": retrieved_context,
            "graph_context": graph_answer,
            "question": question
        })

        final_answer = final_answer_obj.content

        # chr(10) is "\n": prefix every line of the retrieved text with "> "
        # so it renders as a Markdown block quote.
        quoted_context = retrieved_context.replace(chr(10), chr(10) + '> ')

        formatted_output = (
            f"**Final Answer:**\n{final_answer}\n\n"
            f"---\n"
            f"**Retrieved from Knowledge Graph:**\n{graph_answer}\n\n"
            f"**Generated Cypher Query:**\n```cypher\n{cypher_query}\n```\n\n"
            f"**Retrieved from Vector Search (Top Chunks):**\n> {quoted_context}"
        )

        return formatted_output

    except Exception as e:
        # Best-effort UI handler: report the failure in the chat instead of
        # raising into Gradio.
        print(f"Error during query: {e}")
        return f"an error occurred: {e}. please ensure neo4j database is running"
    

# GRADIO INTERFACE
if __name__ == "__main__":
    # Build the RAG components, assemble the Gradio UI and launch it.
    print("Initializing Hybrid GraphRAG chain...")
    try:
        # These three names are read as module-level globals by
        # query_hybrid_rag, so the spelling "vector_retreiver" (sic) must stay
        # in sync with that function.
        graph_chain, vector_retreiver, llm = create_hybrid_rag_chain()
        print("Chain initialized successfully")

        with gr.Blocks(theme=gr.themes.Soft(), title="Hybrid Cancer Monograph") as demo:
            gr.Markdown(
                """
                # Hybrid GraphRAG Q&A for Cancer Monograph
                This system uses both vector search and a knowledge graph to provide comprehensive answers.
                """
            )
            gr.ChatInterface(
                fn=query_hybrid_rag,
                title="Ask the Monograph (Hybrid)",
                chatbot=gr.Chatbot(height=500),
                textbox=gr.Textbox(
                    placeholder="e.g., What is the link between betel quid chewing and breast cancer in the North-East?",
                    container=False,
                    scale=7,
                ),
                examples=[
                    "What are the leading sites of cancer in Mizoram?",
                    "Which studies did the ICMR conduct on tobacco?",
                    "What is the link between the GSTM1 gene and oral cancer?",
                ],
            )

        print("\n Launching Gradio App.. open the URL in your browser")
        # NOTE(review): share=True requests a public share link for this app,
        # which executes LLM-generated Cypher against the database. Confirm
        # that public exposure is intended before running outside a demo.
        demo.launch(share=True)

    except Exception as e:
        # Covers chain construction, UI assembly and launch; only the message
        # is reported, the traceback is not shown.
        print(f"failed to initialize the application: {e}")

