In [None]:
                ┌────────────────────┐
                │   User Query       │
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Entity Linking   │ ◄─ Extract & disambiguate entities from query
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Graph Traversal  │ ◄─ Use Neo4j, RDF, or KG to find related entities & paths
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │ Graph-Aware Retrieval│ ◄─ Retrieve related docs based on graph context
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │  Context Formatting │ ◄─ Include graph facts/paths in prompt
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Prompt LLM (RAG) │ ◄─ Use hybrid context: graph + docs
                └────────┬───────────┘
                         │
                         ▼
                ┌────────────────────┐
                │   Generated Answer │
                └────────────────────┘


In [8]:
# In Graph RAG, you can either (1) use the graph solely for reasoning by extracting structured facts at 
# runtime (not embedded), or 
# (2) convert graph facts into natural language and embed them into the vector
# store for hybrid semantic search alongside documents.


# In our example, option (1) is the better fit.


# The graph data has to be stored somewhere: networkx (in memory) is enough for a demo, but in production
# it should live in a real graph database such as Neo4j (full-featured, scalable, fast).

In [7]:
# OPTION1 : use the graph solely for reasoning (not embedded)
import networkx as nx
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# === Step 1: Create in-memory biomedical knowledge graph ===
def build_biomedical_graph():
    """Assemble the small in-memory BRCA1 knowledge graph used by this demo.

    Returns:
        A directed graph (``nx.DiGraph``) in which every node carries a
        ``type`` attribute and every edge carries a ``relation`` attribute.
    """
    # (node name, node type) — inserted in this order.
    typed_nodes = (
        ("BRCA1", "Gene"),
        ("DNA Repair Failure", "Effect"),
        ("PARP Inhibitors", "DrugClass"),
        ("Olaparib", "Drug"),
        ("Synthetic Lethality", "Mechanism"),
    )
    # (source, target, relation label) — inserted in this order.
    labelled_edges = (
        ("BRCA1", "DNA Repair Failure", "mutation_causes"),
        ("DNA Repair Failure", "PARP Inhibitors", "creates_vulnerability_for"),
        ("PARP Inhibitors", "Olaparib", "includes"),
        ("BRCA1", "Synthetic Lethality", "enables_via"),
    )

    graph = nx.DiGraph()
    for name, node_type in typed_nodes:
        graph.add_node(name, type=node_type)
    for source, target, relation in labelled_edges:
        graph.add_edge(source, target, relation=relation)
    return graph

def extract_graph_context(G, focus="BRCA1", max_hops=2):
    """Render the relations reachable from ``focus`` as plain-English sentences.

    Follows outgoing edges depth-first from ``focus`` and turns each edge into
    a sentence ``"<source> <relation> <target>."``, where underscores in the
    edge's ``relation`` attribute are replaced by spaces. Each edge is
    reported at most once, so self-loops and cycles do not repeat facts.

    Args:
        G: Directed graph exposing ``successors(node)``, ``edges[u, v]`` and
            ``node in G`` (for example the graph returned by
            ``build_biomedical_graph``).
        focus: Node the traversal starts from.
        max_hops: Maximum number of edges to follow away from ``focus``.
            The default of 2 covers direct neighbours and their successors;
            a larger value pulls in more distant facts.

    Returns:
        The sentences joined by single spaces. An empty string when ``focus``
        is not in ``G``, has no outgoing edges, or ``max_hops`` is below 1.

    Raises:
        KeyError: If a traversed edge has no ``"relation"`` attribute.
    """
    if max_hops < 1 or focus not in G:
        return ""

    facts = []
    seen_edges = set()
    # node -> largest remaining-hop budget it has already been expanded with;
    # a node is only re-expanded when reached again with a bigger budget.
    expanded_with = {}

    def _walk(node, hops_left):
        if expanded_with.get(node, 0) >= hops_left:
            return
        expanded_with[node] = hops_left
        for neighbor in G.successors(node):
            edge = (node, neighbor)
            if edge not in seen_edges:
                seen_edges.add(edge)
                relation = G.edges[node, neighbor]["relation"]
                facts.append(f"{node} {relation.replace('_', ' ')} {neighbor}.")
            if hops_left > 1:
                _walk(neighbor, hops_left - 1)

    _walk(focus, max_hops)
    return " ".join(facts)

# === Step 2: Load document and build vector store ===
# Split the source text into overlapping chunks so each passage can be
# embedded and retrieved on its own.
loader = TextLoader("data/brca1_cancer_links.txt")
documents = loader.load()
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)
# The number of results must be passed through `search_kwargs`; a bare `k=3`
# keyword is not forwarded to the similarity search, so the intended top-3
# limit would not be applied.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# === Step 3: Build graph and extract context ===
# The graph is used for reasoning only: its facts are rendered to text here
# and injected into the prompt, not embedded into the vector store.
G = build_biomedical_graph()
graph_context = extract_graph_context(G)

# === Step 4: Get supporting unstructured context ===
query = "How does BRCA1 mutation create a therapeutic opportunity for cancer treatment?"
text_context = "\n".join([doc.page_content for doc in retriever.get_relevant_documents(query)])

# === Step 5: Combine graph and text into one prompt ===
# Graph facts come first, followed by the retrieved document passages.
combined_context = f"""GRAPH FACTS:\n{graph_context}\n\nDOCUMENT CONTEXT:\n{text_context}"""

prompt = PromptTemplate.from_template("""
Use the following context to answer the question scientifically and clearly.

{context}

Question: {question}
Answer:
""")

llm = Ollama(model="llama3.1")
chain = LLMChain(llm=llm, prompt=prompt)

response = chain.run(context=combined_context, question=query)

# === Step 6: Display result ===
print("\n🔍 Query:")
print(query)

print("\n📊 Graph-derived facts:")
print(graph_context)

print("\n💬 Answer from Graph RAG (in-memory):")
print(response)



🔍 Query:
How does BRCA1 mutation create a therapeutic opportunity for cancer treatment?

📊 Graph-derived facts:
BRCA1 mutation causes DNA Repair Failure. DNA Repair Failure creates vulnerability for PARP Inhibitors. BRCA1 enables via Synthetic Lethality.

💬 Answer from Graph RAG (in-memory):
A BRCA1 mutation creates a therapeutic opportunity for cancer treatment by introducing vulnerability to PARP inhibitors. This is due to the fact that homologous recombination repair, which is mediated by BRCA1, is impaired in cells with a BRCA1 mutation. As a result, when PARP is inhibited, DNA damage accumulates and cell death occurs.

In other words, the lack of functional BRCA1 makes cancer cells dependent on alternative, error-prone DNA repair pathways that are not as effective. PARP inhibitors, such as Olaparib, take advantage of this vulnerability by inhibiting another essential enzyme in these alternative pathways, leading to an accumulation of lethal DNA damage and cell death.

This concep

In [9]:
# Option 2: hybrid Graph RAG version where graph facts are embedded into the vector store alongside text chunks, so they can also be retrieved semantically, not just injected manually.

In [12]:
import networkx as nx
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Ollama
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document

# === Step 1: Create in-memory biomedical knowledge graph ===
def build_biomedical_graph():
    """Assemble the small in-memory BRCA1 knowledge graph used by this demo.

    Returns:
        A directed graph (``nx.DiGraph``) in which every node carries a
        ``type`` attribute and every edge carries a ``relation`` attribute.
    """
    # (node name, node type) — inserted in this order.
    typed_nodes = (
        ("BRCA1", "Gene"),
        ("DNA Repair Failure", "Effect"),
        ("PARP Inhibitors", "DrugClass"),
        ("Olaparib", "Drug"),
        ("Synthetic Lethality", "Mechanism"),
    )
    # (source, target, relation label) — inserted in this order.
    labelled_edges = (
        ("BRCA1", "DNA Repair Failure", "mutation_causes"),
        ("DNA Repair Failure", "PARP Inhibitors", "creates_vulnerability_for"),
        ("PARP Inhibitors", "Olaparib", "includes"),
        ("BRCA1", "Synthetic Lethality", "enables_via"),
    )

    graph = nx.DiGraph()
    for name, node_type in typed_nodes:
        graph.add_node(name, type=node_type)
    for source, target, relation in labelled_edges:
        graph.add_edge(source, target, relation=relation)
    return graph

def generate_graph_facts_as_docs(G, focus="BRCA1", max_hops=2):
    """Turn the relations reachable from ``focus`` into one ``Document`` per fact.

    Follows outgoing edges depth-first from ``focus`` and wraps a sentence
    ``"<source> <relation> <target>."`` (underscores in the edge's
    ``relation`` attribute replaced by spaces) in a ``Document`` for each
    edge. Each edge is reported at most once, so self-loops and cycles do not
    produce duplicate documents.

    Args:
        G: Directed graph exposing ``successors(node)``, ``edges[u, v]`` and
            ``node in G`` (for example the graph returned by
            ``build_biomedical_graph``).
        focus: Node the traversal starts from.
        max_hops: Maximum number of edges to follow away from ``focus``.
            The default of 2 covers direct neighbours and their successors;
            a larger value pulls in more distant facts.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is one fact
        sentence each. Empty when ``focus`` is not in ``G``, has no outgoing
        edges, or ``max_hops`` is below 1.

    Raises:
        KeyError: If a traversed edge has no ``"relation"`` attribute.
    """
    if max_hops < 1 or focus not in G:
        return []

    facts = []
    seen_edges = set()
    # node -> largest remaining-hop budget it has already been expanded with;
    # a node is only re-expanded when reached again with a bigger budget.
    expanded_with = {}

    def _walk(node, hops_left):
        if expanded_with.get(node, 0) >= hops_left:
            return
        expanded_with[node] = hops_left
        for neighbor in G.successors(node):
            edge = (node, neighbor)
            if edge not in seen_edges:
                seen_edges.add(edge)
                relation = G.edges[node, neighbor]["relation"]
                sentence = f"{node} {relation.replace('_', ' ')} {neighbor}."
                facts.append(Document(page_content=sentence))
            if hops_left > 1:
                _walk(neighbor, hops_left - 1)

    _walk(focus, max_hops)
    return facts

# === Step 2: Load and prepare documents (unstructured) ===
# Split the source text into overlapping chunks so each passage can be
# embedded and retrieved on its own.
loader = TextLoader("data/brca1_cancer_links.txt")
raw_documents = loader.load()

splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(raw_documents)

# === Step 3: Convert graph into fact-based text documents ===
G = build_biomedical_graph()
graph_docs = generate_graph_facts_as_docs(G)
# Debug output: shows the fact documents derived from the graph.
print("graph_doc", graph_docs)
# === Step 4: Combine unstructured + graph facts ===
all_docs = docs + graph_docs
# Debug output: shows the text chunks produced by the splitter.
print("docs", docs)
# === Step 5: Embed and store all into FAISS ===
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(all_docs, embeddings)
# The number of results must be passed through `search_kwargs`; a bare `k=5`
# keyword is not forwarded to the similarity search, so the intended top-5
# limit would not be applied.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# === Step 6: Run the hybrid retrieval-based generation ===
# Graph facts and text chunks share one index, so a single similarity search
# can return either kind of document.
query = "How does BRCA1 mutation create a therapeutic opportunity for cancer treatment?"
retrieved_docs = retriever.get_relevant_documents(query)
combined_context = "\n".join([doc.page_content for doc in retrieved_docs])

prompt = PromptTemplate.from_template("""
Use the following information to answer the question scientifically and clearly.

{context}

Question: {question}
Answer:
""")

llm = Ollama(model="llama3.1")
chain = LLMChain(llm=llm, prompt=prompt)
response = chain.run(context=combined_context, question=query)

# === Step 7: Display output ===
print("\n🔍 Query:")
print(query)

print("\n📚 Retrieved Context:")
print(combined_context)

print("\n💬 Answer from Hybrid Graph RAG:")
print(response)


graph_doc [Document(metadata={}, page_content='BRCA1 mutation causes DNA Repair Failure.'), Document(metadata={}, page_content='DNA Repair Failure creates vulnerability for PARP Inhibitors.'), Document(metadata={}, page_content='BRCA1 enables via Synthetic Lethality.')]
docs [Document(metadata={'source': 'data/brca1_cancer_links.txt'}, page_content='BRCA1 is a tumor suppressor gene that plays a critical role in the repair of DNA double-strand breaks through homologous recombination repair. Mutations in BRCA1 impair this process, leading to genomic instability.\n\nBreast cancer is often associated with inherited mutations in BRCA1 and BRCA2 genes. Individuals carrying these mutations have a significantly increased risk of developing breast and ovarian cancers.'), Document(metadata={'source': 'data/brca1_cancer_links.txt'}, page_content='PARP inhibitors, such as Olaparib, are targeted cancer therapies that exploit synthetic lethality. In BRCA1-deficient cells, inhibition of PARP leads to