# GraphRAG

<img src="./media/graphRAG_Architecture.png" width=600>

# Graph_RAG Setup

In [4]:
import os
import glob
import json
import subprocess
import shlex
from pathlib import Path
from typing import Optional
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import chromadb
from dotenv import load_dotenv
from smolagents import OpenAIServerModel, CodeAgent, ToolCallingAgent, HfApiModel, tool, GradioUI
from datasets import load_metric
import fitz  # PyMuPDF
import os
import re
from unidecode import unidecode
from glob import glob
from tqdm import tqdm
from langchain_text_splitters import TokenTextSplitter
import pandas as pd


### === CONFIG ====

In [2]:
# Glob pattern for the source PDFs of the paper corpus.
docs_path = "D:/2CSI-Project/PDFs_papers/*.pdf"
# Chunking parameters (same values as the process_pdf defaults).
chunk_size = 1200
overlap = 100
output_file = "graph_output.json"
# Both are None when the corresponding environment variable is not set.
# NOTE(review): load_dotenv is imported but never called in this notebook, so a
# .env file is not read before these lookups — confirm whether that is intended.
tool_model_id = os.getenv("TOOL_MODEL_ID")
huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN")
# Raw string, so backslashes must NOT be doubled (the previous value contained
# literal "\\" separators).
db_dir = r"D:\2CSI-Project\VectorDB_Embeddings"
graph_root_path = "./ragtest"
# Folder scanned for input PDFs and folder receiving the cleaned .txt files.
INPUT_PDF_DIR = r"C:\Users\ACER NITRO\OneDrive\Bureau\2CSI\Project 2SCI\Automated-Information-Retrieval-and-Summarization-for-Academic-Research-Articles\RAG_Models(notebooks)\DAIT_DEHANE_Yacine\GraphRAG_deepseek\input"
OUTPUT_TXT_DIR = r"C:\Users\ACER NITRO\OneDrive\Bureau\2CSI\Project 2SCI\Automated-Information-Retrieval-and-Summarization-for-Academic-Research-Articles\RAG_Models(notebooks)\DAIT_DEHANE_Yacine\GraphRAG_deepseek\ragtest\input"


# === MODEL SELECTOR ===


### === PreProcess Scientific Papers 'PDF -> .txt' ===


In [5]:

# Make sure the text output folder exists before any PDF is processed
# (exist_ok=True: no error when it is already there).
os.makedirs(OUTPUT_TXT_DIR, exist_ok=True)

def clean_text(text: str) -> str:
    """Normalize raw PDF text and drop lines that look like page noise.

    The text is transliterated with ``unidecode`` and split on newlines. Each
    line is stripped and discarded when it is shorter than four characters, is
    a bare number optionally prefixed with "Page", or contains one of the
    boilerplate markers (logo, www., http, copyright, all rights reserved).
    The surviving lines are re-joined with newlines.
    """
    page_number = re.compile(r"^\s*(Page)?\s*\d+\s*$", re.IGNORECASE)
    boilerplate = re.compile(r"(logo|www\.|http|copyright|all rights reserved)", re.IGNORECASE)

    def is_noise(candidate: str) -> bool:
        # Same three rejection rules, evaluated in the same order.
        return (
            len(candidate) < 4
            or page_number.match(candidate) is not None
            or boilerplate.search(candidate) is not None
        )

    stripped_lines = (raw.strip() for raw in unidecode(text).split("\n"))
    kept = [candidate for candidate in stripped_lines if not is_noise(candidate)]

    # Collapse any run of consecutive newlines into a single blank line.
    joined = re.sub(r"\n{2,}", "\n\n", "\n".join(kept))
    return joined.strip()

def process_pdf(pdf_path: str, output_dir: str, split_chunks=False, chunk_size=1200, chunk_overlap=100) -> None:
    """Extract, clean and save the text of one PDF; optionally save token chunks.

    Args:
        pdf_path: Path of the PDF to read.
        output_dir: Folder receiving ``<pdf name>.txt``; created when missing.
        split_chunks: When true, also write ``<pdf name>_chunk_<i>.txt`` files.
        chunk_size: ``chunk_size`` given to ``TokenTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to ``TokenTextSplitter``.
    """
    # The context manager closes the document even if text extraction raises;
    # join() avoids rebuilding the string once per page.
    with fitz.open(pdf_path) as doc:
        full_text = "".join(page.get_text() for page in doc)

    cleaned = clean_text(full_text)

    # Do not rely on the caller having created the output folder.
    os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    output_txt_path = os.path.join(output_dir, f"{base_name}.txt")

    with open(output_txt_path, "w", encoding="utf-8") as f:
        f.write(cleaned)

    print(f"✅ Cleaned text saved to: {output_txt_path}")

    if split_chunks:
        # NOTE(review): chunk files are written next to the full-text file, so a
        # consumer that reads every .txt in output_dir sees the content twice —
        # confirm this is intended.
        text_splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_text(cleaned)
        for idx, chunk in enumerate(chunks):
            chunk_path = os.path.join(output_dir, f"{base_name}_chunk_{idx}.txt")
            with open(chunk_path, "w", encoding="utf-8") as cf:
                cf.write(chunk)
        print(f"🧩 {len(chunks)} chunks saved for: {base_name}")

def batch_process_pdfs(input_dir: str, output_dir: str, split_chunks=False, chunk_size=1200, chunk_overlap=100) -> None:
    """Run ``process_pdf`` on every ``*.pdf`` file directly inside ``input_dir``.

    Args:
        input_dir: Folder searched (non-recursively) for ``*.pdf`` files.
        output_dir: Folder handed to ``process_pdf`` for the generated text files.
        split_chunks: Forwarded to ``process_pdf``.
        chunk_size: Forwarded to ``process_pdf`` (only used when chunking).
        chunk_overlap: Forwarded to ``process_pdf`` (only used when chunking).
    """
    # `glob` is the function here (the file does `from glob import glob`).
    # Sorting makes the processing order reproducible across runs.
    pdf_files = sorted(glob(os.path.join(input_dir, "*.pdf")))
    print(f"📚 Found {len(pdf_files)} PDFs.")

    for pdf_path in tqdm(pdf_files, desc="Processing PDFs"):
        process_pdf(
            pdf_path,
            output_dir,
            split_chunks=split_chunks,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )



### ---- PDF Processing and Chunking ----

In [6]:
# Convert every PDF in INPUT_PDF_DIR to a cleaned .txt file in OUTPUT_TXT_DIR,
# without writing per-chunk files.
batch_process_pdfs(INPUT_PDF_DIR, OUTPUT_TXT_DIR, split_chunks=False) 


📚 Found 0 PDFs.


Processing PDFs: 0it [00:00, ?it/s]


#### ==== Graph Visualization ====

#### Looking at Final Entities and Relationships

In [7]:

# Load the final entities table from the artifacts folder of the
# 20250421-001836 run.
entities = pd.read_parquet('./ragtest/output/20250421-001836/artifacts/create_final_entities.parquet')

# Preview the first rows.
entities.head()

Unnamed: 0,id,name,type,description,human_readable_id,graph_embedding,text_unit_ids,description_embedding
0,b45241d70f0e43fca764df95b2b81f77,"""HERBERT WOISETSCHLAGER, ALEXANDER ERBEN, SHIQ...","""PERSON"", ""ORGANIZATION"")(""ENTITY""","""ACM Reference Format""",0,,[e9aa2e30f8a16ab2a2fa9be1c46e9de1],"[-0.0522812083363533, -0.2714383900165558, -3...."
1,4119fd06010c494caa07f439b333f4c5,"""AGX ORIN""","""ORGANIZATION"", ""AGX ORIN""","""person""",1,,[0d66389e93327df8f525277dda4617e5],"[-0.195903941988945, 0.6456646919250488, -3.13..."
2,d3835bf3dda84ead99deadbeac5d0d7d,"""ORGANIZATION""","""THE ORGANIZATION IS NOT EXPLICITLY MENTIONED ...","The Organization, a prominent entity, holds a...",2,,"[7befbf2cdd18e8189b0f6e34637a77f3, 7c22470c632...","[-0.5362508296966553, 1.8357473611831665, -3.1..."
3,077d2820ae1845bcbb1803379a3d1eae,"""GNNS""","""TECHNOLOGY""","""GNNs"" refers to Graph Neural Networks, a type...",3,,[242307f545da2144b2e3affbd99017d2],"[0.17368170619010925, 2.1736154556274414, -3.5..."
4,3671ea0dd4e84c1a9b02c5ab2c8f4bac,"""AI AND MACHINE LEARNING LANDSCAPE""","""CONCEPT""","""AI and Machine Learning Landscape"" is the fie...",4,,[242307f545da2144b2e3affbd99017d2],"[-0.25016453862190247, 1.5578858852386475, -3...."


In [8]:
# Load the final relationships table (source/target pairs, see the preview
# below) from the same run's artifacts folder.
relationships = pd.read_parquet('./ragtest/output/20250421-001836/artifacts/create_final_relationships.parquet')

# Preview the first rows.
relationships.head()

Unnamed: 0,source,target,weight,description,text_unit_ids,id,human_readable_id,source_degree,target_degree,rank
0,"""GNNS""","""QUANTUM COMPUTING""",1.0,"""GNNs will be enhanced by Quantum Computing, e...",[242307f545da2144b2e3affbd99017d2],6fae5ee1a831468aa585a1ea09095998,0,1,1,2
1,"""CLASSIFICATION DIFFICULTY""","""CONCEPT""",1.0,"""The complexity level of a feature determines ...",[6432a1a2eeff7c0f772b6fd06da0131a],ef32c4b208d041cc856f6837915dc1b0,1,1,1,2
2,"""IBAN, E-MAIL ADDRESS, POSTAL CODES""","""FEATURES""",1.0,"""These features are examples of simple feature...",[6432a1a2eeff7c0f772b6fd06da0131a],07b2425216bd4f0aa4e079827cb48ef5,2,1,2,3
3,"""RULE-BASED (E.G., REGULAR EXPRESSIONS)""","""EXTRACTION METHODS""",1.0,"""Simple features can be extracted using rule-b...",[6432a1a2eeff7c0f772b6fd06da0131a],2670deebfa3f4d69bb82c28ab250a209,3,1,2,3
4,"""NAMED ENTITIES (E.G., ORGANIZATION, PERSON NA...","""FEATURES""",1.0,"""These features are examples of dynamic featur...",[6432a1a2eeff7c0f772b6fd06da0131a],404309e89a5241d6bff42c05a45df206,4,1,2,3


##### Combining all of this with our relationships gives us our final nodes.

In [9]:
# Load the final nodes table from the same run's artifacts folder.
nodes = pd.read_parquet('./ragtest/output/20250421-001836/artifacts/create_final_nodes.parquet')

# Preview the first ten rows.
nodes.head(10)

Unnamed: 0,level,title,type,description,source_id,degree,human_readable_id,id,size,graph_embedding,community,top_level_node_id,x,y
0,0,"""HERBERT WOISETSCHLAGER, ALEXANDER ERBEN, SHIQ...","""PERSON"", ""ORGANIZATION"")(""ENTITY""","""ACM Reference Format""",e9aa2e30f8a16ab2a2fa9be1c46e9de1,0,0,b45241d70f0e43fca764df95b2b81f77,0,,,b45241d70f0e43fca764df95b2b81f77,0,0
1,0,"""AGX ORIN""","""ORGANIZATION"", ""AGX ORIN""","""person""",0d66389e93327df8f525277dda4617e5,0,1,4119fd06010c494caa07f439b333f4c5,0,,,4119fd06010c494caa07f439b333f4c5,0,0
2,0,"""ORGANIZATION""","""THE ORGANIZATION IS NOT EXPLICITLY MENTIONED ...","The Organization, a prominent entity, holds a...","7befbf2cdd18e8189b0f6e34637a77f3,7c22470c6324e...",0,2,d3835bf3dda84ead99deadbeac5d0d7d,0,,,d3835bf3dda84ead99deadbeac5d0d7d,0,0
3,0,"""GNNS""","""TECHNOLOGY""","""GNNs"" refers to Graph Neural Networks, a type...",242307f545da2144b2e3affbd99017d2,1,3,077d2820ae1845bcbb1803379a3d1eae,1,,,077d2820ae1845bcbb1803379a3d1eae,0,0
4,0,"""AI AND MACHINE LEARNING LANDSCAPE""","""CONCEPT""","""AI and Machine Learning Landscape"" is the fie...",242307f545da2144b2e3affbd99017d2,0,4,3671ea0dd4e84c1a9b02c5ab2c8f4bac,0,,,3671ea0dd4e84c1a9b02c5ab2c8f4bac,0,0
5,0,"""QUANTUM COMPUTING""","""TECHNOLOGY""","""Quantum Computing"" is a technology that can e...",242307f545da2144b2e3affbd99017d2,1,5,19a7f254a5d64566ab5cc15472df02de,1,,,19a7f254a5d64566ab5cc15472df02de,0,0
6,0,"""ADVANCEMENTS""","""EVENT""","""Advancements"" refers to the continuous improv...",242307f545da2144b2e3affbd99017d2,0,6,e7ffaee9d31d4d3c96e04f911d0a8f9e,0,,,e7ffaee9d31d4d3c96e04f911d0a8f9e,0,0
7,0,"""DEEPSET""","""ORGANIZATION"", ""MOLLER ET AL.""","The DEEPSET, established in 2023, is an annot...","6432a1a2eeff7c0f772b6fd06da0131a,af767269307bc...",0,7,f7e11b0e297a44a896dc67928368f600,0,,,f7e11b0e297a44a896dc67928368f600,0,0
8,0,"""SQUAD DATASET""","""EVENT"", ""RAJPURKAR ET AL.""","""person"", ""Cloud-era Fast Forward Labs""",af767269307bcd4abab0dc93481d3a9c,0,8,1fd3fa8bb5a2408790042ab9573779ee,0,,,1fd3fa8bb5a2408790042ab9573779ee,0,0
9,0,"""HUGGINGFACE COMMUNITY""","""ORGANIZATION"", ""WOLF ET AL.""","""person"", ""QA models""",af767269307bcd4abab0dc93481d3a9c,0,9,27f9fbe6ad8c4a8b9acee0d3596ed57c,0,,,27f9fbe6ad8c4a8b9acee0d3596ed57c,0,0


##### Community Report Generation & Summarization


In [10]:
# Load the final community reports table from the same run's artifacts folder.
community_reports = pd.read_parquet('./ragtest/output/20250421-001836/artifacts/create_final_community_reports.parquet')

# Preview the first rows.
community_reports.head()

Unnamed: 0,community,full_content,level,rank,title,rank_explanation,summary,findings,full_content_json,id
0,1,# NLP Model Accuracy Comparison\n\nThis report...,0,9.7,NLP Model Accuracy Comparison,The high impact severity rating is due to the ...,This report presents a comparison of four NLP ...,[{'explanation': 'The accuracy of BERT-FT was ...,"{\n ""title"": ""NLP Model Accuracy Comparison...",ae255080-895f-40dc-86db-e8ad6c7f4d52
1,3,# NLP Model Training Datasets Community\n\nThi...,0,7.5,NLP Model Training Datasets Community,The impact severity rating of 7.5 indicates th...,This report examines a community centered arou...,"[{'explanation': 'The DGA dataset, with a degr...","{\n ""title"": ""NLP Model Training Datasets C...",9e9bff7b-304b-4852-bbb6-a840474f8384


## ==== Graph RAG Retrieval Function ====

In [13]:
def query_graphrag(
    query: str,
    method: str = "global",
    root_path: str = "./ragtest",
    timeout: Optional[int] = None,
    community_level: int = 2,
    dynamic_community_selection: bool = False
) -> str:
    """Run a query through the ``graphrag.query`` module in a subprocess.

    Args:
        query: Question to ask; must contain non-whitespace text.
        method: Value passed to ``--method`` (this notebook uses "local",
            "global" and "drift").
        root_path: Value passed to ``--root``.
        timeout: Seconds to wait for the subprocess; ``None`` waits indefinitely.
        community_level: Value passed to ``--community_level``; must be >= 0.
        dynamic_community_selection: Adds ``--dynamic-community-selection``
            to the command when true.

    Returns:
        The subprocess's standard output with surrounding whitespace removed.

    Raises:
        ValueError: If ``community_level`` is negative or ``query`` is blank.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status; its captured stdout/stderr are printed before re-raising.
        subprocess.TimeoutExpired: If ``timeout`` elapses first.
    """
    import sys  # local import: only needed to locate the running interpreter

    if community_level < 0:
        raise ValueError("Community level must be non-negative")
    # Fail fast instead of spawning a subprocess for a query with no content.
    if not query or not query.strip():
        raise ValueError("Query must be a non-empty string")

    # Use the interpreter running this code so the subprocess sees the same
    # environment (and installed packages) instead of whatever "python" is on PATH.
    interpreter = sys.executable or "python"

    command = [
        interpreter, "-m", "graphrag.query",
        "--root", root_path,
        "--method", method,
        "--community_level", str(community_level),
    ]

    if dynamic_community_selection:
        command.append("--dynamic-community-selection")

    # Passed as a list (no shell), so the query needs no quoting or escaping.
    command.append(query)

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # Surface the child's output; the exception message alone omits it.
        print("=== STDOUT ===")
        print(e.stdout)
        print("=== STDERR ===")
        print(e.stderr)
        raise

    return result.stdout.strip()


### Local Search

<img src="./media/local_search.png" width=900>

The GraphRAG approach to local search is the most similar to regular semantic RAG search. It combines structured data from the knowledge graph with unstructured data from the input documents to augment the LLM context with relevant entity information. In essence, we are going to first search for relevant entities to the query using semantic search. These become the entry points on our graph that we can now traverse. Starting at these points, we look at connected chunks of text, community reports, other entities, and relationships between them. All of the data retrieved is filtered and ranked to fit into a pre-defined context window.

In [15]:
# Local-search query against an explicitly specified GraphRAG project root.
# NOTE(review): this root differs from the folders used for INPUT_PDF_DIR /
# OUTPUT_TXT_DIR (no "2CSI" segment, "GraphRAG" instead of "GraphRAG_deepseek")
# — confirm it points at the intended project.
result = query_graphrag(
    query="What is machine learning?",
    method="local",
    root_path=r"C:\Users\ACER NITRO\OneDrive\Bureau\Project 2SCI\Automated-Information-Retrieval-and-Summarization-for-Academic-Research-Articles\RAG_Models(notebooks)\DAIT_DEHANE_Yacine\GraphRAG\ragtest"
)
print("Query result:")
print(result)


=== STDOUT ===


INFO: Reading settings from C:\Users\ACER NITRO\OneDrive\Bureau\Project 2SCI\Automated-Information-Retrieval-and-Summarization-for-Academic-Research-Articles\RAG_Models(notebooks)\DAIT_DEHANE_Yacine\GraphRAG\ragtest\settings.yaml
creating llm client with {'api_key': 'REDACTED,len=9', 'type': "openai_chat", 'model': 'deepseek-r1:1.5b', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 1.0, 'request_timeout': 180.0, 'api_base': 'http://localhost:11434/v1', 'api_version': None, 'organization': None, 'proxy': None, 'cognitive_services_endpoint': None, 'deployment_name': None, 'model_supports_json': True, 'tokens_per_minute': 0, 'requests_per_minute': 0, 'max_retries': 10, 'max_retry_wait': 10.0, 'sleep_on_rate_limit_recommendation': True, 'concurrent_requests': 25}
creating embedding llm client with {'api_key': 'REDACTED,len=9', 'type': "openai_embedding", 'model': 'all-minilm:l6-v2', 'max_tokens': 4000, 'temperature': 0, 'top_p': 1, 'request_timeout': 180.0, 'api_base': 'h

CalledProcessError: Command '['python', '-m', 'graphrag.query', '--root', 'C:\\Users\\ACER NITRO\\OneDrive\\Bureau\\Project 2SCI\\Automated-Information-Retrieval-and-Summarization-for-Academic-Research-Articles\\RAG_Models(notebooks)\\DAIT_DEHANE_Yacine\\GraphRAG\\ragtest', '--method', 'local', 'What is machine learning?']' returned non-zero exit status 1.

### Global Search

<img src="./media/global_search.png" width=1000>

Through the semantic clustering of communities during the indexing process outlined above, we created community reports as summaries of high-level themes across these groupings. Having this community summary data at various levels allows us to do something that traditional RAG performs poorly at: answering queries about broad themes and ideas across our unstructured data.

To capture as much broad information as possible in an efficient manner, GraphRAG implements a [map reduce](https://en.wikipedia.org/wiki/MapReduce) approach. Given a query, relevant community node reports at a specific hierarchical level are retrieved. These are shuffled and chunked, where each chunk is used to generate a list of points that each have their own "importance score". These intermediate points are ranked and filtered, attempting to maintain the most important points. These become the aggregate intermediary response, which is passed to the LLM as the context for the final response.

In [None]:
# Global-search query; root_path is left at its default ("./ragtest").
result = query_graphrag(
    query="How does a company choose between RAG, fine-tuning, and different PEFT approaches?",
    method="global"
)
print("Query result:")
print(result)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Query result:
creating llm client with {'api_key': 'REDACTED,len=51', 'type': "openai_chat", 'encoding_model': 'cl100k_base', 'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 1.0, 'n': 1, 'frequency_penalty': 0.0, 'presence_penalty': 0.0, 'request_timeout': 180.0, 'api_base': None, 'api_version': None, 'organization': None, 'proxy': None, 'audience': None, 'deployment_name': None, 'model_supports_json': True, 'tokens_per_minute': 0, 'requests_per_minute': 0, 'max_retries': 10, 'max_retry_wait': 10.0, 'sleep_on_rate_limit_recommendation': True, 'concurrent_requests': 25, 'responses': None}

SUCCESS: Global Search Response:
### Choosing Between RAG, Fine-Tuning, and PEFT Approaches

When a company is deciding between Retrieval-Augmented Generation (RAG), fine-tuning, and Parameter-Efficient Fine-Tuning (PEFT) approaches, several key factors must be considered. These factors include the specific requirements of the application, the need for external data integration, co

### DRIFT Search

<img src="./media/drift_search.png" width=1000>

[Dynamic Reasoning and Inference with Flexible Traversal](https://www.microsoft.com/en-us/research/blog/introducing-drift-search-combining-global-and-local-search-methods-to-improve-quality-and-efficiency/), or DRIFT, is a novel GraphRAG concept introduced by Microsoft as an approach to local search queries that include community information in the search process.

The user's query is initially processed through [Hypothetical Document Embedding (HyDE)](https://arxiv.org/pdf/2212.10496), which creates a hypothetical document similar to those found in the graph already, but using the user's topic query. This document is embedded and used for semantic retrieval of the top-k relevant community reports. From these matches, we generate an initial answer along with several follow-up questions as a lightweight version of global search. They refer to this as the primer.

Once this primer phase is complete, we execute local searches for each follow-up question generated. Each local search produces both intermediate answers and new follow-up questions, creating a refinement loop. This loop runs for two iterations (the authors note that future research is planned to develop reward functions for smarter termination). An important detail that makes these local searches unique is that they are informed by both community-level knowledge and detailed entity/relationship data. This allows the DRIFT process to find relevant information even when the initial query diverges from the indexing persona, and it can adapt its approach based on emerging information during the search.

The final output is structured as a hierarchy of questions and answers, ranked by their relevance to the original query. Map reduce is used again with an equal weighting on all intermediate answers, then passed to the language model for a final response. DRIFT cleverly combines global and local search with guided exploration to provide both broad context and specific details in responses.

In [None]:
# Same question as the global search above, this time with the "drift" method;
# root_path is left at its default ("./ragtest").
result = query_graphrag(
    query="How does a company choose between RAG, fine-tuning, and different PEFT approaches?",
    method="drift"
)
print("Query result:")
print(result)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Query result:
INFO: Vector Store Args: {
    "type": "lancedb",
    "db_uri": "/Users/adamlucek/Desktop/github/GraphRAG/ragtest/output/lancedb",
    "container_name": "==== REDACTED ====",
    "overwrite": true
}
creating llm client with {'api_key': 'REDACTED,len=51', 'type': "openai_chat", 'encoding_model': 'cl100k_base', 'model': 'gpt-4o', 'max_tokens': 4000, 'temperature': 0.0, 'top_p': 1.0, 'n': 1, 'frequency_penalty': 0.0, 'presence_penalty': 0.0, 'request_timeout': 180.0, 'api_base': None, 'api_version': None, 'organization': None, 'proxy': None, 'audience': None, 'deployment_name': None, 'model_supports_json': True, 'tokens_per_minute': 0, 'requests_per_minute': 0, 'max_retries': 10, 'max_retry_wait': 10.0, 'sleep_on_rate_limit_recommendation': True, 'concurrent_requests': 25, 'responses': None}
creating embedding llm client with {'api_key': 'REDACTED,len=51', 'type': "openai_embedding", 'encoding_model': 'cl100k_base', 'model': 'text-embedding-3-small', 'max_tokens': 4000, 'tem

---

## Comparing to Regular Vector Database Retrieval

<img src="./media/basic_retrieval.png" width=600>
 
For comparison, let's look back at traditional RAG: chunking, embedding, and similarity-based retrieval.

**Instantiate our Database**

For this we'll be using [ChromaDB](https://www.trychroma.com) with the same chunks as were loaded into our graph.

In [None]:
import chromadb

# Persistent client rooted at ./notebook/chromadb.
chroma_client = chromadb.PersistentClient(path="./notebook/chromadb")
# Fetch the "paper_collection" collection, creating it if it does not exist yet.
paper_collection = chroma_client.get_or_create_collection(name="paper_collection")

**Embed Chunks Into Collection**

In [None]:
# NOTE(review): `texts` is not defined anywhere in the visible notebook — it
# must hold the chunk strings before this cell runs; confirm where it comes from.
# Add one document per call, with a positional id ("chunk_0", "chunk_1", ...).
# `ids` is passed as a list so it lines up with the `documents` list.
for idx, text in enumerate(texts):
    paper_collection.add(
        documents=[text],
        ids=[f"chunk_{idx}"],
    )

**Retrieval Function**

In [None]:
def chroma_retrieval(query, num_results=5):
    """Query ``paper_collection`` with ``query`` and return the raw result.

    ``num_results`` is forwarded as ``n_results`` (default 5).
    """
    return paper_collection.query(query_texts=[query], n_results=num_results)

**RAG Prompt & Chain**

In [None]:
# Prompt for the baseline RAG chain. It has two placeholders:
# {retrieved_docs} (the context) and {query} (the user question).
rag_prompt_template = """
Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.

If you don't know the answer, just say so. Do not make anything up.

Do not include information where the supporting evidence for it is not provided.

Context: {retrieved_docs}

User Question: {query}

"""

rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template)

# Pipeline: fill the prompt -> call the model -> parse the output to a string.
# NOTE(review): `llm` is not defined anywhere in the visible notebook (ChatOpenAI
# is imported but never instantiated) — it must exist before this cell runs.
rag_chain = rag_prompt | llm | StrOutputParser()

**RAG Function**

In [None]:
def chroma_rag(query):
    """Answer ``query`` with ``rag_chain``, using Chroma results as the context."""
    hits = chroma_retrieval(query)
    # Only one query text is sent, so take the first entry under "documents".
    context_docs = hits["documents"][0]
    return rag_chain.invoke({"retrieved_docs": context_docs, "query": query})

**RAG Response**

In [None]:
# Ask the baseline RAG pipeline the same question used for the global and
# DRIFT searches, so the answers can be compared.
response = chroma_rag("How does a company choose between RAG, fine-tuning, and different PEFT approaches?")
print(response)

When choosing between Retrieval-Augmented Generation (RAG), fine-tuning, and different Parameter-Efficient Fine-Tuning (PEFT) approaches, a company should consider several factors:

1. **Data Access and Updates**: RAG is preferable for applications requiring access to external data sources or environments where data frequently updates. It provides dynamic data retrieval capabilities and is less prone to generating incorrect information.

2. **Model Behavior and Domain-Specific Knowledge**: Fine-tuning is suitable when the model needs to adjust its behavior, writing style, or incorporate domain-specific knowledge. It is effective if there is ample domain-specific, labeled training data available.

3. **Resource Constraints and Efficiency**: PEFT approaches like LoRA and DEFT are designed to reduce computational and resource requirements. LoRA focuses on low-rank matrices to reduce memory usage and computational load, while DEFT optimizes the fine-tuning process by focusing on the most c

---
## Discussion

**Traditional/Naive RAG:**

Benefits:
- Simpler implementation and deployment
- Works well for straightforward information retrieval tasks
- Good at handling unstructured text data
- Lower computational overhead

Drawbacks:
- Loses structural information when chunking documents
- Can break up related content during text segmentation
- Limited ability to capture relationships between different pieces of information
- May struggle with complex reasoning tasks requiring connecting multiple facts
- Potential for incomplete or fragmented answers due to chunking boundaries

**GraphRAG:**

Benefits:
- Preserves structural relationships and hierarchies in the knowledge
- Better at capturing connections between related information
- Can provide more complete and contextual answers
- Improved retrieval accuracy by leveraging graph structure
- Better supports complex reasoning across multiple facts
- Can maintain document coherence better than chunk-based approaches
- More interpretable due to explicit knowledge representation

Drawbacks:
- More complex to implement and maintain
- Requires additional processing to construct and update knowledge graphs
- Higher computational overhead for graph operations
- May require domain expertise to define graph schema/structure
- More challenging to scale to very large datasets
- Additional storage requirements for graph structure

**Key Differentiators:**
1. Knowledge Representation: Traditional RAG treats everything as flat text chunks, while GraphRAG maintains structured relationships in a graph format

2. Context Preservation: GraphRAG better preserves context and relationships between different pieces of information compared to the chunking approach of traditional RAG

3. Reasoning Capability: GraphRAG enables better multi-hop reasoning and connection of related facts through graph traversal, while traditional RAG is more limited to direct retrieval

4. Answer Quality: GraphRAG tends to produce more complete and coherent answers since it can access related information through graph connections rather than being limited by chunk boundaries

The choice between traditional RAG and GraphRAG often depends on the specific use case, with GraphRAG being particularly valuable when maintaining relationships between information is important or when complex reasoning is required. It is also important to note that GraphRAG approaches still rely on regular embedding and retrieval methods themselves. They complement each other!