In [1]:
import sys
sys.path.append('..')

from src.retrieval_system import RetrievalSystem
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the processed documents that the retrieval cells below operate on.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a pickle that this project generated itself.
try:
    with open('../processed_documents.pkl', 'rb') as f:
        documents = pickle.load(f)
    print(f"✅ Loaded {len(documents)} processed documents")
except FileNotFoundError:
    print("❌ No processed documents found. Run 02_document_processing.ipynb first")
    documents = []
except (pickle.UnpicklingError, EOFError) as e:
    # A truncated or corrupt file would otherwise leave `documents` unbound,
    # and every later `if documents:` cell would fail with NameError.
    print(f"❌ Could not read processed documents ({e}). Re-run 02_document_processing.ipynb")
    documents = []

✅ Loaded 2 processed documents


In [3]:
# Build the retrieval system from the documents loaded by the previous cell.
if documents:
    print("🔄 Initializing retrieval system...")
    retrieval_system = RetrievalSystem(documents)
    print("✅ Retrieval system ready!")
else:
    # Drop any binding left over from an earlier run so stale state cannot be
    # queried, and say why nothing happened instead of finishing silently.
    retrieval_system = None
    print("⚠️ No documents loaded; retrieval system not initialized.")

🔄 Initializing retrieval system...
Creating vector store...
Vector store created with 2 documents
⚠️ langchain-graph-retriever not available; falling back to vector-only retriever.
✅ Retrieval system ready!


In [4]:
# Smoke-test the retrieval system with a fixed list of questions and print a
# short preview of each answer. Errors are reported per query so that one
# failing query does not stop the remaining ones.
if documents:
    test_queries = [
        "What is GraphRAG?",
        "What technologies are used in this system?",
        "What are the benefits of using GraphRAG?",
        "How does the system work?",
        "What are the key components?"
    ]
    preview_chars = 400  # maximum number of response characters to display

    print("🧪 Testing basic queries...")
    for i, query in enumerate(test_queries, 1):
        print(f"\n📝 Query {i}: {query}")
        try:
            response = retrieval_system.query(query)
            # Mark the preview as truncated only when text was actually cut off;
            # short answers are printed in full without a trailing "...".
            suffix = "..." if len(response) > preview_chars else ""
            print(f"✅ Response: {response[:preview_chars]}{suffix}")
        except Exception as e:
            print(f"❌ Error: {e}")

🧪 Testing basic queries...

📝 Query 1: What is GraphRAG?
✅ Response: GraphRAG is a system that combines graph traversal with vector search to enhance context understanding and retrieval accuracy. It is implemented using technologies like LangChain, Gemini (a large language model), and Neo4j (a graph database) with Python.

Key components of a GraphRAG system include:
*   **Document Processing**: Utilizes semantic chunking to process documents.
*   **Knowledge Graph...

📝 Query 2: What technologies are used in this system?
✅ Response: The technologies used in this system are:

*   **LangChain**: A framework for building LLM applications. (Source: sample_graphrag.docx)
*   **Gemini**: Google's large language model for text processing. (Source: sample_graphrag.docx)
*   **Neo4j**: A graph database for storing relationships. (Source: sample_graphrag.docx)
*   **Python**: The programming language used for implementation. (Source: s...

📝 Query 3: What are the benefits of using GraphRAG?
✅ R

In [5]:
# Interactive question/answer loop. This cell blocks on input(), so it needs
# an interactive frontend; type 'quit', 'exit' or 'q' to leave the loop.
if documents:
    print("\n💬 Interactive Query Interface")
    print("Type 'quit' to exit")

    while True:
        try:
            user_query = input("\n🤔 Your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the kernel or a closed stdin ends the session
            # cleanly instead of leaving a traceback in the cell output.
            print("\n👋 Goodbye!")
            break

        if user_query.lower() in ('quit', 'exit', 'q'):
            print("👋 Goodbye!")
            break

        if not user_query:
            print("Please enter a question.")
            continue

        try:
            print("🔄 Processing...")
            response = retrieval_system.query(user_query)
            print(f"\n🤖 Response:\n{response}\n")
        except Exception as e:
            # Report the failure and keep the session alive for the next question.
            print(f"❌ Error: {e}")


💬 Interactive Query Interface
Type 'quit' to exit
🔄 Processing...

🤖 Response:
The GraphRAG system is used to enhance information retrieval and understanding by combining the strengths of large language models (LLMs) with graph databases.

Specifically, its uses include:
*   **Better Context Understanding:** It leverages relationships within a knowledge graph to provide a richer understanding of the context surrounding information (Source: sample_graphrag.docx).
*   **More Accurate Retrieval:** By combining graph traversal with vector search (hybrid retrieval), it enables more precise and relevant information retrieval (Source: sample_graphrag.docx).
*   **Explainable Results:** The inherent structure of the graph allows for more transparent and explainable results, as the connections and relationships can be visualized and understood (Source: sample_graphrag.docx).

In essence, it processes documents using semantic chunking, creates a knowledge graph by extracting entities and relati

In [6]:
# Time a few representative queries and report latency and answer size.
if documents:
    import time

    print("📊 Performance Testing...")

    performance_queries = [
        "What is the main purpose of this system?",
        "List all the technologies mentioned",
        "How do the components work together?",
    ]

    for query in performance_queries:
        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # intervals; time.time() can jump if the system clock is adjusted.
        start_time = time.perf_counter()
        try:
            response = retrieval_system.query(query)
        except Exception as e:
            # Match the other query cells: report the failure and move on
            # rather than aborting the remaining measurements.
            print(f"\nQuery: {query}")
            print(f"❌ Error: {e}")
            continue
        elapsed = time.perf_counter() - start_time

        print(f"\nQuery: {query}")
        print(f"Response time: {elapsed:.2f} seconds")
        print(f"Response length: {len(response)} characters")

📊 Performance Testing...

Query: What is the main purpose of this system?
Response time: 2.83 seconds
Response length: 369 characters

Query: List all the technologies mentioned
Response time: 1.54 seconds
Response length: 202 characters

Query: How do the components work together?
Response time: 4.79 seconds
Response length: 2364 characters
