In [1]:
import os
import time
import matplotlib.pyplot as plt
from dotenv import load_dotenv

load_dotenv()

from embedding import GeminiEmbeddings
from document_processor import DocumentProcessor, ContextualHeaderProcessor
from vector_store import VectorstoreManager
from retrieval import StandardRetriever, ContextualHeaderRetriever
from llm_interface import get_openrouter_llm, StandardRAGChain, ContextualHeaderRAGChain
from evaluation import RAGEvaluator

# Check if environment variables are set
required_env_vars = ["GEMINI_API_KEY", "PINECONE_API_KEY", "OPENROUTER_API_KEY"]
missing_vars = [var for var in required_env_vars if not os.getenv(var)]

if missing_vars:
    print(f"Missing environment variables: {', '.join(missing_vars)}")
    print("Please set these variables in your .env file.")
else:
    print("All required environment variables are set.")

All required environment variables are set.


In [2]:
# Configuration
DOCS_DIR = "books"  # Path to the documents directory
CHUNK_SIZE = 2000      # Size of text chunks
CHUNK_OVERLAP = 200    # Overlap between chunks
REBUILD_INDEX = False   # Whether to rebuild the vector store index

# Initialize components

In [3]:
# Initialize the embedding model
print("Initializing the embedding model ...")
embeddings = GeminiEmbeddings()

# Initialize the LLM
print("Initializing the LLM ...")
llm = get_openrouter_llm()

print("Initializing the vector store")
vectorstore_manager = VectorstoreManager(embeddings)

Initializing the embedding model ...
Initializing the LLM ...
Initializing the vector store


# Process Documents and Create vector store

In [4]:
if REBUILD_INDEX:
    print("Rebuilding index from documents ...")
    doc_processor = DocumentProcessor(DOCS_DIR,
                                      chunk_size=CHUNK_SIZE,
                                      chunk_overlap=CHUNK_OVERLAP)
    standard_docs = doc_processor.process_documents()
    print(f"Processed {len(standard_docs)} document chunks for standard RAG")

    # Display a sample document chunk
    if standard_docs:
        print("\nSample standard document chunk:")
        print(f"Source: {standard_docs[0].metadata.get("source", "unknown")}")
        print(f"Content (first 300 chars): {standard_docs[0].page_content[:300]}...")

    # Contextual Header document processing
    contextual_doc_processor = DocumentProcessor(
        llm = LLM,
        chunk_size = CHUNK_SIZE,
        chunk_overlap = CHUNK_OVERLAP,
        docs_dir = DOCS_DIR
    )
    contextual_docs = contextual_doc_processor.process_documents()
    print(f"Processed {len(contextual_docs)} document chunks for contextual RAG")

    # Display a sample contextual document chunk
    if contextual_docs:
        print("\nSample contextual document chunk:")
        print(f"Source: {contextual_docs[0].metadata.get("source", "unknown")}")
        print(f"Content (first 300 chars): {contextual_docs[0].page_content[:300]}...")

    # Add documents to the vector store
    # Clear any existing vector store
    print("Clearing any existing vector store")
    vectorstore_manager.clear_all()

    # Add documents to vector store
    print("Adding documents to standard RAG vector store...")
    vectorstore_manager.add_documents(standard_docs)
    # Add contextual documents to vector store
    print("Adding documents to contextual RAG vector store...")
    vectorstore_manager.add_documents(contextual_docs)

    

# Initialize Retrievers and RAG Chains

In [5]:
# Get vector stores
standard_vector_store = vectorstore_manager.get_standard_vector_store()
contextual_vector_store = vectorstore_manager.get_contextual_vector_store()

# Initialize retrievers
standard_retriever = StandardRetriever(standard_vector_store)
contextual_retriever = ContextualHeaderRetriever(contextual_vector_store)

# Initialize RAG chains
standard_rag = StandardRAGChain(llm, standard_retriever)
contextual_rag = ContextualHeaderRAGChain(llm, contextual_retriever)

# Initialize evaluator
evaluator = RAGEvaluator(embeddings)

# Query both RAGs

In [6]:
def test_query(query):
    print(f"\nquery: {query}")
    print("\n Processing...")

    # Measure standard RAG performance
    print("Running standard RAG...")
    standard_start = time.time()
    standard_response = standard_rag.invoke(query)
    standard_time = time.time() - standard_start

    # Measure contextual header RAG performance
    print("Running contextual header RAG...")
    contextual_start = time.time()
    contextual_response = contextual_rag.invoke(query)
    contextual_time = time.time() - contextual_start

    # Print results
    print("\n=== Standard RAG ===")
    print(f"Time: {standard_time:.2f}s")
    print(f"Response: {standard_response}")
    
    print("\n=== Contextual Header RAG ===")
    print(f"Time: {contextual_time:.2f}s")
    print(f"Response: {contextual_response}")
    
    # Return the responses and times for further analysis
    return {
        "standard": {"response": standard_response, "time": standard_time},
        "contextual": {"response": contextual_response, "time": contextual_time}
    }

In [7]:
import time
test_query("What are the key principles in the Declaration of Independence?")


query: What are the key principles in the Declaration of Independence?

 Processing...
Running standard RAG...
Running contextual header RAG...

=== Standard RAG ===
Time: 76.28s
Response: The key principles in the Declaration of Independence are:

1. **All men are created equal**: The document asserts that all individuals are born with inherent rights and dignity, and are equal in their inherent worth and value.
2. **Life, Liberty, and the pursuit of Happiness**: The document outlines the fundamental rights that governments are established to protect, including the right to life, liberty, and the pursuit of happiness.
3. **Governments derive their power from the consent of the governed**: The document argues that governments are instituted to represent the will of the people, and that the people have the right to alter or abolish a government that becomes destructive of these rights.
4. **The right to alter or abolish a government**: The document asserts that if a government becomes 

{'standard': {'response': 'The key principles in the Declaration of Independence are:\n\n1. **All men are created equal**: The document asserts that all individuals are born with inherent rights and dignity, and are equal in their inherent worth and value.\n2. **Life, Liberty, and the pursuit of Happiness**: The document outlines the fundamental rights that governments are established to protect, including the right to life, liberty, and the pursuit of happiness.\n3. **Governments derive their power from the consent of the governed**: The document argues that governments are instituted to represent the will of the people, and that the people have the right to alter or abolish a government that becomes destructive of these rights.\n4. **The right to alter or abolish a government**: The document asserts that if a government becomes tyrannical or abusive, the people have the right to throw it off and establish a new government that is more just and equitable.\n5. **The importance of indiv