### Imports and Path setup

In [53]:
from pathlib import Path
import chromadb
import pickle
import os
from dotenv import load_dotenv
load_dotenv()

# File-system locations used throughout the notebook (relative to this notebook's folder)
multiquery_rag_output_path = "../RAG Results/multiquery_rag_results.txt"
Relative_Database_path = "./chroma_Data"
Absolute_Database_path = Path(Relative_Database_path).resolve()
file_path = "../Chunking/Chunk_files/harry_potter_chunks_hierarchical.pkl"
# Name of the Chroma collection that stores the Harry Potter chunks
collection_name = "harry_potter_collection"
# Mirror GEMINI_API_KEY into GOOGLE_API_KEY.
# Assigning None to os.environ raises TypeError, so only copy the key when it is actually set.
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if _gemini_api_key:
    os.environ["GOOGLE_API_KEY"] = _gemini_api_key
elif not os.environ.get("GOOGLE_API_KEY"):
    print("[WARN] GEMINI_API_KEY is not set; the answer-generation cells will fail until it is provided (e.g. in .env).")


### Chroma Setup and Chunk Loading
Sets up a persistent ChromaDB client and loads the previously computed chunks.

In [54]:
# Open the on-disk Chroma database located at Absolute_Database_path
client = chromadb.PersistentClient(path=Absolute_Database_path)
print(f"[INFO] ChromaDB client initialized at: {Absolute_Database_path}")

# Report which collections are already stored in this database
existing_collections = client.list_collections()
existing_collection_names = [c.name for c in existing_collections]
print(f"Existing collections: {existing_collection_names}")

[INFO] ChromaDB client initialized at: C:\Users\micro\Desktop\Abhinav college\Resources\Sem 7\Advanced NLP\RAG_for_research_papers\VectorDB\chroma_Data
Existing collections: ['harry_potter_collection', 'my_collection']


In [55]:

# The chunks were computed and pickled earlier, so they only need to be loaded from file_path here.
# NOTE: pickle.load can execute arbitrary code - only load chunk files you created yourself.


loaded_docs = []

try:
    with open(file_path, "rb") as f: # 'rb' mode for reading in binary
        loaded_docs = pickle.load(f)
    print(f"Successfully loaded {len(loaded_docs)} chunks from '{file_path}'.")
except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"Error loading file: {e}")

# Show one chunk's metadata as a sanity check; say so explicitly when nothing was loaded
# instead of printing a header with nothing underneath it.
if loaded_docs:
    print("\nHere is the metadata of a loaded chunk:")
    print(loaded_docs[0].metadata)
else:
    print("\nNo chunks were loaded; the following cells will have nothing to add to the collection.")

Successfully loaded 3382 chunks from '../Chunking/Chunk_files/harry_potter_chunks_hierarchical.pkl'.

Here is the metadata of a loaded chunk:
{'source': '../harrypotter.pdf', 'page_number': 1, 'chunk_type': 'section', 'chunk_level': 1, 'section_id': 'page_1_section_0', 'parent_id': 'page_1', 'chunk_index': 0}


### Set up Embedding Function
Uses the `all-MiniLM-L6-v2` SentenceTransformer model to generate embeddings.

In [56]:
# Requires the sentence_transformers package; install it once if it is missing:
# !pip install sentence_transformers

# Embedding function backed by the all-MiniLM-L6-v2 SentenceTransformer model.
# It is attached to the collection when the collection is created below, and it is
# also the fallback used by get_embedding_with_custom_model.
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
embedding_function = SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
print("Embedding function initialized with model: all-MiniLM-L6-v2")

Embedding function initialized with model: all-MiniLM-L6-v2


### Creating new Collection

In [57]:
from datetime import datetime



# Rebuild the collection from scratch so that re-running the notebook never tries to
# add the same chunk IDs twice.
# delete_collection raises when the collection does not exist (e.g. on a fresh database),
# so only delete it when it is actually present. list_collections may return Collection
# objects or plain names depending on the chromadb version; getattr handles both.
existing_collection_names = [getattr(c, "name", c) for c in client.list_collections()]
if collection_name in existing_collection_names:
    client.delete_collection(name=collection_name)

collection = client.get_or_create_collection(
    name=collection_name,
    embedding_function=embedding_function,
    metadata={
        "description": "Harry Potter book chunks",
        "created": str(datetime.now())
    }
)

print(f"Collection '{collection_name}' created or accessed successfully")

Collection 'harry_potter_collection' created or accessed successfully


### Add data to collection
Each chunk is assigned an ID and then added to the collection in batches.

In [58]:
import uuid

# Build the three parallel lists passed to collection.add.
# IDs are derived from each chunk's position in loaded_docs, so they are the same on every run.
ids = [f"hp_chunk_{position}" for position in range(len(loaded_docs))]
documents = [chunk.page_content for chunk in loaded_docs]
metadatas = [chunk.metadata for chunk in loaded_docs]

# Insert the chunks batch_size at a time rather than in one huge call
batch_size = 500
total_added = 0

for start in range(0, len(ids), batch_size):
    stop = min(start + batch_size, len(ids))
    batch = slice(start, stop)

    collection.add(
        ids=ids[batch],
        documents=documents[batch],
        metadatas=metadatas[batch]
    )

    total_added += stop - start
    print(f"Added batch: {start} to {stop-1} ({stop-start} items)")

print(f"Successfully added {total_added} documents to collection '{collection_name}'")

Added batch: 0 to 499 (500 items)
Added batch: 500 to 999 (500 items)
Added batch: 500 to 999 (500 items)
Added batch: 1000 to 1499 (500 items)
Added batch: 1000 to 1499 (500 items)
Added batch: 1500 to 1999 (500 items)
Added batch: 1500 to 1999 (500 items)
Added batch: 2000 to 2499 (500 items)
Added batch: 2000 to 2499 (500 items)
Added batch: 2500 to 2999 (500 items)
Added batch: 2500 to 2999 (500 items)
Added batch: 3000 to 3381 (382 items)
Successfully added 3382 documents to collection 'harry_potter_collection'
Added batch: 3000 to 3381 (382 items)
Successfully added 3382 documents to collection 'harry_potter_collection'


In [59]:
# Verify how many chunks ended up in the collection
count = collection.count()
print(f"Total documents in collection: {count}")

# Show the first three stored entries as a quick sanity check
peek = collection.peek(limit=3)
print("\nSample entries:")
sample_entries = zip(peek['ids'], peek['documents'], peek['metadatas'])
for position, (doc_id, doc_text, metadata) in enumerate(sample_entries, start=1):
    print(f"\n--- Document {position} ---")
    print(f"ID: {doc_id}")
    print(f"Text: {doc_text[:100]}...")
    print(f"Metadata: {metadata}")

Total documents in collection: 3382

Sample entries:

--- Document 1 ---
ID: hp_chunk_0
Text: M
 
CHAPTER  ONE
THE BOY WHO LIVED
r. and Mrs. Dursley, of number four, Privet Drive, were proud to ...
Metadata: {'chunk_type': 'section', 'parent_id': 'page_1', 'section_id': 'page_1_section_0', 'chunk_index': 0, 'chunk_level': 1, 'source': '../harrypotter.pdf', 'page_number': 1}

--- Document 2 ---
ID: hp_chunk_1
Text: M
 
CHAPTER  ONE
THE BOY WHO LIVED
r. and Mrs. Dursley, of number four, Privet Drive, were proud to ...
Metadata: {'source': '../harrypotter.pdf', 'chunk_index': 0, 'chunk_level': 2, 'page_number': 1, 'chunk_type': 'paragraph', 'section_id': 'page_1_section_0', 'paragraph_id': 'page_1_section_0_para_0', 'parent_id': 'page_1_section_0'}

--- Document 3 ---
ID: hp_chunk_2
Text: M
 
CHAPTER  ONE
THE BOY WHO LIVED
r. and Mrs. Dursley, of number four, Privet Drive, were proud to ...
Metadata: {'sentence_id': 'page_1_section_0_para_0_sent_0', 'chunk_index': 0, 'section_id': 'page_1

### Querying the Database

In [60]:
# Rich table for displaying results (optional but nice)
try:
    from rich.console import Console
    from rich.table import Table

    console = Console()
    use_rich = True
except ImportError:
    use_rich = False
    print("Rich package not found. Using standard print.")


def _preview_text(text, limit=100):
    """Return `text` cut to `limit` characters, adding '...' only when something was cut."""
    text = "" if text is None else str(text)
    return (text[:limit] + "...") if len(text) > limit else text


def _format_distance(dist):
    """Format a distance with four decimals, or 'N/A' when no distance is available."""
    return "N/A" if dist is None else f"{dist:.4f}"


# Function to display query results
def print_results(results, use_rich=use_rich):
    """Display the hits of a single query.

    Args:
        results: dict in the shape returned by ``collection.query`` - each of
            'documents', 'metadatas', 'ids' and 'distances' holds one inner list
            per query; only the first query's hits are shown. 'ids' and
            'distances' are optional.
        use_rich: render a rich table when True, plain text otherwise. Defaults to
            whether the rich package could be imported when this cell ran.
    """
    docs = results['documents'][0]
    metas = results['metadatas'][0]
    # Pad optional fields so that zip() below does not silently drop rows.
    ids = (results.get('ids') or [["N/A"] * len(docs)])[0]
    distances = (results.get('distances') or [[None] * len(docs)])[0]

    if not docs:
        print("No results to display.")
        return

    rows = list(zip(docs, ids, metas, distances))

    if use_rich:
        table = Table(show_header=True, header_style="bold magenta")
        table.add_column("Rank", width=6)
        table.add_column("Document ID")
        table.add_column("Document Text", width=60)
        table.add_column("Page")
        table.add_column("Distance")

        for rank, (doc, doc_id, meta, dist) in enumerate(rows, start=1):
            table.add_row(
                str(rank),
                str(doc_id),
                _preview_text(doc),
                str((meta or {}).get('page_number', 'N/A')),
                _format_distance(dist)
            )

        console.print(table)
    else:
        # Standard print version; shows the same fields as the table
        for rank, (doc, doc_id, meta, dist) in enumerate(rows, start=1):
            print(f"\n--- Result {rank} ---")
            print(f"ID: {doc_id}")
            print(f"Text: {_preview_text(doc)}")
            print(f"Metadata: {meta}")
            print(f"Distance: {_format_distance(dist)}")



In [61]:
# Run a sample text query against the collection and display the three closest chunks
query = "Who was Dumbledore? When was he first introduced?"
results = collection.query(
    query_texts=[query],
    n_results=3,
    include=["documents", "metadatas", "distances"]
)

print(f"\nResults for query: '{query}'")
print_results(results)


Results for query: 'Who was Dumbledore? When was he first introduced?'


### Natural Language Generation

In [62]:
!pip install google-generativeai langchain-google-genai



In [63]:
import os
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI


# Gemini chat model used to generate the final answers from the retrieved context.
# NOTE(review): presumably authenticates via the GOOGLE_API_KEY environment variable set in the setup cell - confirm.
# NOTE(review): temperature=0.7 is fairly high for answers that must stay grounded in the context; consider lowering it.
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)

In [64]:
from langchain.prompts import PromptTemplate

# RAG prompt: restricts the model to the supplied context, asks it to acknowledge partial
# information, and gives it a fixed refusal sentence when the context has nothing relevant.
# The {context} and {query} placeholders are filled in via prompt.format(...).
rag_prompt_template = """
You are an expert on Harry Potter books. Answer questions using ONLY the context below.
If you can't find a complete answer in the context but see partial information, try to provide what you can find and acknowledge the limitations of the available information.
If there is NO relevant information at all in the context, respond with "I don't have enough information to answer this question."

Context:
{context}

Question: {query}

Answer (based only on the context provided):
"""

prompt = PromptTemplate(
    template=rag_prompt_template,
    input_variables=["context", "query"]
)

In [65]:
!pip install rank_bm25



In [66]:
from rank_bm25 import BM25Okapi
import numpy as np

def _tokenize_for_bm25(text):
    """Lowercase `text` and split it into word tokens so BM25 matching ignores case and punctuation."""
    import re  # local import keeps this cell runnable on its own
    return re.findall(r"\w+", (text or "").lower())


def answer_with_hybrid_rag(query, n_results=5):
    """Answer `query` using hybrid (semantic + BM25 keyword) retrieval followed by the LLM.

    Steps:
      1. Semantic search: top `n_results` chunks from the Chroma collection.
      2. Keyword search: BM25 over every chunk stored in the collection.
      3. Fusion: both rankings are merged with reciprocal rank fusion, so keyword hits
         actually compete with semantic hits and chunks found by both methods rank highest.
      4. Generation: the fused chunks are placed in the RAG prompt and sent to the LLM.

    Args:
        query: the user's question.
        n_results: number of chunks per retriever and number of chunks kept after fusion.

    Returns:
        dict with keys
          'query'            - the original question,
          'answer'           - the LLM's answer text,
          'source_documents' - dict shaped like a ``collection.query`` result
                               ('documents', 'metadatas', 'distances', 'ids', each a list
                               holding one inner list). 'distances' holds the semantic
                               distance for chunks found by semantic search and NaN for
                               chunks found by keyword search only.
    """
    n_results = int(n_results)

    # 1. Semantic search with ChromaDB
    semantic_results = collection.query(
        query_texts=[query],
        n_results=n_results,
        include=["documents", "metadatas", "distances"]
    )
    semantic_ids = semantic_results["ids"][0]

    # Every retrieved chunk, keyed by id: (text, metadata, semantic distance or NaN)
    candidates = {}
    for doc_id, doc, meta, dist in zip(
        semantic_ids,
        semantic_results["documents"][0],
        semantic_results["metadatas"][0],
        semantic_results["distances"][0],
    ):
        candidates[doc_id] = (doc, meta, float(dist))

    # 2. Keyword search with BM25 over the WHOLE collection (no `limit`, otherwise only
    #    the first few chunks of the book would ever be searchable by keyword).
    # NOTE: the index is rebuilt on every call; cache it if query latency matters.
    all_docs = collection.get(include=["documents", "metadatas"])
    corpus_ids = all_docs["ids"]
    corpus_texts = all_docs["documents"] or []
    corpus_metas = all_docs["metadatas"] or [None] * len(corpus_ids)

    keyword_ids = []
    tokenized_query = _tokenize_for_bm25(query)
    # BM25Okapi cannot be built from an empty corpus, and an empty query matches nothing.
    if corpus_texts and tokenized_query:
        bm25 = BM25Okapi([_tokenize_for_bm25(text) for text in corpus_texts])
        bm25_scores = bm25.get_scores(tokenized_query)
        for idx in np.argsort(bm25_scores)[::-1][:n_results]:
            if bm25_scores[idx] <= 0:
                break  # remaining chunks share no term with the query
            doc_id = corpus_ids[idx]
            keyword_ids.append(doc_id)
            candidates.setdefault(doc_id, (corpus_texts[idx], corpus_metas[idx], float("nan")))

    # 3. Reciprocal rank fusion: score(id) = sum over rankings of 1 / (k + rank).
    #    sorted() is stable, so on ties the semantic hit comes first.
    rrf_k = 60
    fused_scores = {}
    for ranking in (semantic_ids, keyword_ids):
        for rank, doc_id in enumerate(ranking, start=1):
            fused_scores[doc_id] = fused_scores.get(doc_id, 0.0) + 1.0 / (rrf_k + rank)
    combined_ids = sorted(fused_scores, key=fused_scores.get, reverse=True)[:n_results]

    combined_docs = [candidates[doc_id][0] for doc_id in combined_ids]
    combined_meta = [candidates[doc_id][1] for doc_id in combined_ids]
    combined_distances = [candidates[doc_id][2] for doc_id in combined_ids]

    # 4. Format the context (each chunk tagged with its page) and query the LLM
    formatted_docs = []
    for doc, meta in zip(combined_docs, combined_meta):
        page_num = (meta or {}).get("page_number", "unknown")
        formatted_docs.append(f"[Page {page_num}]: {doc}")

    context = "\n\n---\n\n".join(formatted_docs)
    filled_prompt = prompt.format(context=context, query=query)
    response = llm.invoke(filled_prompt)

    # Same shape as a collection.query result so print_results can display it
    source_documents = {
        "documents": [combined_docs],
        "metadatas": [combined_meta],
        "distances": [combined_distances],
        "ids": [combined_ids]
    }

    return {
        "query": query,
        "answer": response.content if hasattr(response, 'content') else str(response),
        "source_documents": source_documents
    }

In [67]:
# Smoke-test the hybrid RAG pipeline with one question: print the generated answer,
# then the chunks that were supplied to the LLM as context.
test_query = "What happened when Harry first met Hagrid?"
response = answer_with_hybrid_rag(test_query)

print(f"Question: {test_query}")
print(f"\nAnswer: {response['answer']}")
print("\nSources:")
print_results(response["source_documents"])

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for quota metric 'Generate Content API requests per minute' and limit 'GenerateContent request limit per minute for a region' of service 'generativelanguage.googleapis.com' for consumer 'project_number:381643304294'. [reason: "RATE_LIMIT_EXCEEDED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "quota_unit"
  value: "1/min/{project}/{region}"
}
metadata {
  key: "quota_metric"
  value: "generativelanguage.googleapis.com/generate_content_requests"
}
metadata {
  key: "quota_location"
  value: "asia-east1"
}
metadata {
  key: "quota_limit"
  value: "GenerateContentRequestsPerMinutePerProjectPerRegion"
}
metadata {
  key: "quota_limit_value"
  value: "0"
}
metadata {
  key: "consumer"
  value: "projects/381643304294"
}
, links {
  description: "Request a higher quota li

ResourceExhausted: 429 Quota exceeded for quota metric 'Generate Content API requests per minute' and limit 'GenerateContent request limit per minute for a region' of service 'generativelanguage.googleapis.com' for consumer 'project_number:381643304294'. [reason: "RATE_LIMIT_EXCEEDED"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
metadata {
  key: "quota_unit"
  value: "1/min/{project}/{region}"
}
metadata {
  key: "quota_metric"
  value: "generativelanguage.googleapis.com/generate_content_requests"
}
metadata {
  key: "quota_location"
  value: "asia-east1"
}
metadata {
  key: "quota_limit"
  value: "GenerateContentRequestsPerMinutePerProjectPerRegion"
}
metadata {
  key: "quota_limit_value"
  value: "0"
}
metadata {
  key: "consumer"
  value: "projects/381643304294"
}
, links {
  description: "Request a higher quota limit."
  url: "https://cloud.google.com/docs/quotas/help/request_increase"
}
]

In [None]:
# Run several questions through the hybrid RAG pipeline, print each answer with its
# top-ranked source chunk, and export everything to a text report.
results_for_export = []

test_questions = [
    "Who is Voldemort and why is he feared?",
    "What are the four houses at Hogwarts?",
    "How did Harry survive the killing curse as a baby?"
]

for question in test_questions:
    print("\n" + "="*50)
    print(f"Question: {question}")
    response = answer_with_hybrid_rag(question)
    print(f"\nAnswer: {response['answer']}")
    print("\nTop source:")
    source_chunks = response["source_documents"]["documents"][0]
    if source_chunks:
        top_doc = source_chunks[0]
        top_meta = response["source_documents"]["metadatas"][0][0]
        page = top_meta.get("page_number", "N/A")
        print(f"[Page {page}]:\n{top_doc}")  # Print full chunk
    else:
        top_doc, page = None, None
        print("No sources found.")
    # Save for export (page/chunk stay None when nothing was retrieved)
    results_for_export.append({
        "question": question,
        "answer": response['answer'],
        "page": page,
        "chunk": top_doc
    })

# Export results to a well-formatted text file.
# open() does not create missing folders, so make sure the output directory exists first.
output_path = Path(multiquery_rag_output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    f.write("RAG Multi-Query Evaluation Results\n")
    f.write("="*60 + "\n\n")
    for idx, res in enumerate(results_for_export, 1):
        f.write(f"Question {idx}: {res['question']}\n")
        f.write(f"Answer:\n{res['answer']}\n\n")
        if res["chunk"] is not None:
            f.write(f"Top Source Chunk (Page {res['page']}):\n{res['chunk']}\n")
        else:
            f.write("Top Source Chunk: No sources found.\n")
        f.write("-"*60 + "\n\n")
print(f"\nResults exported to {multiquery_rag_output_path}")


Question: Who is Voldemort and why is he feared?

Answer: Voldemort is a powerful wizard who started gaining followers about twenty years ago, causing "Dark days" where people didn't know who to trust or dare to get friendly with strange wizards or witches. He is referred to as "Lord Voldemort" by his follower Quirrell, who considers him a "great wizard" and his "master." Voldemort believes "there is no good and evil, there is only power, and those too weak to seek it." He had powers Dumbledore says he will never have, and Dumbledore is described as the "only one Voldemort was frightened of." He is still "out there somewhere, perhaps looking for another body to share."

Voldemort is feared because of the "dark days" he caused. People are so afraid of him that they often avoid saying his name, referring to him as "You-Know-Who," as Dumbledore explains that "Fear of a name increases fear of the thing itself." Hagrid gulps and shudders at the mention of his name, and Professor McGonagall

### Creating a User Interface with Gradio
This will create a simple web interface for users to query the Harry Potter RAG system

In [None]:
# Install Gradio for the web interface
!pip install gradio

In [None]:
import gradio as gr
import pickle
import os
import time
import numpy as np
from pathlib import Path

# Try to load the optional custom encoder model.
# custom_model_loaded records whether loading succeeded; the UI uses it to enable or
# disable the "custom encoder" checkbox, and rag_query checks it before using the encoder.
# NOTE: pickle.load can execute arbitrary code - only load a model.pkl you trust.
custom_model_loaded = False
custom_encoder = None
try:
    # Path to the encoder model
    model_path = "../Encoder/model.pkl"  # Adjust path as needed

    with open(model_path, "rb") as f:
        custom_encoder = pickle.load(f)
    custom_model_loaded = True
    print(f"Successfully loaded custom encoder model from {model_path}")
except Exception as e:
    # Loading is best-effort: any failure just leaves the custom encoder disabled.
    print(f"Could not load custom encoder model: {e}")
    print("Will use ChromaDB's default embedding function if custom model is selected.")

# Helper: get embedding using the custom encoder (robust)
def _embedding_to_list(vector):
    """Convert one embedding (numpy array or any sequence of numbers) to a plain list of floats."""
    if isinstance(vector, np.ndarray):
        return vector.tolist()
    return [float(x) for x in vector]


def _embed_with_default_function(text, unavailable_message):
    """Embed `text` with the notebook's `embedding_function`; raise RuntimeError if that is not possible."""
    try:
        return _embedding_to_list(embedding_function([text])[0])
    except Exception as exc:
        # Chain the original error so the real cause stays visible in the traceback.
        raise RuntimeError(unavailable_message) from exc


def get_embedding_with_custom_model(text):
    """Generate an embedding for `text`, preferring the custom encoder model.

    The custom encoder is tried through the first interface it offers:
    ``encode([text])``, ``transform([text])``, or calling the object directly.
    If no custom encoder is loaded, or it fails, the notebook's `embedding_function`
    is used instead.

    Args:
        text: the string to embed.

    Returns:
        The embedding as a plain list of floats.

    Raises:
        RuntimeError: when neither the custom encoder nor `embedding_function`
            can produce an embedding.
    """
    # NOTE(review): the custom encoder's output is used to query the collection, so it
    # presumably must have the same dimensionality as the stored embeddings - confirm.
    if custom_model_loaded and custom_encoder is not None:
        try:
            if hasattr(custom_encoder, "encode"):
                emb = custom_encoder.encode([text])
            elif hasattr(custom_encoder, "transform"):
                emb = custom_encoder.transform([text])
            else:
                # Try calling directly
                emb = custom_encoder([text])
            # emb holds one embedding per input text; we passed exactly one text
            return _embedding_to_list(emb[0])
        except Exception as e:
            print(f"Error using custom encoder: {e}")
            return _embed_with_default_function(
                text, "Custom encoder failed and no embedding_function fallback available."
            )

    return _embed_with_default_function(
        text, "No custom encoder loaded and embedding_function is not available."
    )

# Primary RAG pipeline function
def _format_sources(source_docs):
    """Render retrieved chunks as numbered 'Source N [Page P]' sections for display."""
    try:
        docs_list = source_docs["documents"][0]
        meta_list = source_docs["metadatas"][0]
    except (KeyError, IndexError, TypeError):
        # Unknown shape: fall back to a plain textual representation
        return str(source_docs)

    sections = []
    for number, (doc, meta) in enumerate(zip(docs_list, meta_list), start=1):
        page_num = meta.get("page_number", "unknown") if isinstance(meta, dict) else "unknown"
        sections.append(f"\n\nSource {number} [Page {page_num}]:\n{doc}")
    return "".join(sections)


def rag_query(query, top_k=3, use_hybrid_search=False, use_custom_encoder=False):
    """
    Process a query through the RAG pipeline.

    Args:
        query: the user's question.
        top_k: number of chunks to retrieve (converted with int(), so slider floats work).
        use_hybrid_search: when True, delegate retrieval and answering to
            answer_with_hybrid_rag; the custom encoder option is ignored in that case.
        use_custom_encoder: when True and a custom encoder was loaded, embed the query
            with it; otherwise the collection embeds the query text itself.

    Returns:
        A tuple of three strings:
          - answer
          - source chunks, formatted for display
          - elapsed time, e.g. "1.234 seconds"
        Errors are reported in the answer string rather than raised, so the UI
        always has something to display.
    """
    start_time = time.time()

    def elapsed_str():
        return f"{time.time() - start_time:.3f} seconds"

    # Don't spend a retrieval + LLM round trip on an empty submission.
    if query is None or not str(query).strip():
        return "Please enter a question.", "", elapsed_str()

    try:
        n_results = int(top_k)

        if use_hybrid_search:
            response = answer_with_hybrid_rag(query, n_results=n_results)
            answer = response.get('answer', "")
            source_docs = response.get('source_documents', {"documents": [[]], "metadatas": [[]]})
        else:
            # Vector search: query by raw text unless a custom query embedding is available.
            query_kwargs = {"query_texts": [query]}
            if use_custom_encoder and custom_model_loaded:
                try:
                    query_kwargs = {"query_embeddings": [get_embedding_with_custom_model(query)]}
                except Exception as e:
                    # Keep the text-based query prepared above
                    print(f"Custom encoder embedding failed: {e}. Falling back to Chroma text embedding.")

            results = collection.query(
                n_results=n_results,
                include=["documents", "metadatas", "distances"],
                **query_kwargs
            )

            documents = results["documents"][0]
            metadatas = results["metadatas"][0]
            distances = (results.get("distances") or [[None] * len(documents)])[0]

            # Tag each chunk with its page number (and distance, when known) for the LLM context
            formatted_docs = []
            for doc, meta, dist in zip(documents, metadatas, distances):
                page_num = meta.get("page_number", "unknown") if isinstance(meta, dict) else "unknown"
                if dist is not None:
                    formatted_docs.append(f"[Page {page_num}] (dist={dist:.4f}): {doc}")
                else:
                    formatted_docs.append(f"[Page {page_num}]: {doc}")

            context = "\n\n---\n\n".join(formatted_docs)

            # Fill the prompt template
            try:
                filled_prompt = prompt.format(context=context, query=query)
            except Exception:
                # fallback if prompt doesn't use named placeholders
                filled_prompt = f"{context}\n\nQuestion: {query}"

            # Call the LLM; surface a failure as the answer so the sources are still shown
            try:
                response_obj = llm.invoke(filled_prompt)
                answer = response_obj.content if hasattr(response_obj, 'content') else str(response_obj)
            except Exception as e:
                answer = f"LLM invocation failed: {e}"

            source_docs = results

        return answer, _format_sources(source_docs), elapsed_str()

    except Exception as e:
        return f"Error during processing: {e}", "", elapsed_str()


# Create Gradio Interface
# Layout: question box and retrieval options on top, a submit button, then the answer
# next to the source chunks and the elapsed time.
with gr.Blocks(title="Harry Potter RAG System") as demo:
    gr.Markdown("# 🧙‍♂️ Harry Potter RAG System")
    gr.Markdown("Ask questions about the Harry Potter books and get answers based on the text.")

    with gr.Row():
        with gr.Column(scale=4):
            query_input = gr.Textbox(
                label="Your Question",
                placeholder="E.g., Who is Harry Potter and what happened to his parents?",
                lines=2
            )
        with gr.Column(scale=1):
            top_k = gr.Slider(
                minimum=1,
                maximum=10,
                value=3,
                step=1,
                label="Number of chunks to retrieve"
            )
            hybrid_search = gr.Checkbox(
                label="Use hybrid search",
                value=True,
                info="If enabled, uses your hybrid function (answer_with_hybrid_rag)."
            )
            # Only selectable when model.pkl was loaded successfully in this cell
            custom_encoder_cb = gr.Checkbox(
                label="Use custom encoder (model.pkl)",
                value=False,
                interactive=custom_model_loaded,
                info="Use custom encoder model instead of ChromaDB's default" if custom_model_loaded else "Custom model not found"
            )

    submit_btn = gr.Button("Submit Question")

    with gr.Row():
        with gr.Column():
            answer_output = gr.Textbox(label="Answer", lines=10)
        with gr.Column():
            sources_output = gr.Textbox(label="Source Chunks", lines=10, max_lines=30)
            elapsed_output = gr.Textbox(label="Elapsed Time (s)", lines=1)

    # Examples: each row is [question, top_k, hybrid search, custom encoder],
    # in the same order as the `inputs` list below
    gr.Examples([
        ["Who is Dumbledore?", 3, True, False],
        ["What happened when Harry met Hagrid?", 5, True, False],
        ["What are the four houses at Hogwarts?", 3, True, False],
        ["Why did Harry survive Voldemort's killing curse?", 5, True, False],
    ], inputs=[query_input, top_k, hybrid_search, custom_encoder_cb])

    # The input order matches rag_query's parameters; its three return values fill the outputs in order
    submit_btn.click(
        fn=rag_query,
        inputs=[query_input, top_k, hybrid_search, custom_encoder_cb],
        outputs=[answer_output, sources_output, elapsed_output]
    )

# Launch the demo
# NOTE(review): share=True requests a public share link; anyone with that link can trigger
# LLM calls billed to your API key - confirm this is intended, otherwise use share=False.
demo.launch(share=True)

Could not load custom encoder model: [Errno 2] No such file or directory: '../Encoder/model.pkl'
Will use ChromaDB's default embedding function if custom model is selected.
* Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


