In [1]:
# RAG Data Store Generator with HuggingFace Embeddings
# Complete code for Jupyter Notebook

from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
import os
import shutil

# Load environment variables (optional since we're not using OpenAI)
load_dotenv(dotenv_path=".env.txt", override=True)

# Configuration
CHROMA_PATH = "chroma"
DATA_PATH = "data/books"

def main():
    """Main function to generate the data store."""
    generate_data_store()

def generate_data_store():
    """Generate the vector database from documents."""
    print("🚀 Starting RAG data store generation...")
    
    # Load documents
    documents = load_documents()
    if not documents:
        print("❌ No documents found. Exiting.")
        return
    
    # Split into chunks
    chunks = split_text(documents)
    if not chunks:
        print("❌ No chunks created. Exiting.")
        return
    
    # Save to vector database
    save_to_chroma(chunks)
    
    print("\n🎉 Data store generation completed successfully!")
    print(f"📁 Database location: {CHROMA_PATH}")

def load_documents():
    """Load documents from the specified directory."""
    if not os.path.exists(DATA_PATH):
        print(f"Error: Data path {DATA_PATH} does not exist.")
        return []
    
    loader = DirectoryLoader(DATA_PATH, glob="*.md")
    documents = loader.load()
    print(f"📚 Loaded {len(documents)} documents from {DATA_PATH}")
    return documents

def split_text(documents: list[Document]):
    """Split documents into smaller chunks for better embedding."""
    if not documents:
        print("No documents to split.")
        return []
    
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        length_function=len,
        add_start_index=True,
    )
    
    chunks = text_splitter.split_documents(documents)
    print(f"✂️ Split {len(documents)} documents into {len(chunks)} chunks.")
    
    # Display sample chunk information
    if chunks:
        sample_idx = min(10, len(chunks) - 1)
        document = chunks[sample_idx]
        print("\n📄 Sample chunk:")
        print("-" * 50)
        print(document.page_content)
        print(f"\n📋 Metadata: {document.metadata}")
        print("-" * 50)
    
    return chunks

def save_to_chroma(chunks: list[Document]):
    """Save document chunks to Chroma vector database using HuggingFace embeddings."""
    if not chunks:
        print("No chunks to save.")
        return
    
    # Clear out the existing database
    if os.path.exists(CHROMA_PATH):
        print(f"🗑️ Removing existing database at {CHROMA_PATH}")
        shutil.rmtree(CHROMA_PATH)
    
    # Create HuggingFace embeddings
    print("🤗 Initializing HuggingFace embeddings...")
    print("📥 Downloading model (first time only)...")
    
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    print("✅ HuggingFace embeddings loaded successfully!")
    
    # Create and save the vector database
    print("💾 Creating Chroma database...")
    db = Chroma.from_documents(
        chunks, 
        embeddings, 
        persist_directory=CHROMA_PATH
    )
    db.persist()
    print(f"✅ Saved {len(chunks)} chunks to {CHROMA_PATH}")

def verify_data_store():
    """Verify that the data store was created successfully."""
    print("🔍 Verifying data store...")
    
    if not os.path.exists(CHROMA_PATH):
        print("❌ Chroma database was not created successfully.")
        return False
    
    try:
        # Try to load the database
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
        
        # Test a simple query
        results = db.similarity_search("Alice", k=3)
        print(f"✅ Database verification successful!")
        print(f"📊 Found {len(results)} results for test query 'Alice'")
        
        if results:
            print(f"📝 Sample result: {results[0].page_content[:100]}...")
        
        return True
    except Exception as e:
        print(f"❌ Database verification failed: {e}")
        return False

def query_database(query: str, k: int = 5):
    """Query the vector database for similar chunks."""
    if not os.path.exists(CHROMA_PATH):
        print("❌ Database not found. Run generate_data_store() first.")
        return []
    
    try:
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)
        
        results = db.similarity_search(query, k=k)
        print(f"🔍 Query: '{query}'")
        print(f"📊 Found {len(results)} similar chunks:")
        print("-" * 60)
        
        for i, result in enumerate(results, 1):
            print(f"\n{i}. {result.page_content[:200]}...")
            print(f"   📁 Source: {result.metadata.get('source', 'Unknown')}")
        
        return results
    except Exception as e:
        print(f"❌ Query failed: {e}")
        return []

# Execution logic
if __name__ == "__main__":
    # If running as script
    main()
    verify_data_store()
else:
    # If imported in Jupyter notebook
    print("📚 RAG Data Store Generator loaded!")
    print("\n🚀 Available functions:")
    print("  • generate_data_store() - Create the vector database")
    print("  • verify_data_store() - Test the database")
    print("  • query_database('your query') - Search the database")
    print("\n💡 Quick start: Run generate_data_store()")

# Auto-run if this is the main execution
# Uncomment the lines below to auto-execute when running the cell
# generate_data_store()
# verify_data_store()

🚀 Starting RAG data store generation...
📚 Loaded 1 documents from data/books
✂️ Split 1 documents into 801 chunks.

📄 Sample chunk:
--------------------------------------------------
So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure of
making a daisy-chain would be worth the trouble of getting up and
picking the daisies, when suddenly a White Rabbit with pink eyes ran
close by her.

📋 Metadata: {'source': 'data\\books\\alice_in_wonderland.md', 'start_index': 1654}
--------------------------------------------------
🗑️ Removing existing database at chroma
🤗 Initializing HuggingFace embeddings...
📥 Downloading model (first time only)...


  embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


✅ HuggingFace embeddings loaded successfully!
💾 Creating Chroma database...


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
  db.persist()


✅ Saved 801 chunks to chroma

🎉 Data store generation completed successfully!
📁 Database location: chroma
🔍 Verifying data store...


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given


✅ Database verification successful!
📊 Found 3 results for test query 'Alice'
📝 Sample result: CHAPTER XII.
Alice’s Evidence...


In [2]:
# HuggingFace Embeddings and Word Comparison Tool
# Complete code for Jupyter Notebook - No OpenAI dependencies

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.evaluation import load_evaluator
from dotenv import load_dotenv
import numpy as np
import os

# Load environment variables (optional since we're not using OpenAI)
load_dotenv()

def cosine_similarity(a, b):
    """Calculate cosine similarity between two vectors."""
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

def cosine_distance(a, b):
    """Calculate cosine distance between two vectors."""
    return 1 - cosine_similarity(a, b)

def euclidean_distance(a, b):
    """Calculate Euclidean distance between two vectors."""
    return np.linalg.norm(np.array(a) - np.array(b))

def compare_words_advanced(embedding_function, word1, word2):
    """Advanced comparison between two words using multiple metrics."""
    print(f"\n🔍 Advanced comparison between '{word1}' and '{word2}':")
    print("-" * 60)
    
    # Get embeddings
    vector1 = embedding_function.embed_query(word1)
    vector2 = embedding_function.embed_query(word2)
    
    # Calculate different similarity metrics
    cos_sim = cosine_similarity(vector1, vector2)
    cos_dist = cosine_distance(vector1, vector2)
    eucl_dist = euclidean_distance(vector1, vector2)
    
    print(f"📊 Cosine Similarity: {cos_sim:.4f}")
    print(f"📏 Cosine Distance: {cos_dist:.4f}")
    print(f"📐 Euclidean Distance: {eucl_dist:.4f}")
    
    # Interpretation
    if cos_sim > 0.8:
        print("💚 Very similar words")
    elif cos_sim > 0.6:
        print("💛 Moderately similar words")
    elif cos_sim > 0.4:
        print("🧡 Somewhat similar words")
    else:
        print("❤️ Quite different words")
    
    return {
        'cosine_similarity': cos_sim,
        'cosine_distance': cos_dist,
        'euclidean_distance': eucl_dist
    }

def main():
    """Main function to demonstrate embeddings and word comparisons."""
    print("🤗 Starting HuggingFace Embeddings Demo...")
    
    # Initialize HuggingFace embeddings
    print("📥 Loading HuggingFace embeddings model...")
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    print("✅ HuggingFace embeddings loaded successfully!")
    
    # Get embedding for a single word
    test_word = "apple"
    print(f"\n🍎 Getting embedding for '{test_word}'...")
    vector = embedding_function.embed_query(test_word)
    
    print(f"📊 Vector for '{test_word}': {vector[:5]}... (showing first 5 dimensions)")
    print(f"📏 Vector length: {len(vector)} dimensions")
    print(f"🔢 Vector data type: {type(vector[0])}")
    
    # Compare different word pairs
    word_pairs = [
        ("apple", "iphone"),      # Related tech/brand
        ("apple", "orange"),      # Related fruits
        ("car", "automobile"),    # Synonyms
        ("happy", "sad"),         # Opposites
        ("dog", "computer"),      # Unrelated
        ("king", "queen"),        # Related concepts
        ("paris", "france"),      # Related geography
    ]
    
    print(f"\n🔍 Comparing {len(word_pairs)} word pairs...")
    print("=" * 60)
    
    results = []
    for word1, word2 in word_pairs:
        result = compare_words_advanced(embedding_function, word1, word2)
        results.append((word1, word2, result))
    
    # Try using LangChain's built-in evaluator (if available)
    print(f"\n🧪 Testing LangChain's built-in evaluator...")
    try:
        evaluator = load_evaluator("pairwise_embedding_distance", embeddings=embedding_function)
        test_pair = ("apple", "iphone")
        langchain_result = evaluator.evaluate_string_pairs(
            prediction=test_pair[0], 
            prediction_b=test_pair[1]
        )
        print(f"✅ LangChain evaluator result for {test_pair}: {langchain_result}")
    except Exception as eval_error:
        print(f"⚠️ LangChain evaluator failed: {eval_error}")
        print("💡 Using manual comparison methods instead (which work great!)")
    
    # Summary of results
    print(f"\n📈 Summary of Word Pair Similarities:")
    print("=" * 60)
    sorted_results = sorted(results, key=lambda x: x[2]['cosine_similarity'], reverse=True)
    
    for i, (word1, word2, metrics) in enumerate(sorted_results, 1):
        similarity = metrics['cosine_similarity']
        print(f"{i:2d}. {word1:10} ↔ {word2:10} | Similarity: {similarity:.4f}")
    
    return results

def test_embedding_quality():
    """Test the quality of embeddings with different types of words."""
    print("\n🧪 Testing embedding quality with different word types...")
    
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    
    # Test categories
    test_categories = {
        "Animals": ["dog", "cat", "elephant", "mouse"],
        "Colors": ["red", "blue", "green", "yellow"],
        "Countries": ["france", "germany", "japan", "brazil"],
        "Emotions": ["happy", "sad", "angry", "excited"],
        "Technology": ["computer", "smartphone", "internet", "software"]
    }
    
    for category, words in test_categories.items():
        print(f"\n📂 Category: {category}")
        print("-" * 40)
        
        # Compare first word with others in the same category
        base_word = words[0]
        for other_word in words[1:]:
            result = compare_words_advanced(embedding_function, base_word, other_word)
            # Just show the similarity score for brevity
            similarity = result['cosine_similarity']
            print(f"  {base_word} ↔ {other_word}: {similarity:.3f}")

def query_similar_words(word, word_list, top_k=3):
    """Find the most similar words from a list to a given word."""
    print(f"\n🎯 Finding words most similar to '{word}'...")
    
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    
    target_vector = embedding_function.embed_query(word)
    similarities = []
    
    for candidate_word in word_list:
        if candidate_word.lower() != word.lower():  # Skip the same word
            candidate_vector = embedding_function.embed_query(candidate_word)
            similarity = cosine_similarity(target_vector, candidate_vector)
            similarities.append((candidate_word, similarity))
    
    # Sort by similarity and get top_k
    similarities.sort(key=lambda x: x[1], reverse=True)
    top_similar = similarities[:top_k]
    
    print(f"🏆 Top {top_k} most similar words to '{word}':")
    for i, (similar_word, similarity) in enumerate(top_similar, 1):
        print(f"  {i}. {similar_word}: {similarity:.4f}")
    
    return top_similar

# Execution logic
if __name__ == "__main__":
    # If running as script
    main()
    test_embedding_quality()
    
    # Example of finding similar words
    word_bank = ["apple", "orange", "banana", "computer", "laptop", "phone", 
                 "car", "bicycle", "happy", "joyful", "sad", "angry"]
    query_similar_words("apple", word_bank)
    
else:
    # If imported in Jupyter notebook
    print("🤗 HuggingFace Embeddings Tool loaded!")
    print("\n🚀 Available functions:")
    print("  • main() - Run complete demo")
    print("  • compare_words_advanced(embedding_function, word1, word2) - Compare two words")
    print("  • test_embedding_quality() - Test embeddings across categories")
    print("  • query_similar_words(word, word_list) - Find similar words")
    print("\n💡 Quick start: Run main()")

# Auto-run demo (uncomment to auto-execute)
# main()

🤗 Starting HuggingFace Embeddings Demo...
📥 Loading HuggingFace embeddings model...
✅ HuggingFace embeddings loaded successfully!

🍎 Getting embedding for 'apple'...
📊 Vector for 'apple': [-0.006138438358902931, 0.0310117956250906, 0.06479357928037643, 0.010941504500806332, 0.0052671851590275764]... (showing first 5 dimensions)
📏 Vector length: 384 dimensions
🔢 Vector data type: <class 'float'>

🔍 Comparing 7 word pairs...

🔍 Advanced comparison between 'apple' and 'iphone':
------------------------------------------------------------
📊 Cosine Similarity: 0.7238
📏 Cosine Distance: 0.2762
📐 Euclidean Distance: 0.7432
💛 Moderately similar words

🔍 Advanced comparison between 'apple' and 'orange':
------------------------------------------------------------
📊 Cosine Similarity: 0.3726
📏 Cosine Distance: 0.6274
📐 Euclidean Distance: 1.1202
❤️ Quite different words

🔍 Advanced comparison between 'car' and 'automobile':
------------------------------------------------------------
📊 Cosine Si

In [3]:
# RAG Query System with HuggingFace Embeddings
# Complete code for Jupyter Notebook - No OpenAI dependencies

import argparse
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from dotenv import load_dotenv
import os

# Load environment variables (optional)
load_dotenv()

CHROMA_PATH = "chroma"
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""

def initialize_embeddings():
    """Initialize HuggingFace embeddings."""
    print("🤗 Initializing HuggingFace embeddings...")
    embedding_function = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    print("✅ HuggingFace embeddings loaded successfully!")
    return embedding_function

def initialize_database():
    """Initialize the Chroma database with HuggingFace embeddings."""
    if not os.path.exists(CHROMA_PATH):
        print(f"❌ Database not found at {CHROMA_PATH}")
        print("💡 Please run the data store generation script first!")
        return None
    
    embedding_function = initialize_embeddings()
    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)
    print(f"📚 Database loaded from {CHROMA_PATH}")
    return db

def search_documents(db, query_text, k=5):
    """Search for relevant documents in the database."""
    print(f"🔍 Searching for: '{query_text}'")
    
    # Search the DB
    results = db.similarity_search_with_relevance_scores(query_text, k=k)
    
    # Debug: Show what we found
    print(f"📊 Found {len(results)} results:")
    print("-" * 60)
    
    for i, (doc, score) in enumerate(results, 1):
        print(f"📄 Result {i} - Relevance Score: {score:.3f}")
        print(f"📝 Content preview: {doc.page_content[:150]}...")
        print(f"📁 Source: {doc.metadata.get('source', 'Unknown')}")
        print(f"📍 Start index: {doc.metadata.get('start_index', 'N/A')}")
        print("-" * 60)
    
    return results

def generate_response_simple(results, query_text):
    """Generate a simple response without using ChatGPT/OpenAI."""
    if not results:
        return "❌ No relevant documents found to answer your question."
    
    # Create context from top results
    context_parts = []
    sources = []
    
    for doc, score in results[:3]:  # Use top 3 results
        if score > 0.3:  # Lower threshold for HuggingFace embeddings
            context_parts.append(doc.page_content)
            source = doc.metadata.get('source', 'Unknown')
            if source not in sources:
                sources.append(source)
    
    if not context_parts:
        return "❌ No sufficiently relevant documents found."
    
    context_text = "\n\n---\n\n".join(context_parts)
    
    # Simple response without LLM
    response = f"""
📋 **Context-Based Answer:**

Based on the relevant text passages found, here are the key excerpts that relate to your question: "{query_text}"

**Relevant Content:**
{context_text}

**Sources:** {', '.join(sources)}

💡 **Note:** This is a context-based response. The relevant passages above should contain information to answer your question.
"""
    
    return response

def generate_response_with_local_llm(results, query_text):
    """Generate response using a local LLM approach (placeholder for future enhancement)."""
    # This could be enhanced with local models like Ollama, Hugging Face transformers, etc.
    print("🚧 Local LLM integration not implemented yet.")
    print("📋 Falling back to context-based response...")
    return generate_response_simple(results, query_text)

def query_documents(question: str, use_llm=False):
    """
    Query documents with a specific question.
    
    Args:
        question (str): The question to ask
        use_llm (bool): Whether to try using a local LLM (not implemented yet)
    
    Returns:
        dict: Response data or None if failed
    """
    try:
        print(f"\n🎯 Processing question: '{question}'")
        print("=" * 80)
        
        # Initialize database
        db = initialize_database()
        if db is None:
            return None
        
        # Search for relevant documents
        results = search_documents(db, question, k=5)
        
        if len(results) == 0:
            print(f"❌ No results found for query: '{question}'")
            return None
        
        # Check relevance scores
        best_score = results[0][1] if results else 0
        print(f"🎯 Best relevance score: {best_score:.3f}")
        
        if best_score < 0.2:  # Lower threshold for HuggingFace
            print("⚠️ Low relevance scores. Results may not be very relevant.")
        
        # Generate response
        if use_llm:
            response_text = generate_response_with_local_llm(results, question)
        else:
            response_text = generate_response_simple(results, question)
        
        # Collect sources
        sources = [doc.metadata.get("source", "Unknown") for doc, _score in results[:3]]
        context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results[:3]])
        
        result = {
            "question": question,
            "response": response_text,
            "sources": list(set(sources)),  # Remove duplicates
            "context": context_text,
            "relevance_scores": [score for _, score in results[:3]]
        }
        
        print("\n" + "=" * 80)
        print("🤖 **RESPONSE:**")
        print("=" * 80)
        print(response_text)
        print("\n" + "=" * 80)
        
        return result
        
    except Exception as e:
        print(f"❌ An error occurred: {e}")
        return None

def main():
    """Main function to demonstrate multiple query variations."""
    print("🚀 Starting RAG Query System Demo...")
    
    # Try different variations of queries
    queries_to_try = [
        "How does Alice meet the Mad Hatter?",
        "Alice Mad Hatter meeting",
        "Alice encounters Mad Hatter", 
        "Mad Hatter Alice first meeting",
        "Alice tea party",
        "White Rabbit",
        "What happens in Alice in Wonderland?",
        "Who is the Cheshire Cat?"
    ]
    
    print(f"🔍 Testing with {len(queries_to_try)} different queries...")
    print("=" * 80)
    
    results_summary = []
    
    for i, query_text in enumerate(queries_to_try, 1):
        print(f"\n📍 Query {i}/{len(queries_to_try)}: '{query_text}'")
        print("-" * 80)
        
        result = query_documents(query_text)
        if result:
            results_summary.append({
                "query": query_text,
                "best_score": max(result["relevance_scores"]) if result["relevance_scores"] else 0,
                "sources": result["sources"]
            })
        else:
            results_summary.append({
                "query": query_text,
                "best_score": 0,
                "sources": []
            })
        
        print("\n" + "🔄" * 40)
    
    # Summary of all queries
    print(f"\n📊 **SUMMARY OF ALL QUERIES:**")
    print("=" * 80)
    
    for i, summary in enumerate(results_summary, 1):
        score = summary["best_score"]
        query = summary["query"]
        sources_count = len(summary["sources"])
        
        status = "✅" if score > 0.5 else "⚠️" if score > 0.2 else "❌"
        print(f"{status} {i:2d}. {query[:50]:<50} | Score: {score:.3f} | Sources: {sources_count}")

def interactive_mode():
    """Interactive mode for asking custom questions."""
    print("🎪 **Interactive Query Mode**")
    print("Type your questions (or 'quit' to exit):")
    print("-" * 50)
    
    db = initialize_database()
    if db is None:
        return
    
    while True:
        try:
            question = input("\n❓ Your question: ").strip()
            
            if question.lower() in ['quit', 'exit', 'q']:
                print("👋 Goodbye!")
                break
            
            if not question:
                print("Please enter a question.")
                continue
            
            result = query_documents(question)
            
        except KeyboardInterrupt:
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")

# Execution logic
if __name__ == "__main__":
    # If running as script
    main()
    
    # Uncomment to run interactive mode
    # interactive_mode()
    
else:
    # If imported in Jupyter notebook
    print("📚 RAG Query System loaded!")
    print("\n🚀 Available functions:")
    print("  • main() - Run demo with multiple queries")
    print("  • query_documents('your question') - Ask a specific question")
    print("  • interactive_mode() - Start interactive question mode")
    print("  • initialize_database() - Load the vector database")
    print("\n💡 Quick start: Run main() or query_documents('your question')")
    print("\n⚠️ Note: Make sure you've created the vector database first!")

# Auto-run demo (uncomment to auto-execute)
# main()

🚀 Starting RAG Query System Demo...
🔍 Testing with 8 different queries...

📍 Query 1/8: 'How does Alice meet the Mad Hatter?'
--------------------------------------------------------------------------------

🎯 Processing question: 'How does Alice meet the Mad Hatter?'
🤗 Initializing HuggingFace embeddings...


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'How does Alice meet the Mad Hatter?'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.540
📝 Content preview: “Really, now you ask me,” said Alice, very much confused, “I don’t
think—”

“Then you shouldn’t talk,” said the Hatter....
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 82827
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.519
📝 Content preview: Alice waited a little, half expecting to see it again, but it did not
appear, and after a minute or two she walked on in the direction in
which the Ma...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 70057
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.478
📝 Content preview: “Have you guessed the riddle yet?” the Hatter said, turning to Alice
again.

“No, I give it up,

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'Alice Mad Hatter meeting'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.522
📝 Content preview: “Really, now you ask me,” said Alice, very much confused, “I don’t
think—”

“Then you shouldn’t talk,” said the Hatter....
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 82827
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.473
📝 Content preview: The Hatter was the first to break the silence. “What day of the month
is it?” he said, turning to Alice: he had taken his watch out of his
pocket, and...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 74110
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.470
📝 Content preview: “Is that the way you manage?” Alice asked.

The Hatter shook his head mournfully. “Not I!” he replied. “We

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'Alice encounters Mad Hatter'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.569
📝 Content preview: “Really, now you ask me,” said Alice, very much confused, “I don’t
think—”

“Then you shouldn’t talk,” said the Hatter....
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 82827
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.520
📝 Content preview: Alice waited a little, half expecting to see it again, but it did not
appear, and after a minute or two she walked on in the direction in
which the Ma...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 70057
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.507
📝 Content preview: “There’s no such thing!” Alice was beginning very angrily, but the
Hatter and the March Hare went “Sh! 

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'Mad Hatter Alice first meeting'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.512
📝 Content preview: “Really, now you ask me,” said Alice, very much confused, “I don’t
think—”

“Then you shouldn’t talk,” said the Hatter....
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 82827
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.490
📝 Content preview: The Hatter was the first to break the silence. “What day of the month
is it?” he said, turning to Alice: he had taken his watch out of his
pocket, and...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 74110
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.465
📝 Content preview: “I didn’t know it was your table,” said Alice; “it’s laid for a great
many more than three.”

“Your 

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'Alice tea party'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.573
📝 Content preview: “At any rate I’ll never go there again!” said Alice as she picked her
way through the wood. “It’s the stupidest tea-party I ever was at in
all my life...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 83311
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.459
📝 Content preview: CHAPTER VII.
A Mad Tea-Party...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 71550
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.442
📝 Content preview: “How dreadfully savage!” exclaimed Alice.

“And ever since that,” the Hatter went on in a mournful tone, “he won’t
do a thing I ask! It’s always six o...
📁 Source: data\books\alice_in_wonderland.md
📍 Start 

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'White Rabbit'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.422
📝 Content preview: surprise, when the White Rabbit read out, at the top of his shrill
little voice, the name “Alice!”...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 132025
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.417
📝 Content preview: So Alice began telling them her adventures from the time when she first
saw the White Rabbit. She was a little nervous about it just at first,
the two...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 117035
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.413
📝 Content preview: After a time she heard a little pattering of feet in the distance, and
she hastily dried her eyes to see what was coming. It was the Whit

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'What happens in Alice in Wonderland?'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.507
📝 Content preview: There was a dead silence instantly, and Alice thought to herself, “I
wonder what they will do next! If they had any sense, they’d take the
roof off.” ...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 41389
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.479
📝 Content preview: Alice could think of nothing else to say but “It belongs to the
Duchess: you’d better ask her about it.”

“She’s in prison,” the Queen said to the exe...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 97352
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.475
📝 Content preview: Just at this moment Alice felt a very curious sensation, which 

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


✅ HuggingFace embeddings loaded successfully!
📚 Database loaded from chroma
🔍 Searching for: 'Who is the Cheshire Cat?'
📊 Found 5 results:
------------------------------------------------------------
📄 Result 1 - Relevance Score: 0.467
📝 Content preview: was a little startled by seeing the Cheshire Cat sitting on a bough of
a tree a few yards off....
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 67588
------------------------------------------------------------
📄 Result 2 - Relevance Score: 0.465
📝 Content preview: watching it a minute or two, she made it out to be a grin, and she said
to herself “It’s the Cheshire Cat: now I shall have somebody to talk
to.”...
📁 Source: data\books\alice_in_wonderland.md
📍 Start index: 92811
------------------------------------------------------------
📄 Result 3 - Relevance Score: 0.442
📝 Content preview: rather a handsome pig, I think.” And she began thinking over other
children she knew, who might do very well as pigs, and was just sayin