# Sentio+ RAG Pipeline

A straightforward implementation of the RAG pipeline for app review insights.

**Components:**
1. Configuration
2. ChromaDB vector store
3. LLM setup (AWS Bedrock or OpenAI-compatible)
4. Data ingestion with chunking
5. RAG query pipeline

## 1. Configuration

In [1]:
import os
from pathlib import Path
from dotenv import load_dotenv

from langchain.tools import tool
from langchain.agents import create_agent
from langchain.messages import HumanMessage, AIMessage, SystemMessage
from langgraph.checkpoint.memory import InMemorySaver

# Load environment variables from .env file
load_dotenv()

# ==================== CONFIGURATION ====================
# NOTE: This notebook mirrors the settings in src/config/settings.py
# Update both when making changes to maintain consistency.

# Resolve paths relative to the current working directory. Path(".") is the
# process cwd, so this only equals the notebook's folder when the kernel is
# started there.
NOTEBOOK_DIR = Path(".").resolve()
PROJECT_ROOT = NOTEBOOK_DIR.parent  # sentio-plus/

# LLM Configuration
LLM_PROVIDER = "bedrock"  # "bedrock" or "openai"
LLM_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0"  # For Bedrock
# LLM_MODEL = "llama3.2"  # For local Ollama
# LLM_BASE_URL / LLM_API_KEY are only read by the OpenAI-compatible branch of
# the LLM setup cell.
LLM_BASE_URL = "http://localhost:11434/v1"  # For OpenAI-compatible (Ollama, LM Studio)
LLM_API_KEY = os.getenv("LLM_API_KEY", "not-needed")  # Local models don't need this
LLM_TEMPERATURE = 0.1
LLM_MAX_TOKENS = 1000

# AWS Configuration (only needed for Bedrock)
# The two key variables have no default: they are None when unset.
AWS_REGION = os.getenv("AWS_REGION", "us-west-2")
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# ChromaDB Configuration
CHROMA_CLIENT_TYPE = "persistent"  # "persistent", "http", or "cloud"
# Use a fresh directory to avoid conflicts with corrupted/incompatible old data
# The old chroma_data/ was created with ChromaDB <1.4 and is incompatible with 1.4+
CHROMA_PERSIST_PATH = NOTEBOOK_DIR / "chroma_data_v2"  # Fresh directory for ChromaDB 1.4+
CHROMA_HOST = "localhost"  # For HTTP client
CHROMA_PORT = 8000  # For HTTP client
CHROMA_CLOUD_API_KEY = os.getenv("CHROMA_API_KEY")  # For cloud client
CHROMA_CLOUD_TENANT = os.getenv("CHROMA_TENANT")  # For cloud client
CHROMA_CLOUD_DATABASE = os.getenv("CHROMA_DATABASE")  # For cloud client
CHROMA_COLLECTION_NAME = "sentio_reviews"

# Retrieval Configuration
RETRIEVAL_TOP_K = 5  # Default number of results requested per query
# Default maximum distance kept by query_collection (hits above it are dropped).
RETRIEVAL_THRESHOLD = 1.2  # Cosine distance: 0=identical, 2=opposite

# Chunking Configuration (defaults for the text splitter used during ingestion)
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

# Data paths
DATA_DIR = PROJECT_ROOT / "data"
CSV_FILE = DATA_DIR / "processed" / "sentio_plus_rag_ready.csv"
INGEST_LIMIT = 1000  # Set to None for all rows

# Echo the resolved paths so a wrong working directory is easy to spot.
print("‚úÖ Configuration loaded")
print(f"   Notebook dir: {NOTEBOOK_DIR}")
print(f"   Project root: {PROJECT_ROOT}")
print(f"   ChromaDB path: {CHROMA_PERSIST_PATH}")

  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Configuration loaded
   Notebook dir: C:\Users\hayde\OneDrive\Documents\GitHub\sentio-plus\Project
   Project root: C:\Users\hayde\OneDrive\Documents\GitHub\sentio-plus
   ChromaDB path: C:\Users\hayde\OneDrive\Documents\GitHub\sentio-plus\Project\chroma_data_v2


## 2. Initialize ChromaDB Vector Store

In [2]:
import uuid
import chromadb
from chromadb.config import Settings as ChromaSettings

# Build the ChromaDB client selected by CHROMA_CLIENT_TYPE.
# An unsupported value is rejected before any client is constructed.
if CHROMA_CLIENT_TYPE not in ("persistent", "http", "cloud"):
    raise ValueError(f"Unknown CHROMA_CLIENT_TYPE: {CHROMA_CLIENT_TYPE}. Use 'persistent', 'http', or 'cloud'.")

if CHROMA_CLIENT_TYPE == "cloud":
    # All three cloud settings come from the environment and must be non-empty.
    if not (CHROMA_CLOUD_API_KEY and CHROMA_CLOUD_TENANT and CHROMA_CLOUD_DATABASE):
        raise ValueError("CHROMA_API_KEY, CHROMA_TENANT, and CHROMA_DATABASE required for cloud client.")

    chroma_client = chromadb.CloudClient(
        tenant=CHROMA_CLOUD_TENANT,
        database=CHROMA_CLOUD_DATABASE,
        api_key=CHROMA_CLOUD_API_KEY,
        settings=ChromaSettings(anonymized_telemetry=False),
    )
    print(f"‚úÖ ChromaDB initialized (cloud): {CHROMA_CLOUD_TENANT}/{CHROMA_CLOUD_DATABASE}")

elif CHROMA_CLIENT_TYPE == "http":
    chroma_client = chromadb.HttpClient(
        host=CHROMA_HOST,
        port=CHROMA_PORT,
        settings=ChromaSettings(anonymized_telemetry=False),
    )
    print(f"‚úÖ ChromaDB initialized (http): {CHROMA_HOST}:{CHROMA_PORT}")

else:
    # "persistent": make sure the on-disk directory exists before opening it.
    CHROMA_PERSIST_PATH.mkdir(parents=True, exist_ok=True)

    chroma_client = chromadb.PersistentClient(
        path=str(CHROMA_PERSIST_PATH),
        settings=ChromaSettings(anonymized_telemetry=False),
    )
    print(f"‚úÖ ChromaDB initialized (persistent): {CHROMA_PERSIST_PATH}")

# Open the review collection, creating it with the cosine space setting if absent.
collection = chroma_client.get_or_create_collection(
    name=CHROMA_COLLECTION_NAME,
    metadata={"hnsw:space": "cosine"},
)

print(f"   Collection: {CHROMA_COLLECTION_NAME}")
print(f"   Documents in collection: {collection.count():,}")

‚úÖ ChromaDB initialized (persistent): C:\Users\hayde\OneDrive\Documents\GitHub\sentio-plus\Project\chroma_data_v2
   Collection: sentio_reviews
   Documents in collection: 1,071


## 3. Initialize LLM

In [3]:
import re
from langchain_core.messages import HumanMessage, SystemMessage

# Build the chat model (`llm`) for the configured provider.
# NOTE(review): every LLM_PROVIDER value other than "bedrock" is routed to the
# OpenAI-compatible client, so a misspelled provider is not rejected here.
if LLM_PROVIDER != "bedrock":
    # OpenAI-compatible endpoint (Ollama, LM Studio, vLLM, OpenAI)
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(
        base_url=LLM_BASE_URL,
        model=LLM_MODEL,
        api_key=LLM_API_KEY,
        temperature=LLM_TEMPERATURE,
        max_tokens=LLM_MAX_TOKENS,
    )
    print(f"‚úÖ LLM initialized: OpenAI-compatible / {LLM_MODEL}")
    print(f"   Base URL: {LLM_BASE_URL}")

else:
    # Bedrock-only dependencies are imported lazily so the other provider
    # does not need them installed.
    import boto3
    from langchain_aws import ChatBedrock

    # Both keys are read from the environment and are None when unset.
    if not (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY):
        raise ValueError("AWS credentials required for Bedrock. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.")

    bedrock_client = boto3.client(
        "bedrock-runtime",
        region_name=AWS_REGION,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    )

    llm = ChatBedrock(
        model_id=LLM_MODEL,
        client=bedrock_client,
        model_kwargs={
            "temperature": LLM_TEMPERATURE,
            "max_tokens": LLM_MAX_TOKENS,
        },
    )
    print(f"‚úÖ LLM initialized: Bedrock / {LLM_MODEL}")

‚úÖ LLM initialized: Bedrock / anthropic.claude-3-sonnet-20240229-v1:0


In [7]:
# Quick test: send one trivial prompt to confirm the configured model responds,
# then print the text of the reply.
response = llm.invoke("Say 'hello' and nothing else.")
print(f"LLM test response: {response.content}")

LLM test response: hello


## 4. Prompt Templates

In [4]:
# Source selection prompt - used to pre-filter relevant apps
# Placeholders: {sources} (the list of app names) and {query} (the user question).
# The model is asked for a comma-separated list, or "none".
SOURCE_SELECTION_PROMPT = """You are given a list of app names from product reviews.
Return ONLY the names of the apps that are relevant to the question.
If none are relevant, return "none".
Do not explain your reasoning.

Apps:
{sources}

Question:
{query}

Return format (comma-separated):
app1, app2, app3"""

# RAG prompt - used to generate answers from retrieved context
# Placeholders: {context} (formatted review excerpts) and {question}.
RAG_PROMPT = """You are a helpful assistant analyzing product reviews. Use the following review excerpts to answer the question.

Rules:
1. Be concise and direct.
2. Base your answer ONLY on the provided reviews.
3. If the reviews don't contain relevant information, say so.
4. Mention specific apps when relevant.
5. Include sentiment (positive/negative) when discussing features.

Reviews:
{context}

Question: {question}

Answer:"""

print("‚úÖ Prompt templates defined")

‚úÖ Prompt templates defined


## 5. Data Ingestion

Load CSV data, chunk it, and add to ChromaDB.

In [9]:
import pandas as pd
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the review CSV into `df` when it exists; otherwise explain what is
# missing. `df` stays undefined in the missing-file case, so the ingestion
# cell cannot run until the file is provided.
if not CSV_FILE.exists():
    print(f"‚ùå CSV file not found: {CSV_FILE}")
    print("   Please ensure the data file exists before running ingestion.")
else:
    print(f"‚úÖ CSV file found: {CSV_FILE}")

    # Load data
    df = pd.read_csv(CSV_FILE)
    print(f"   Loaded {len(df):,} rows")
    print(f"   Columns: {list(df.columns)}")

    # Show sample. A bare `df.head(2)` expression nested inside this branch is
    # evaluated and discarded (only a cell's last top-level expression is
    # auto-displayed), so the preview is printed explicitly.
    print(df.head(2))

  if not hasattr(np, "object"):



‚úÖ CSV file found: C:\Users\hayde\OneDrive\Documents\GitHub\sentio-plus\data\processed\sentio_plus_rag_ready.csv
   Loaded 50,000 rows
   Columns: ['review_id', 'app_id', 'app_name', 'category', 'rating', 'review_date', 'helpful_count', 'content_rating', 'app_avg_score', 'downloads', 'text_length', 'enriched_text']


In [10]:
def ingest_csv_to_chroma(
    df,
    collection,
    text_column="enriched_text",
    id_column="review_id",
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    batch_size=500,
    limit=None,
    clear_existing=False,
):
    """
    Ingest a DataFrame of reviews into a ChromaDB collection, with chunking.

    Each row's text is split into chunks; every chunk is stored with the
    row's metadata and an ID of the form
    ``com.<app_name>_<review_id>_chunk_<index>``.

    Args:
        df: DataFrame with review data. Must contain ``text_column``,
            ``id_column``, "app_name", "category", "rating", "review_date"
            and "helpful_count".
        collection: ChromaDB collection to add to
        text_column: Column containing review text
        id_column: Column containing review IDs (converted with ``int``)
        chunk_size: Max characters per chunk
        chunk_overlap: Overlap between chunks
        batch_size: Chunks per ``collection.add`` call (must be >= 1)
        limit: Max rows to process (None = all; 0 processes no rows)
        clear_existing: Whether to delete and recreate the collection first.
            This uses the notebook-level ``chroma_client``.

    Returns:
        Dict with "rows_processed", "chunks_added", "collection_count" and
        "collection" (the collection written to, which is a new object when
        ``clear_existing`` is True).

    Raises:
        ValueError: If a required column is missing or ``batch_size`` < 1.
    """
    # Validate columns
    required_cols = [text_column, id_column, "app_name", "category", "rating", "review_date", "helpful_count"]
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # A non-positive batch size would either crash range() or silently add
    # nothing, so reject it up front.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Drop rows with missing text
    df = df.dropna(subset=[text_column]).copy()
    print(f"After dropna: {len(df):,} rows")

    # Apply limit. Compare against None so that limit=0 really means
    # "no rows" instead of being mistaken for "no limit".
    if limit is not None:
        df = df.head(limit)
        print(f"Limited to: {len(df):,} rows")

    # Clear collection if requested. Recreate it under the same name that was
    # deleted, so a collection other than the notebook default is not swapped
    # for a different one.
    if clear_existing:
        collection_name = collection.name
        chroma_client.delete_collection(collection_name)
        collection = chroma_client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )
        print("üóëÔ∏è Collection cleared")

    # Initialize text splitter
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    # Prepare documents
    header_marker = "USER REVIEW: "
    all_chunks = []
    all_metadatas = []
    all_ids = []

    for _, row in df.iterrows():
        # Convert to str once so non-string cells cannot break .split(), then
        # keep only the text after the last header marker (split() returns the
        # whole text unchanged when the marker is absent).
        review_text = str(row[text_column]).split(header_marker)[-1]

        # Split into chunks
        chunks = splitter.split_text(review_text)

        # Create metadata and IDs for each chunk
        review_id = int(row[id_column])
        base_id = f"com.{row['app_name']}_{review_id}"

        for i, chunk in enumerate(chunks):
            all_chunks.append(chunk)
            all_metadatas.append({
                "review_id": review_id,
                "app_name": row["app_name"],
                "category": row["category"],
                "rating": int(row["rating"]),
                "date": str(row["review_date"]),
                "helpful_count": int(row["helpful_count"]),
                "chunk_index": i,
                "total_chunks": len(chunks),
            })
            all_ids.append(f"{base_id}_chunk_{i}")

    print(f"Prepared {len(all_chunks):,} chunks from {len(df):,} reviews")

    # Add to collection in batches
    total_added = 0
    for i in range(0, len(all_chunks), batch_size):
        end = min(i + batch_size, len(all_chunks))

        collection.add(
            documents=all_chunks[i:end],
            metadatas=all_metadatas[i:end],
            ids=all_ids[i:end],
        )

        total_added += (end - i)
        print(f"   ‚úÖ Batch {i}:{end} added ({total_added:,}/{len(all_chunks):,})")

    # Query the final size once and reuse it for the report and the result.
    collection_count = collection.count()

    print("\n‚úÖ Ingestion complete!")
    print(f"   Rows processed: {len(df):,}")
    print(f"   Chunks added: {total_added:,}")
    print(f"   Collection count: {collection_count:,}")

    return {
        "rows_processed": len(df),
        "chunks_added": total_added,
        "collection_count": collection_count,
        "collection": collection,
    }

In [5]:
# Ingest only into an empty collection; otherwise just report what is stored.
if collection.count() != 0:
    print(f"‚úÖ Collection already has {collection.count():,} documents.")
    print("   Set clear_existing=True to re-ingest.")
else:
    print("Collection is empty. Running ingestion...\n")
    result = ingest_csv_to_chroma(
        df=df,
        collection=collection,
        limit=INGEST_LIMIT,
        clear_existing=False,
    )
    # Keep using whichever collection object the ingestion wrote to
    # (it is replaced when the collection gets recreated).
    collection = result.get("collection", collection)

‚úÖ Collection already has 1,071 documents.
   Set clear_existing=True to re-ingest.


## 6. Helper Functions

In [7]:
def get_all_app_names(collection):
    """Return the set of distinct, non-empty ``app_name`` metadata values in the collection."""
    names = set()
    for meta in collection.get(include=["metadatas"])["metadatas"]:
        app_name = meta.get("app_name")
        if app_name:
            names.add(app_name)
    return names


def query_collection(
    collection,
    query_text,
    n_results=RETRIEVAL_TOP_K,
    threshold=RETRIEVAL_THRESHOLD,
    where=None,
):
    """
    Run a text query against the collection and keep only close-enough hits.

    Args:
        collection: ChromaDB collection
        query_text: Search query
        n_results: Max results to request
        threshold: Max distance to keep (lower = stricter). Cosine: 0=identical, 2=opposite.
        where: Optional metadata filter

    Returns:
        List of dicts with 'text', 'metadata', 'distance', in the order the
        collection returned them; empty when nothing was found or nothing
        passed the threshold.
    """
    raw = collection.query(
        query_texts=[query_text],
        n_results=n_results,
        where=where,
        include=["documents", "metadatas", "distances"],
    )

    # Only one query text is sent, so only the first result list is relevant.
    documents = raw["documents"]
    if not documents or not documents[0]:
        return []

    hits = zip(documents[0], raw["metadatas"][0], raw["distances"][0])
    return [
        {"text": text, "metadata": meta, "distance": dist}
        for text, meta, dist in hits
        if dist <= threshold
    ]


def select_relevant_sources(question, app_names):
    """
    Use the LLM to select relevant app sources for a question.

    The model's reply is split on commas and newlines, and each fragment is
    matched case-insensitively (ignoring surrounding quotes and trailing
    periods) against ``app_names``. Only names that actually exist in
    ``app_names`` are returned, using their stored spelling, so the result is
    always safe to use as a metadata filter.

    Args:
        question: User question
        app_names: Set of available app names

    Returns:
        List of relevant app names without duplicates, in the order the model
        listed them (empty if none are relevant or none could be matched)
    """
    if not app_names:
        return []

    prompt = SOURCE_SELECTION_PROMPT.format(
        sources=", ".join(sorted(app_names)),
        query=question,
    )

    content = llm.invoke(prompt).content

    # Remove any thinking tags (for reasoning models)
    content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)

    # Split on commas and line breaks; drop fragments that are only whitespace.
    fragments = [part.strip() for part in re.split(r"[,\n]", content)]
    fragments = [part for part in fragments if part]

    # Return empty if the whole reply is "none" (tolerating quotes/period/case).
    if len(fragments) == 1 and fragments[0].strip("\"'`. ").casefold() == "none":
        return []

    # Case-insensitive lookup back to the exact stored app name.
    known = {name.casefold(): name for name in app_names}

    selected = []
    for fragment in fragments:
        # Try the fragment as written first so names that legitimately end in
        # punctuation still match, then retry with wrapping punctuation removed.
        for candidate in (fragment, fragment.strip("\"'`. ")):
            match = known.get(candidate.casefold())
            if match is not None:
                if match not in selected:
                    selected.append(match)
                break

    return selected


def format_context(docs):
    """Render retrieved docs as "[app - rating]" headers followed by their text, separated by blank lines."""

    def render(doc):
        # Missing metadata falls back to "Unknown" / "?" in the header.
        meta = doc["metadata"]
        app_label = meta.get("app_name", "Unknown")
        stars = meta.get("rating", "?")
        return f"[{app_label} - {stars}‚òÖ]\n{doc['text']}"

    return "\n\n".join(map(render, docs))


print("‚úÖ Helper functions defined")

‚úÖ Helper functions defined


## 7. RAG Query Pipeline

In [8]:
# NOTE(review): @tool wraps this function so it can be handed to the agent
# (it is passed in the `tools` list given to create_agent in the test cell).
# The docstring and signature are presumably consumed at runtime as the tool's
# description/schema — confirm before rewording or annotating them.
# NOTE(review): the docstring's "Returns" section describes a dict, but the
# live success path returns the formatted context string; only the no-results
# branch returns a dict. The answer-generation steps (5 and 6) are disabled:
# they sit inside a string literal placed after the return.
@tool
def rag_query(
    question,
    filter_by_source=True,
    top_k=RETRIEVAL_TOP_K,
    threshold=RETRIEVAL_THRESHOLD,
):
    """
    Answer a question using RAG.
    
    Args:
        question: User question
        filter_by_source: Whether to use LLM to pre-filter sources
        top_k: Number of documents to retrieve
        threshold: Distance threshold for filtering
    
    Returns:
        Dict with answer, sources, num_docs, and selected_sources
    """
    print(f"üìù Question: {question}")
    print(f"   Filter by source: {filter_by_source}")
    print()
    
    # Step 1: Optionally filter sources using LLM
    # Reads the notebook-level `collection`; an empty selection means the
    # retrieval below runs without a metadata filter.
    metadata_filter = None
    selected_sources = []
    
    if filter_by_source:
        app_names = get_all_app_names(collection)
        selected_sources = select_relevant_sources(question, app_names)
        
        if selected_sources:
            metadata_filter = {"app_name": {"$in": selected_sources}}
            print(f"üéØ Filtering by apps: {selected_sources}")
        else:
            print("üîç No specific app filter applied")
    
    # Step 2: Retrieve relevant documents
    docs = query_collection(
        collection=collection,
        query_text=question,
        n_results=top_k,
        threshold=threshold,
        where=metadata_filter,
    )
    
    print(f"üìö Retrieved {len(docs)} documents")
    
    # Step 3: Handle no results
    # This branch returns a dict, unlike the string returned on success.
    if not docs:
        return {
            "answer": "I couldn't find any relevant reviews to answer your question.",
            "sources": [],
            "num_docs": 0,
            "selected_sources": selected_sources,
        }
    
    # Step 4: Format context
    context = format_context(docs)

    # The formatted review excerpts are the result; no answer is generated here.
    return context
    
    '''
    # Step 5: Generate answer
    prompt = RAG_PROMPT.format(context=context, question=question)
    response = llm.invoke(prompt)
    answer = response.content
    
    # Step 6: Extract unique sources
    sources = list({doc["metadata"].get("app_name", "unknown") for doc in docs})
    
    print(f"üì± Sources used: {sources}")
    print()
    
    return {
        "answer": answer,
        "sources": sources,
        "num_docs": len(docs),
        "selected_sources": selected_sources,
    }
    '''

print("‚úÖ RAG query function defined")

‚úÖ RAG query function defined


## 8. Test Queries

In [10]:
#agent = init_chat_model(BEDROCK_MODEL)

# Both invocations below pass this same config, i.e. the same thread_id.
# Presumably the checkpointer keys conversation memory on it — TODO confirm.
config = {
    "configurable": {
        "thread_id": "user-1"
    }
}

# The retrieval function is the only tool offered to the agent.
tools = [rag_query]

agent = create_agent(
    model=llm,
    tools=tools,
    checkpointer=InMemorySaver(),  # Enables memory
    name="conversational_agent"
)


# First turn: the question is read interactively from stdin via input().
result = agent.invoke({
    "messages": [
        ("user", input())
    ]
},
config=config)

# Prints the last message object itself (its repr), not just its text content.
print(result["messages"][-1])


# Second turn on the same thread: asks about the previous question to check
# that the earlier turn is remembered.
result = agent.invoke({
    "messages": [
        ("user", "What was the last question?")
    ]
},
config=config)

print(result["messages"][-1])

üìù Question: What is Google Wallet?
   Filter by source: True

üéØ Filtering by apps: ['Google Wallet']
üìö Retrieved 5 documents
content='Based on the search results, Google Wallet appears to be a digital wallet app from Google that allows users to store payment cards like credit/debit cards and gift cards on their mobile device to make contactless payments. However, the reviews indicate some frustrations with limited functionality compared to competitors like Apple Wallet, issues adding certain card types like insurance cards, and occasional glitches or errors.\n\nIn summary, Google Wallet is a mobile payment and digital wallet service from Google, but the reviews suggest it may have some limitations and usability issues compared to alternatives. The core functionality is storing payment cards for contactless mobile payments, but users seem to want more capabilities to store other card types like IDs, insurance cards, etc.' additional_kwargs={'usage': {'prompt_tokens': 965, 'comp

In [None]:
# Test query 1: General question
# NOTE(review): rag_query is defined with the @tool decorator and its success
# path returns the formatted context string, so this direct call and the
# result["answer"] lookup presumably no longer work as written; the recorded
# output appears to come from an earlier version — TODO confirm.
result = rag_query("What do people like about apps?")

print("=" * 60)
print("üí¨ ANSWER:")
print("=" * 60)
print(result["answer"])

üìù Question: What are common complaints about apps?
   Filter by source: True

üîç No specific app filter applied
üìö Retrieved 5 documents
üì± Sources used: ['Google Wallet', 'Western Union Send Money Now']

üí¨ ANSWER:
Based on the provided reviews, some common complaints about apps include:

1. Usability issues (negative sentiment):
   - "Painful to use"
   - "Intermittent dropouts without explanation"
   - "Seemingly completely random 'verification' requirements"
   - "The app crashes often during the process"

2. Technical issues/glitches (negative sentiment):
   - "It is too glitchy now"
   - "Slow on launch, and usually requires a relaunch because it just loads forever"
   - "The app needs technical attention"

3. Functionality issues (negative sentiment):
   - "The apps been pretty much useless"
   - "It's saying I haven't passed security?"
   - "Constantly robs you of loyalty points everytime you go to use them"
   - "I've had friends have their money get stuck processin

In [15]:
# Test query 2: Specific topic
# NOTE(review): rag_query is defined with the @tool decorator and its success
# path returns the formatted context string, so this direct call and the
# result["answer"] lookup presumably no longer work as written; the recorded
# output appears to come from an earlier version — TODO confirm.
result = rag_query("What are common complaints about apps?")

print("=" * 60)
print("üí¨ ANSWER:")
print("=" * 60)
print(result["answer"])

üìù Question: What are common complaints about apps?
   Filter by source: True

üîç No specific app filter applied
üìö Retrieved 5 documents
üì± Sources used: ['Google Wallet', 'Western Union Send Money Now']

üí¨ ANSWER:
Based on the provided reviews, some common complaints about apps include:

1. Poor performance and glitches (negative)
   - "Terrible app. Painful to use, intermittent dropouts without explanation"
   - "App was great over a year ago. Transactions were seamless. It is too glitchy now."
   - "The app crashes often during the process"

2. Security and verification issues (negative)
   - "seemingly completely random "verification" requirements"
   - "it's saying I haven't passed security?"

3. Difficulty using loyalty points/rewards (negative)
   - "Constantly robs you of loyalty points everytime you go to use them."
   - "194 wu points and still being charged!"

4. Slow or unresponsive (negative)
   - "Slow on launch, and usually requires a relaunch because it just

In [16]:
# Test query 3: App-specific (without source filtering)
# NOTE(review): rag_query is defined with the @tool decorator and its success
# path returns the formatted context string, so this direct call and the
# result["answer"] lookup presumably no longer work as written; the recorded
# output appears to come from an earlier version — TODO confirm.
result = rag_query(
    "What do people think about navigation apps?",
    filter_by_source=False,
)

print("=" * 60)
print("üí¨ ANSWER:")
print("=" * 60)
print(result["answer"])

üìù Question: What do people think about navigation apps?
   Filter by source: False

üìö Retrieved 5 documents
üì± Sources used: ['Google Wallet']

üí¨ ANSWER:
The provided reviews do not contain any relevant information about navigation apps. The reviews are focused on discussing the Google Wallet app and its features related to storing payment cards, tickets, and passes. There are no mentions of navigation or navigation apps in these reviews.


## 9. Collection Stats

In [17]:
def get_collection_stats(collection):
    """
    Summarize the collection: document count plus distinct apps and categories.

    The "categories" and "apps" entries are sorted lists when there are at
    most 20 distinct values, otherwise a short "<n> categories" / "<n> apps"
    string.
    """
    metadatas = collection.get(include=["metadatas"])["metadatas"]

    def distinct(field):
        # Missing or empty values are ignored.
        return {meta.get(field) for meta in metadatas if meta.get(field)}

    def summarize(values, noun):
        if len(values) <= 20:
            return sorted(values)
        return f"{len(values)} {noun}"

    categories = distinct("category")
    apps = distinct("app_name")

    return {
        "total_documents": collection.count(),
        "unique_categories": len(categories),
        "unique_apps": len(apps),
        "categories": summarize(categories, "categories"),
        "apps": summarize(apps, "apps"),
    }


# Compute the statistics once, then print a short report.
stats = get_collection_stats(collection)
print("üìä Collection Statistics")
print("=" * 40)
print(f"Total documents: {stats['total_documents']:,}")
print(f"Unique apps: {stats['unique_apps']}")
print(f"Unique categories: {stats['unique_categories']}")
print()
# These two entries are either a sorted list or, above 20 distinct values,
# a short count string (see get_collection_stats).
print("Categories:", stats['categories'])
print()
print("Apps:", stats['apps'])

üìä Collection Statistics
Total documents: 1,071
Unique apps: 2
Unique categories: 1

Categories: ['Finance']

Apps: ['Google Wallet', 'Western Union Send Money Now']


## 10. Interactive Query (Optional)

In [19]:
# Interactive query cell - modify the question and run
# NOTE(review): rag_query is defined with the @tool decorator and its success
# path returns the formatted context string, so this direct call and the
# result["answer"] / result["num_docs"] / result["sources"] lookups presumably
# no longer work as written; the recorded output appears to come from an
# earlier version — TODO confirm.
QUESTION = "Which apps have the best reviews?"
FILTER_BY_SOURCE = True

result = rag_query(QUESTION, filter_by_source=FILTER_BY_SOURCE)

print("=" * 60)
print("üí¨ ANSWER:")
print("=" * 60)
print(result["answer"])
print()
print(f"üìä Documents used: {result['num_docs']}")
print(f"üì± Sources: {result['sources']}")

üìù Question: Which apps have the best reviews?
   Filter by source: True

üîç No specific app filter applied
üìö Retrieved 5 documents
üì± Sources used: ['Google Wallet', 'Western Union Send Money Now']

üí¨ ANSWER:
Based solely on the provided reviews, none of the apps mentioned (Google Wallet, Western Union Send Money Now) have positive reviews. All of the reviews are negative, criticizing various issues with the apps such as poor functionality, errors, and problems using features like loyalty points. The reviews do not contain any information suggesting that any of these apps have the "best" reviews.

üìä Documents used: 5
üì± Sources: ['Google Wallet', 'Western Union Send Money Now']


## Utility: Clear Collection

In [None]:
# Uncomment to clear the collection and re-ingest
# chroma_client.delete_collection(CHROMA_COLLECTION_NAME)
# collection = chroma_client.get_or_create_collection(
#     name=CHROMA_COLLECTION_NAME,
#     metadata={"hnsw:space": "cosine"},
# )
# print(f"üóëÔ∏è Collection cleared. Count: {collection.count()}")