In [1]:
!pip install chromadb pandas crewai sentence-transformers




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import pandas as pd
import chromadb
from sentence_transformers import SentenceTransformer

In [2]:
# Load CSVs
catalog_df = pd.read_csv("catalog.csv")
support_df = pd.read_csv("support_kb.csv")
orders_df = pd.read_csv("orders.csv")

In [3]:
embed_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

In [4]:
def embed_text(text: str):
    """Generate embeddings for the given text using SentenceTransformers"""
    return embed_model.encode(text).tolist()

In [5]:
# Init ChromaDB
chroma_client = chromadb.PersistentClient(path="chroma_multi_store")

In [6]:
catalog_store = chroma_client.get_or_create_collection("catalog")
support_store = chroma_client.get_or_create_collection("support_kb")
orders_store = chroma_client.get_or_create_collection("orders")

In [10]:
embed_text("Wireless Mouse - Ergonomic wireless mouse with 2.4GHz USB receiver ($19.99)")

[-0.25410377979278564,
 -0.10110822319984436,
 -0.2622339427471161,
 0.6751374006271362,
 -0.6061552166938782,
 0.5469239950180054,
 1.1228885650634766,
 0.5166686773300171,
 0.15753264725208282,
 0.5211120843887329,
 0.8774848580360413,
 0.014699552208185196,
 0.27030885219573975,
 0.2207338809967041,
 0.3710798919200897,
 0.7120921015739441,
 0.037192754447460175,
 -0.46531176567077637,
 0.17864373326301575,
 -0.48255467414855957,
 0.7355422377586365,
 0.39469587802886963,
 -0.6830536723136902,
 -0.46522387862205505,
 -0.6530149579048157,
 0.5158692598342896,
 -0.946524441242218,
 -0.47287315130233765,
 0.8810451626777649,
 0.8650549054145813,
 -0.3660692274570465,
 -0.9254812598228455,
 0.15549001097679138,
 -0.8258579969406128,
 -0.5504652857780457,
 -0.2607414722442627,
 0.049021050333976746,
 0.6054009795188904,
 -1.025952696800232,
 -0.4044645428657532,
 0.4752812385559082,
 0.6864852905273438,
 0.6667174696922302,
 -1.0097296237945557,
 0.5097528100013733,
 -0.36077067255973816

In [35]:
# Insert embeddings
for idx, row in catalog_df.iterrows():
    text = f"{row['product_name']} - {row['description']} (${row['price']})"
    catalog_store.add(documents=[text], ids=[f"c{idx}"], embeddings=[embed_text(text)])
    print(text, idx, embed_text(text))

Wireless Mouse - Ergonomic wireless mouse with 2.4GHz USB receiver ($19.99) 0 [-0.25410377979278564, -0.10110822319984436, -0.2622339427471161, 0.6751374006271362, -0.6061552166938782, 0.5469239950180054, 1.1228885650634766, 0.5166686773300171, 0.15753264725208282, 0.5211120843887329, 0.8774848580360413, 0.014699552208185196, 0.27030885219573975, 0.2207338809967041, 0.3710798919200897, 0.7120921015739441, 0.037192754447460175, -0.46531176567077637, 0.17864373326301575, -0.48255467414855957, 0.7355422377586365, 0.39469587802886963, -0.6830536723136902, -0.46522387862205505, -0.6530149579048157, 0.5158692598342896, -0.946524441242218, -0.47287315130233765, 0.8810451626777649, 0.8650549054145813, -0.3660692274570465, -0.9254812598228455, 0.15549001097679138, -0.8258579969406128, -0.5504652857780457, -0.2607414722442627, 0.049021050333976746, 0.6054009795188904, -1.025952696800232, -0.4044645428657532, 0.4752812385559082, 0.6864852905273438, 0.6667174696922302, -1.0097296237945557, 0.50975

In [11]:
len(embed_text("Wireless Mouse - Ergonomic wireless mouse with 2.4GHz USB receiver ($19.99)"))

1024

In [13]:
for idx, row in support_df.iterrows():
    text = f"Q: {row['question']} A: {row['answer']}"
    support_store.add(documents=[text], ids=[f"s{idx}"], embeddings=[embed_text(text)])

for idx, row in orders_df.iterrows():
    text = f"Order {row['order_id']} - {row['user_name']} ordered {row['product_name']} ({row['status']} on {row['date']})"
    orders_store.add(documents=[text], ids=[f"o{idx}"], embeddings=[embed_text(text)])

In [17]:
query = 'wireless mouse'
results = catalog_store.query(query_embeddings=[embed_text(query)], n_results = 3)
[str(item) for sublist in results["documents"] for item in sublist]

['Wireless Mouse - Ergonomic wireless mouse with 2.4GHz USB receiver ($19.99)',
 'Wireless Charger - Fast charging pad for smartphones ($29.99)',
 'Wireless Earbuds - True wireless earbuds with charging case ($99.99)']

In [18]:
def search_catalog(query: str) -> dict:
    results = catalog_store.query(query_embeddings=[embed_text(query)], n_results=3)
    # Flatten nested list and keep only strings
    flat_results = [str(item) for sublist in results["documents"] for item in sublist]
    return {"results": flat_results}

def search_support(query: str) -> dict:
    results = support_store.query(query_embeddings=[embed_text(query)], n_results=3)
    flat_results = [str(item) for sublist in results["documents"] for item in sublist]
    return {"results": flat_results}

def search_orders(query: str) -> dict:
    results = orders_store.query(query_embeddings=[embed_text(query)], n_results=3)
    flat_results = [str(item) for sublist in results["documents"] for item in sublist]
    return {"results": flat_results}

In [19]:
search_catalog("4k monitor")

{'results': ['4K Monitor - 27-inch 4K UHD display with HDMI and DisplayPort ($299.99)',
  'Smart TV - 50-inch 4K UHD Smart TV with streaming apps ($449.99)',
  'Drone - Quadcopter drone with 4K camera and GPS ($599.99)']}

In [20]:
search_support("how to return the order")

{'results': ['Q: How do I return a product? A: You can return a product within 30 days using the returns portal in your account.',
  'Q: How do I cancel an order? A: You can cancel an order within 24 hours before it ships by visiting your Orders page.',
  'Q: How can I track my order? A: You can track your order from the Orders section in your account dashboard.']}

In [21]:
search_orders("Who ordered wireless mouse")

{'results': ['Order 5001 - John Doe ordered Wireless Mouse (Delivered on 2024-08-15)',
  'Order 5022 - Benjamin Allen ordered Wireless Earbuds (Processing on 2024-08-23)',
  'Order 5006 - Michael Lee ordered Bluetooth Speaker (Delivered on 2024-07-28)']}

In [22]:
from crewai import Agent, Task, Crew, LLM
from crewai.tools import tool

In [23]:
os.environ["GEMINI_API_KEY"] = "your_api_key"

In [24]:
# --- Initialize LLM (Azure OpenAI or any other supported model) ---
llm = LLM(
    model="gemini/gemini-2.0-flash", 
    temperature=0.2
    )

In [27]:
# --- Define tools with docstrings + type annotations ---
@tool("search_catalog")
def tool_search_catalog(query: str) -> dict:
    """Search the catalog for product details, prices, and availability."""
    return search_catalog(query)

@tool("search_support")
def tool_search_support(query: str) -> dict:
    """Search the customer support knowledge base (returns, warranty, shipping)."""
    return search_support(query)

@tool("search_orders")
def tool_search_orders(query: str) -> dict:
    """Search the order database for order status, delivery, or cancellations."""
    return search_orders(query)

In [28]:
rag_router = Agent(
    role = 'RAG Router Agent',
    goal="Understand the user query, pick the right tool, run it, and refine results into a clear final answer.",
    backstory=(
        "You are the RAG Router Agent, a single powerful agent that handles the entire RAG process. "
        "You decide which vector store to query, execute the retrieval, "
        "review multiple results, and return only the best refined answer."
    ),
    llm = llm,
    tools = [tool_search_catalog, tool_search_support, tool_search_orders]
)

In [29]:
# ------------------------------
# Query Runner
# ------------------------------
def run_query(user_query: str) -> str:
    task = Task(
        description=(
            f"The user asked: \"{user_query}\"\n\n"
            "Steps:\n"
            "1. Decide which tool (catalog, support, orders) is most relevant.\n"
            "2. Execute that tool with the query.\n"
            "3. The tool will return up to 3 possible matches.\n"
            "4. Read and compare them carefully.\n"
            "5. Select the single most relevant result that answers the user’s question.\n"
            "6. Rewrite it as a natural, human-friendly response.\n\n"
            "Do NOT just list all results. Do NOT return raw tool output.\n"
            "Only provide the final refined answer."
        ),
        expected_output="A single, concise, natural-language answer to the user’s query.",
        agent=rag_router
    )

    crew = Crew(agents=[rag_router], tasks=[task], verbose=True)
    crew_output = crew.kickoff()
    return crew_output.raw.strip()

In [30]:
# ------------------------------
if __name__ == "__main__":
    query = "Which product the Chris Johnson have ordered?"
    answer = run_query(query)
    print("User Query:", query)
    print("Final Answer:", answer)

Output()

Output()

User Query: Which product the Chris Johnson have ordered?
Final Answer: Chris Johnson ordered Smartwatch in order number 5007, which is currently processing as of 2024-08-22.


In [31]:
# ------------------------------
if __name__ == "__main__":
    query = "How do I redeem a voucher?"
    answer = run_query(query)
    print("User Query:", query)
    print("Final Answer:", answer)

Output()

Output()

User Query: How do I redeem a voucher?
Final Answer: Enter your gift voucher code at checkout to redeem the balance.


In [36]:
if __name__ == "__main__":
    query = "Do we have smart tv related products?"
    answer = run_query(query)
    print("User Query:", query)
    print("Final Answer:", answer)

Output()

Output()

User Query: Do we have smart tv related products?
Final Answer: Smart TV - 50-inch 4K UHD Smart TV with streaming apps ($449.99)
