In [9]:
import chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer


In [11]:
# Load the product catalogue. The cell below relies on the columns
# product_name, description and section_name being present.
df = pd.read_csv("data.csv")

# Build the text that gets embedded for each product. Iterating the three
# columns together avoids constructing a Series per row, which is what
# DataFrame.apply(axis=1) does, and yields the same formatted strings.
df["content"] = [
    f"{product_name}: {description} (Section: {section_name})"
    for product_name, description, section_name in zip(
        df["product_name"], df["description"], df["section_name"]
    )
]


In [12]:
# Checkpoint identifier handed to SentenceTransformer; kept as a named
# constant so the embedding model can be swapped in one place.
MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)


In [13]:
# Encode every product's content string in a single batch call, then convert
# the encoder output to plain Python lists via .tolist() for later use.
content_texts = df["content"].tolist()
embeddings = model.encode(content_texts).tolist()


In [14]:
# Name of the Chroma collection that holds the product vectors.
COLLECTION_NAME = "products"

# chromadb.Client() is created with default settings here
# (NOTE(review): presumably an in-memory, non-persistent client; confirm).
chroma_client = chromadb.Client()
# get_or_create makes this cell safe to re-run: an existing collection with
# this name is reused instead of raising.
collection = chroma_client.get_or_create_collection(name=COLLECTION_NAME)


In [15]:
# Index every product: the content string is the document, its precomputed
# embedding is the vector, and product_name / section_name ride along as
# metadata. Ids are passed as strings, hence the str() conversion.
product_ids = [str(pid) for pid in df["product_id"]]
product_metadatas = df[["product_name", "section_name"]].to_dict(orient="records")

# upsert (rather than add) keeps this cell idempotent: the collection is
# obtained with get_or_create_collection, so on a re-run the ids already
# exist. upsert inserts new ids and overwrites existing ones, so the stored
# documents always reflect the current contents of df.
collection.upsert(
    ids=product_ids,
    embeddings=embeddings,
    documents=df["content"].tolist(),
    metadatas=product_metadatas,
)


In [22]:
# Free-text question to look up in the product collection.
query = "Where can I find Face Wash?"
# The query is wrapped in a one-element list before encoding, so the result
# converts to a list containing a single embedding.
query_embedding = model.encode([query]).tolist()

# Ask for only the single closest product.
results = collection.query(query_embeddings=query_embedding, n_results=1)

# Both result fields are indexed [0][0]: the first (only) query, then its
# first (only) hit.
print("Top Match:")
top_document = results["documents"][0][0]
print(top_document)
top_metadata = results["metadatas"][0][0]
print("Metadata:", top_metadata)


Top Match:
Face Wash: Gentle face cleanser (Section: Personal Care)
Metadata: {'product_name': 'Face Wash', 'section_name': 'Personal Care'}


In [18]:
# Preview the first five rows, including the derived "content" column.
df.head(n=5)

Unnamed: 0,product_id,product_name,description,section_name,content
0,1,Banana,Fresh yellow bananas,Fruits,Banana: Fresh yellow bananas (Section: Fruits)
1,2,Toothpaste,Colgate advanced cleaning,Personal Care,Toothpaste: Colgate advanced cleaning (Section...
2,3,Eggs,Organic free-range eggs,Dairy,Eggs: Organic free-range eggs (Section: Dairy)
3,4,Chips,Salted potato chips,Snacks,Chips: Salted potato chips (Section: Snacks)
4,5,Apple,Red delicious apples,Fruits,Apple: Red delicious apples (Section: Fruits)
