In [1]:
import chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [16]:
# Load the item catalogue and the section layout from CSV files in the
# current working directory.
items_df = pd.read_csv("items.csv")
sections_df = pd.read_csv("sections.csv")

# Left-join every item to its section record (items.section -> sections.id).
# Column names present in both frames receive pandas' default suffixes:
# `_x` for the items side and `_y` for the sections side.
merged_df = items_df.merge(
    sections_df,
    how='left',
    left_on='section',
    right_on='id',
)



In [17]:
# Preview the first five rows of the merged frame.
merged_df.head(n=5)


Unnamed: 0,id_x,name,category,section,aisle,shelf,x_x,y_x,deal,id_y,section_name,color,x_y,y_y,width,height
0,1,Apples,Fruit,produce,1,A,2,2,20% Off,produce,Fresh Produce,#059669,1,1,4,3
1,2,Bananas,Fruit,produce,1,B,4,2,,produce,Fresh Produce,#059669,1,1,4,3
2,3,Carrots,Vegetable,produce,2,A,2,3,,produce,Fresh Produce,#059669,1,1,4,3
3,4,Spinach,Vegetable,produce,2,B,4,3,,produce,Fresh Produce,#059669,1,1,4,3
4,5,Oranges,Fruit,produce,1,A,1,1,,produce,Fresh Produce,#059669,1,1,4,3


In [18]:
def _describe_item(row):
    """Build the sentence that is embedded and stored for one merged item row.

    The deal text is included only when the row actually carries one. A
    missing deal is NaN after the CSV load, and formatting it directly would
    put the literal word "nan" into the description (and into the embedding).
    """
    deal = row.get('deal')
    # pd.notna also returns False for None, which covers a frame with no `deal` column.
    deal_text = f"{deal} " if pd.notna(deal) and str(deal).strip() else ""
    return (
        f"{row['name']}: {row['category']} item. {deal_text}"
        f"Located in section {row['section']}, aisle {row['aisle']}, shelf {row['shelf']}."
    )


# One natural-language description per item; this is the text that gets embedded.
merged_df['content'] = merged_df.apply(_describe_item, axis=1)

In [20]:
# Create a Chroma client with default settings (no arguments are passed here)
# and fetch the "store_items" collection, creating it if it does not exist yet.
chroma_client = chromadb.Client()
collection = chroma_client.get_or_create_collection(name="store_items")


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [21]:
# Load the "all-MiniLM-L6-v2" sentence-transformers model. Its `encode` method
# is used for both the stored item descriptions and the incoming queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

In [24]:
# Build the description list once so the stored document and its embedding are
# guaranteed to come from the same string.
item_documents = merged_df["content"].tolist()
# Plain Python lists, matching how the query embedding is passed to Chroma.
item_embeddings = embedder.encode(item_documents).tolist()

metadata_columns = [
    "name", "category", "section", "aisle", "shelf", "x_x", "y_x", "deal",
    "id_y", "section_name", "color", "x_y", "y_y", "width", "height",
]
# Items without a deal carry NaN in `deal`; store an empty string instead so
# the metadata holds a clean, serializable value.
item_metadatas = (
    merged_df[metadata_columns]
    .assign(deal=lambda frame: frame["deal"].astype(object).where(frame["deal"].notna(), ""))
    .to_dict(orient="records")
)

# `upsert` keeps this cell idempotent: re-running it refreshes the existing
# entries rather than attempting to add the same ids a second time.
collection.upsert(
    # The row index is used as the id; `id_x` (the item id column after the
    # merge) could be used instead if it is unique. TODO confirm.
    ids=[str(i) for i in merged_df.index],
    documents=item_documents,
    embeddings=item_embeddings,
    metadatas=item_metadatas,
)


Failed to send telemetry event CollectionAddEvent: capture() takes 1 positional argument but 3 were given


In [27]:
def query_item_location(query: str):
    """Look up the stored item that best matches a free-text query.

    The query is embedded with ``embedder`` and the single nearest entry in
    ``collection`` is fetched; its metadata is reshaped into a location dict.

    Args:
        query: Free-text question or item name,
            e.g. "Where can I find strawberries?".

    Returns:
        A dict with the item's ``name``, ``category``, ``deal``, ``section``,
        ``aisle`` and ``shelf``, plus ``coordinates`` with ``x``, ``y``,
        ``width`` and ``height``. The coordinates are read from the ``x_y`` /
        ``y_y`` / ``width`` / ``height`` metadata, i.e. the section-side
        columns of the items/sections merge, so they describe the section
        rectangle rather than the item's own ``x_x`` / ``y_x`` position.
        ``deal`` is ``None`` if the stored metadata has no deal entry.
        Returns ``None`` when the collection yields no match (for example
        when it is empty).
    """
    embedding = embedder.encode(query).tolist()
    result = collection.query(query_embeddings=[embedding], n_results=1)

    # `metadatas` holds one list of hits per query embedding; only one query
    # is sent, so the first inner list is the one of interest.
    hits = (result.get('metadatas') or [[]])[0]
    if not hits or hits[0] is None:
        # No stored item to report; avoid an opaque IndexError.
        return None
    metadata = hits[0]

    return {
        "name": metadata['name'],
        "category": metadata['category'],
        # Deals are optional, so tolerate entries stored without one.
        "deal": metadata.get('deal'),
        "section": metadata['section'],
        "aisle": metadata['aisle'],
        "shelf": metadata['shelf'],
        "coordinates": {
            "x": metadata['x_y'],
            "y": metadata['y_y'],
            "width": metadata['width'],
            "height": metadata['height']
        }
    }

In [28]:
# Example lookup: ask for an item in natural language and print the location dict.
result = query_item_location(query="Where can I find strawberries?")
print(result)

{'name': 'Strawberries', 'category': 'Fruit', 'deal': '15% Off', 'section': 'produce', 'aisle': 1, 'shelf': 'A', 'coordinates': {'x': 1, 'y': 1, 'width': 4, 'height': 3}}
