In [29]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import json
from pathlib import Path

def score_deals_by_comparison():
    model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight but effective model
    
    # Get all items across stores
    all_items = []
    for file in Path('flyer_data').glob('*.json'):
        with open(file) as f:
            data = json.load(f)
            for item in data['items']:
                all_items.append({
                    'store': data['store'],
                    'name': item['name'],
                    'brand': item['brand'],
                    'price': float(item['price']),
                    'image': item['cutout_image_url'],
                })
    
    # Encode all item names
    item_names = [f"{item['brand']} {item['name']}" if item['brand'] else item['name'] 
                 for item in all_items]
    embeddings = model.encode(item_names)
    
    # Add embeddings to all_items
    for i, item in enumerate(all_items):
        item['embedding'] = embeddings[i]
    
    # Find similar items and compare prices
    deals = []
    for i, item in enumerate(all_items):
        similarities = cosine_similarity([embeddings[i]], embeddings)[0]
        similar_indices = [j for j in range(len(similarities)) 
                         if similarities[j] > 0.8 and j != i]  # 80% similarity threshold
        
        if similar_indices:
            similar_prices = [all_items[j]['price'] for j in similar_indices]
            avg_price = sum(similar_prices) / len(similar_prices)
            price_diff = avg_price - item['price']
            
            if price_diff > 0:  # It's cheaper than similar items
                deals.append({
                    'item': item,
                    'savings': price_diff,
                    'similar_items': [all_items[j] for j in similar_indices],
                    'similarity_score': similarities[similar_indices].mean()
                })
    
    return sorted(deals, key=lambda x: x['savings'], reverse=True)

deals = score_deals_by_comparison()
# print the top 10 deals
for deal in deals[:10]:
    print(deal)

{'item': {'store': 'Costco ', 'name': 'Round Brilliant 2.00 ctw VS2 Clarity, I Color Diamond Platinum Band', 'brand': None, 'price': 1499.97, 'image': 'https://f.wishabi.net/page_items/354099400/1729260171/extra_large.jpg', 'embedding': array([ 1.42899202e-02,  3.49805765e-02, -4.23535965e-02,  2.32675360e-04,
       -6.25097677e-02, -4.10071500e-02,  1.00660808e-01,  8.84054899e-02,
       -6.25300631e-02, -2.75153853e-02, -6.72453418e-02, -3.13200578e-02,
       -1.93514563e-02, -8.50076750e-02,  7.87793286e-03,  6.08386807e-02,
        7.06749707e-02, -2.08859034e-02,  1.88123423e-03, -6.55619577e-02,
       -1.06091537e-02, -5.92677146e-02, -5.31646945e-02,  6.50141481e-03,
       -7.80382901e-02,  7.14555606e-02, -3.70951220e-02,  2.96578165e-02,
        5.42396083e-02, -8.75476971e-02, -2.77099162e-02,  9.39310268e-02,
       -9.92873684e-03, -9.87314060e-03, -7.78964832e-02, -1.10300846e-01,
        2.32353737e-03,  5.18931672e-02, -2.91835759e-02, -3.86472140e-03,
        7.415

In [31]:
def pretty_print_deal(deal):
    print(f'Name: {deal["item"]["name"]}\nBrand: {deal["item"]["brand"]}\nPrice: ${deal["item"]["price"]:.2f}\nSavings: ${deal["savings"]:.2f}\n Image cutout: {deal["item"]["image"]}\n Similar item cutouts: {[x["image"] for x in deal["similar_items"]]}\n')

# search the deals by name
# search_term = input('Enter a search term: ')
# for deal in deals:
#     if search_term.lower() in deal['item']['name'].lower():
#         pretty_print_deal(deal)

search_term = 'fruit'
fruit_deals = []
model = SentenceTransformer('all-MiniLM-L6-v2')
search_embedding = model.encode([search_term])[0]  # Get the first (and only) embedding
for deal in deals:
    similarity = cosine_similarity([search_embedding], [deal['item']['embedding']])[0][0]
    if similarity > 0.4:
        pretty_print_deal(deal)
        fruit_deals.append(deal)

print(f'Found {len(fruit_deals)} deals for {search_term}')


Name: Welch's® 3-pk. Fruit Snacks
Brand: Welch's
Price: $1.25
Savings: $4.75
 Image cutout: https://f.wishabi.net/page_items/351837303/1727658841/extra_large.jpg
 Similar item cutouts: ['https://f.wishabi.net/page_items/354129153/1729278715/extra_large.jpg', 'https://f.wishabi.net/page_items/354099482/1729257729/extra_large.jpg']

Name: Strawberries
Brand: None
Price: $2.98
Savings: $3.02
 Image cutout: https://f.wishabi.net/page_items/353711218/1729056674/extra_large.jpg
 Similar item cutouts: ['https://f.wishabi.net/page_items/353326754/1728547996/extra_large.jpg']

Name: Honeycrisp Apples
Brand: None
Price: $0.98
Savings: $2.76
 Image cutout: https://f.wishabi.net/page_items/353711205/1729056672/extra_large.jpg
 Similar item cutouts: ['https://f.wishabi.net/page_items/353584688/1728881949/extra_large.jpg', 'https://f.wishabi.net/page_items/353326535/1728547992/extra_large.jpg']

Name: Red, Green or Black Seedless Grapes
Brand: None
Price: $0.88
Savings: $2.10
 Image cutout: https://

In [42]:
from openai import OpenAI
import os
from dotenv import load_dotenv
load_dotenv()

client = OpenAI()

def get_price_per_unit(item):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Calculate the price per unit for this item: {item['name']} priced at ${item['price']:.2f}. Respond with only the price per unit in the format $X.XX/unit, where unit is one of: lb, oz, g, kg, gal, qt, pt, fl oz, ea. If it doesn't make sense to calculate a price per unit, respond with 'not applicable'. Use the most appropriate unit based on the image and item description."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": item['image'],
                        },
                    },
                ],
            }
        ],
        max_tokens=30,
    )
    return response.choices[0].message.content.strip()

# Add price per unit to fruit_deals
for deal in fruit_deals:
    deal['price_per_unit'] = get_price_per_unit(deal['item'])

# Update pretty_print_deal function to include price per unit
def pretty_print_deal(deal):
    print(f"Name: {deal['item']['name']}")
    print(f"Brand: {deal['item']['brand']}")
    print(f"Price: ${deal['item']['price']:.2f}")
    print(f"Savings: ${deal['savings']:.2f}")
    print(f"Price per unit: {deal.get('price_per_unit', 'N/A')}")
    print(f"Image cutout: {deal['item']['image']}")
    print(f"Similar item cutouts: {[x['image'] for x in deal['similar_items']]}\n")

# Print updated fruit deals
for deal in fruit_deals:
    pretty_print_deal(deal)



Name: Welch's® 3-pk. Fruit Snacks
Brand: Welch's
Price: $1.25
Savings: $4.75
Price per unit: $0.42/ea
Image cutout: https://f.wishabi.net/page_items/351837303/1727658841/extra_large.jpg
Similar item cutouts: ['https://f.wishabi.net/page_items/354129153/1729278715/extra_large.jpg', 'https://f.wishabi.net/page_items/354099482/1729257729/extra_large.jpg']

Price per unit: $0.42/ea

Name: Strawberries
Brand: None
Price: $2.98
Savings: $3.02
Price per unit: $2.98/lb
Image cutout: https://f.wishabi.net/page_items/353711218/1729056674/extra_large.jpg
Similar item cutouts: ['https://f.wishabi.net/page_items/353326754/1728547996/extra_large.jpg']

Price per unit: $2.98/lb

Name: Honeycrisp Apples
Brand: None
Price: $0.98
Savings: $2.76
Price per unit: $0.98/lb
Image cutout: https://f.wishabi.net/page_items/353711205/1729056672/extra_large.jpg
Similar item cutouts: ['https://f.wishabi.net/page_items/353584688/1728881949/extra_large.jpg', 'https://f.wishabi.net/page_items/353326535/1728547992/ext