# Embedding-Based Recommender

This notebook builds an advanced recommender system using sentence embeddings.
We embed each product's name together with its category labels and use cosine similarity between those embeddings to recommend similar discounted products.


In [90]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [21]:
# Read the product table, keep only rows whose fine category is known,
# and renumber the surviving rows from 0.
# NOTE(review): the filter uses 'fine_category' while the text below uses
# 'final_category' -- confirm both columns exist and this is intentional.
data = (
    pd.read_csv('prufa.csv')
    .loc[lambda frame: frame['fine_category'] != 'Unknown']
    .reset_index(drop=True)
)

# Text fed to the embedding model: final, mid and broad category labels
# joined by single spaces (missing values become the string 'nan').
data['text_for_embedding'] = (
    data['final_category'].astype(str).str.cat(
        [data['mid_category'].astype(str), data['broad_category'].astype(str)],
        sep=" ",
    )
)


In [22]:
# Sentence encoder: the pretrained 'all-MiniLM-L6-v2' checkpoint
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode the combined text of every row of `data`; the recommender below
# indexes `embeddings` by row number, so the order must match `data`.
product_texts = data['text_for_embedding'].to_list()
embeddings = model.encode(product_texts, show_progress_bar=True)

# Pairwise cosine similarity of every row against every other row
similarity_matrix = cosine_similarity(embeddings, embeddings)


Batches:   0%|          | 0/10 [00:00<?, ?it/s]

In [93]:
def recommend_semantic_discounted(product_name, top_n=5, similarity_threshold=0.4):
    """
    Recommend products that are semantically similar (based on embeddings)
    to the first product whose text contains ``product_name``.

    The query is matched as a literal, case-insensitive substring of
    ``data['text_for_embedding']``. The first matching row is the reference
    product; every other row is scored by cosine similarity of its embedding
    to the reference embedding.

    Parameters
    ----------
    product_name : str
        Text to look for. It is matched verbatim, not as a regular expression.
    top_n : int, default 5
        Maximum number of rows to return.
    similarity_threshold : float, default 0.4
        Minimum cosine similarity a row needs to be kept.

    Returns
    -------
    pandas.DataFrame or None
        Up to ``top_n`` rows ordered by similarity, then discount (both
        descending), with at most one row per ``final_category``. ``None``
        when no product matches; a hint with example names is printed instead.

    Notes
    -----
    * Reads the notebook-level ``data`` and ``embeddings``; neither is modified.
    * Only the matched row itself is excluded. Other rows with identical text
      score 1.0 and remain eligible.
    * ``discount`` only breaks ties between rows with equal similarity.
    """
    # Literal (regex=False) match so names containing characters such as
    # "(" or "+" are searched verbatim instead of being parsed as a pattern.
    hit_mask = (
        data['text_for_embedding']
        .str.contains(product_name, case=False, na=False, regex=False)
        .to_numpy(dtype=bool)
    )
    hit_positions = np.flatnonzero(hit_mask)

    if hit_positions.size == 0:
        print(f"No product found with name containing '{product_name}'.")
        print("\nHere are some valid examples you can try:")
        examples = data['final_category'].drop_duplicates()
        # Never ask for more examples than there are distinct categories.
        print(examples.sample(min(5, len(examples)), random_state=42).tolist())
        return None

    # Row *position* (not index label) of the reference product, so the lookup
    # into `embeddings` stays correct even if `data` has a non-default index.
    position = hit_positions[0]
    input_vector = embeddings[position]

    # Compute similarity scores to all products
    scores = np.asarray(cosine_similarity([input_vector], embeddings)[0])

    # Attach the scores to a copy so the shared `data` frame is left untouched.
    scored = data.assign(similarity=scores)

    # Filter by similarity threshold and exclude the reference row itself
    keep = scores >= similarity_threshold
    keep[position] = False

    # Sort by similarity + discount
    similar_items = (
        scored[keep]
        .sort_values(by=['similarity', 'discount'], ascending=[False, False])
        .drop_duplicates(subset=['final_category'])  # avoid redundancy
        .head(top_n)
    )

    print(f"Found product: {data['text_for_embedding'].iloc[position]}")
    return similar_items[['final_category', 'broad_category', 'original_price', 'new_price', 'discount', 'similarity']]


In [97]:
# Example query: recommendations for the first product whose text contains "pork"
recommend_semantic_discounted(product_name="pork")




Found product: Smoked Saddle Of Pork Meat Dairy And Cold Storage


Unnamed: 0,final_category,broad_category,original_price,new_price,discount,similarity
213,Smoked Saddle Of Pork,Dairy And Cold Storage,17.95,9.0,8.95,1.0
183,Meat Sausage,Dairy And Cold Storage,38.95,17.0,21.95,0.705938
86,Pork Sausages,Dairy And Cold Storage,69.95,45.0,24.95,0.698991
190,Corned Beef,Dairy And Cold Storage,18.95,3.0,15.95,0.683451
23,Cold Cuts Ham,Dairy And Cold Storage,22.95,12.0,10.95,0.678451


In [98]:
# Example query: recommendations for the first product whose text contains "cheese"
recommend_semantic_discounted(product_name="cheese")


Found product: Sliced Cheese Cheese Dairy And Cold Storage


Unnamed: 0,final_category,broad_category,original_price,new_price,discount,similarity
166,Sliced Cheese,Dairy And Cold Storage,45.0,27.0,18.0,1.0
223,Cream Cheese,Dairy And Cold Storage,29.95,15.0,14.95,0.933465
26,Cottage Cheese,Dairy And Cold Storage,14.95,9.0,5.95,0.912679
95,Cheese Specialities,Dairy And Cold Storage,24.95,19.0,5.95,0.903822
81,Cheese For Slicing,Dairy And Cold Storage,108.95,58.0,50.95,0.891822


In [99]:
# Example query: recommendations for the first product whose text contains "salami"
recommend_semantic_discounted(product_name="salami")


Found product: Salami Meat Dairy And Cold Storage


Unnamed: 0,final_category,broad_category,original_price,new_price,discount,similarity
135,Salami,Dairy And Cold Storage,42.95,25.0,17.95,1.0
183,Meat Sausage,Dairy And Cold Storage,38.95,17.0,21.95,0.776224
203,Cold Cuts Chicken,Dairy And Cold Storage,15.95,4.0,11.95,0.767332
207,Sliced Lunch Meats,Dairy And Cold Storage,24.95,8.0,16.95,0.758905
190,Corned Beef,Dairy And Cold Storage,18.95,3.0,15.95,0.753048


In [100]:
# Example query: recommendations for the first product whose text contains "ham"
recommend_semantic_discounted(product_name="ham")


Found product: Cold Cuts Ham Meat Dairy And Cold Storage


Unnamed: 0,final_category,broad_category,original_price,new_price,discount,similarity
187,Cold Cuts Ham,Dairy And Cold Storage,19.95,9.0,10.95,1.0
203,Cold Cuts Chicken,Dairy And Cold Storage,15.95,4.0,11.95,0.847675
113,Cold Cuts Turkey,Dairy And Cold Storage,16.95,9.0,7.95,0.831731
207,Sliced Lunch Meats,Dairy And Cold Storage,24.95,8.0,16.95,0.793779
183,Meat Sausage,Dairy And Cold Storage,38.95,17.0,21.95,0.745349
