# Embedding-Based Recommender

This notebook builds an advanced recommender system using sentence embeddings.
We use product names to recommend similar discounted products


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load dataset
data = pd.read_csv('food_waste_products_combined.csv')

# Remove 'Unknown' fine categories
data = data[data['final_category'] != ''].reset_index(drop=True)

# Remove Ready meals
data = data[data['category2'] != 'Ready To Eat Meals'].reset_index(drop=True)

In [None]:
#pip install deep_translator

In [3]:
from deep_translator import GoogleTranslator

data['description_en'] = data['description'].apply(
    lambda x: GoogleTranslator(source='da', target='en').translate(x) if pd.notnull(x) else ""
)


In [4]:
print(data[['description', 'description_en']].head(10))


                     description                         description_en
0               KERNERUGBRØD ØGO                     Core rye bread Øgo
1  JUICE HINDBÆR SOLBÆR INNOCENT  Juice Raspberry Blackcurrant Innocent
2           MANGO/PASS LØGISMOSE                   Mango/Pass Løgismose
3            KRABBESALAT K-SALAT                     Crab salad K Salad
4           SKOVBÆR DRIK ACTIMEL           Forest berries Drink Actimel
5       ITALIENSK SALAT GRAASTEN                 Italian Salad Graasten
6     DET GODE GULEROD SCHULSTAD              The good carrot Schulstad
7          INNER WINNER INNOCENT                  Inner Winner Innocent
8             OKSESPEGEPØLSE ØGO               Beef sausage sausage Øgo
9             ÆBLEJUICE INNOCENT                   Apple juice innocent


In [5]:
# Combine columns to create richer text for embedding
data['text_for_embedding'] = (
    data['category1'].astype(str) + " " +
    data['category2'].astype(str) + " " +
    data['category3'].astype(str) + " " +
    data['category4'].astype(str) + " " +
    data['category5'].astype(str) + " " +
    data['category6'].astype(str) + " " +
    data['category7'].astype(str) + " " +
    data['category8'].astype(str) + " " +
    data['category9'].astype(str) + " " +
    data['category10'].astype(str) + " " +
    data['final_category'].astype(str) + " " +
    data['description_en'].astype(str)
)

In [6]:
# Load the MiniLM model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings
product_texts = data['text_for_embedding'].tolist()
embeddings = model.encode(product_texts, show_progress_bar=True)

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(embeddings)


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

In [7]:
def recommend_semantic_discounted(product_name, top_n=5, similarity_threshold=0.4):
    """
    Recommend products that are semantically similar (based on embeddings)
    and have a good discount.
    """
    matches = data[data['text_for_embedding'].str.contains(product_name, case=False, na=False)]

    if matches.empty:
        print(f"No product found with name containing '{product_name}'.")
        print("\nHere are some valid examples you can try:")
        print(data['final_category'].drop_duplicates().sample(5, random_state=42).tolist())
        return None

    index = matches.index[0]
    input_vector = embeddings[index]

    scores = cosine_similarity([input_vector], embeddings)[0]
    data['similarity'] = scores

    similar_items = data[
        (data.index != index) &
        (data['similarity'] >= similarity_threshold)
    ].copy()

    similar_items = (
        similar_items
        .sort_values(by=['similarity', 'discount'], ascending=[False, False])
        .drop_duplicates(subset=['final_category'])
        .head(top_n)
    )

    print("Found product info:")
    print(data.loc[index, ['final_category', 'zip_code', 'store_name', 'store_street']])
    return similar_items[['zip_code', 'store_name', 'store_street', 'final_category', 'category1', 'original_price', 'new_price', 'discount', 'similarity']]


In [10]:
recommend_semantic_discounted("crab")




Found product info:
final_category    Fish Mayo Salads
zip_code                      2400
store_name         Netto Emdrupvej
store_street         Emdrupvej 107
Name: 3, dtype: object


Unnamed: 0,zip_code,store_name,store_street,final_category,category1,original_price,new_price,discount,similarity
32,2400,Netto Utterslevvej,Utterslevvej 11,Other Mayo Salads,Dairy And Cold Storage,12.0,9,3.0,0.943635
5,2400,Netto Emdrupvej,Emdrupvej 107,Italian Mayo Salads,Dairy And Cold Storage,12.0,9,3.0,0.921086
134,2400,Netto Tuborgvej,Tuborgvej 239,Cold Cuts Chicken,Dairy And Cold Storage,15.95,8,7.95,0.763107
34,2400,Netto Utterslevvej,Utterslevvej 11,Sliced Lunch Meats,Dairy And Cold Storage,14.95,9,5.95,0.756558
17,2400,Netto Frederikssundsvej 52,Frederikssundsvej 52 ST.,Salami,Dairy And Cold Storage,10.95,8,2.95,0.739055


In [None]:
recommend_semantic_discounted("cheese")


In [None]:
recommend_semantic_discounted("salami")


In [None]:
recommend_semantic_discounted("ham")
