In [2]:
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import torch
import numpy as np
import pickle
from tqdm import tqdm

# Load the pickled metadata object that the next cell wraps in a DataFrame.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only open pickle files that come from a trusted source.
with open(r"D:\food_recommender\embeddings\meta_clean_original_paths.pkl", "rb") as f:
    meta = pickle.load(f)

In [3]:
import pandas as pd
# Wrap the loaded metadata in a DataFrame so columns can be selected by name.
df = pd.DataFrame(meta)
# Preview the first rows as a sanity check on the loaded columns.
df.head()

Unnamed: 0,title,ingredients,cleaned_ingredients,instructions,image_path
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...",purpose bread onion oil total cored olive as r...,"Pat chicken dry with paper towels, season all ...",D:\food_recommender\data\Food Images\miso-butt...
1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",salt about in thyme kosher new pepper potatoes...,Preheat oven to 400°F and line a rimmed baking...,D:\food_recommender\data\Food Images\crispy-sa...
2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",onion cheese sharp full elbow macaroni pepper ...,Place a rack in middle of oven; preheat to 400...,D:\food_recommender\data\Food Images\thanksgiv...
3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",celery turkey heavy ribs cubes oil olive butte...,Preheat oven to 350°F with rack in middle. Gen...,D:\food_recommender\data\Food Images\italian-s...
4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",hot butter water garnish cinnamon apple orange...,Stir together brown sugar and hot water in a c...,D:\food_recommender\data\Food Images\newtons-l...


In [None]:
import spacy

# spaCy pipeline used below for POS tagging and lemmatization of ingredient text.
nlp = spacy.load("en_core_web_sm")

# Coarse part-of-speech tags (compared against token.pos_) whose tokens are
# dropped when ingredient text is cleaned.
BAD_POS = {
    "ADP",   # adpositions / prepositions
    "CCONJ", # coordinating conjunctions (and/or)
    "SCONJ", # subordinating conjunctions
    "ADV",   # adverbs
    "DET",   # determiners (a, the)
    "PRON",  # pronouns (it, they)
    "VERB",  # verbs (chopped, sliced)
    "AUX",   # auxiliary verbs
    "PART",  # particles
    "NUM",   # numerals
    "SYM",   # symbols
    "PUNCT"  # punctuation
}

def clean_ingredients_spacy(text, max_tokens=20):
    """Reduce ingredient text to a short list of unique lemmas.

    The text is lowercased and run through the module-level spaCy pipeline
    ``nlp``. Tokens are dropped when their coarse POS tag is in ``BAD_POS``,
    when they are whitespace, or when they are two characters or shorter.
    The remaining tokens are lemmatized and de-duplicated in order of first
    appearance.

    Args:
        text: Ingredient text. Anything that is not a non-blank string
            (``None``, a float ``NaN`` from a DataFrame, ``""``) yields ``[]``.
        max_tokens: Maximum number of lemmas to return.

    Returns:
        A list of at most ``max_tokens`` unique lemma strings.
    """
    # Missing values have no .lower(); treat them like empty text instead of
    # crashing, so the caller's "no tokens" fallback can handle them.
    if not isinstance(text, str) or not text.strip():
        return []

    doc = nlp(text.lower())

    lemmas = [
        token.lemma_
        for token in doc
        # Runs of extra whitespace become their own tokens and are not covered
        # by BAD_POS, so they are filtered explicitly.
        if token.pos_ not in BAD_POS
        and not token.is_space
        and len(token.text) > 2
    ]

    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(lemmas))[:max_tokens]


In [None]:
def ingredients_to_caption_spacy(text):
    """Build a one-sentence caption from ingredient text.

    The text is cleaned with ``clean_ingredients_spacy``; the resulting words
    are joined with commas into "A dish made with ...". When cleaning leaves
    no words, the generic caption "A food dish." is returned instead.
    """
    ingredient_words = clean_ingredients_spacy(text)

    if ingredient_words:
        listing = ", ".join(ingredient_words)
        return "A dish made with " + listing + "."

    return "A food dish."


In [None]:
import torch
import numpy as np
from transformers import CLIPModel, CLIPProcessor
from tqdm import tqdm

# Load the pretrained CLIP checkpoint and its matching processor.
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Build one caption per recipe. ingredients_to_caption_spacy expects a single
# string, so it must be applied element-wise; passing the whole Series fails
# on text.lower().
evals_1 = [ingredients_to_caption_spacy(text) for text in df['cleaned_ingredients']]

text_embeds = []

for text in tqdm(evals_1, desc="Embedding ingredient captions"):
    inputs = processor(
        text=text,
        return_tensors="pt",
        truncation=True,
        max_length=77
    )

    # inference only: no gradients needed
    with torch.no_grad():
        emb = model.get_text_features(**inputs)

    # L2-normalize so dot products between embeddings are cosine similarities
    emb = emb / emb.norm(dim=-1, keepdim=True)

    # emb has a leading batch dimension of 1; keep just the vector
    text_embeds.append(emb.numpy()[0])

text_embeds = np.array(text_embeds, dtype="float32")

# one embedding row per caption
assert text_embeds.shape[0] == len(evals_1)

print("✅ Done:", text_embeds.shape)


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Embedding ingredient captions: 100%|██████████| 13471/13471 [31:44<00:00,  7.07it/s] 


✅ Done: (13471, 768)


In [None]:
# Persist the caption embeddings so later cells can reload them from disk
# instead of re-running the CLIP text encoder.
np.save(r"D:\food_recommender\embeddings\text_embeddings_cleaned_captioned_and_normalized.npy", text_embeds)

In [None]:
import numpy as np
import faiss
import pickle

# ============================
#  Paths
# ============================
EMB_PATH = r"D:\food_recommender\embeddings\text_embeddings_cleaned_captioned_and_normalized.npy"
INDEX_PATH = r"D:/food_recommender/embeddings/faiss_text_index_final.bin"

# ============================
#  Create FAISS index
# ============================
embeds = np.load(EMB_PATH).astype("float32")
d = embeds.shape[1]   # embedding dimension
# Exact (brute-force) L2 index. The saved text embeddings were unit-normalized,
# so ranking by L2 distance gives the same order as ranking by cosine similarity.
index = faiss.IndexFlatL2(d)

index.add(embeds)  # add all embeddings
print("FAISS index size:", index.ntotal)

# ============================
#  Save index
# ============================
faiss.write_index(index, INDEX_PATH)
# Report the path that was actually written.
print(f"Saved FAISS index → {INDEX_PATH}")

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the precomputed image and caption-text embeddings.
# The text path previously ended in a stray space inside the string literal,
# which does not name the file that was saved.
image_embeds = np.load(r"D:\food_recommender\embeddings\image_embeds_all.npy").astype("float32")
text_embeds  = np.load(r"D:\food_recommender\embeddings\text_embeddings_cleaned_captioned_and_normalized.npy").astype("float32")

# The metrics below treat row i of both arrays as the same recipe, so the two
# arrays must line up exactly.
assert image_embeds.shape == text_embeds.shape, (
    f"embedding shape mismatch: images {image_embeds.shape} vs text {text_embeds.shape}"
)

# cosine similarity = dot product because normalized
# NOTE(review): the text embeddings are normalized when they are created;
# this assumes image_embeds_all.npy is unit-normalized as well — TODO confirm.
# sim_matrix[i, j] = similarity of text i to image j.
sim_matrix = text_embeds @ image_embeds.T



# Calculate Recall@K

In [10]:
def recall_at_k(sim_matrix, k):
    """Fraction of rows whose own index is among the k best-scoring columns.

    Row ``i`` is treated as a query whose single correct match is column
    ``i``; columns are ranked by descending similarity.
    """
    num_queries = sim_matrix.shape[0]

    # Count the queries whose matching column shows up in their top-k list.
    found = sum(
        1
        for query in range(num_queries)
        if query in np.argsort(-sim_matrix[query])[:k]
    )

    return found / num_queries


# Calculate Mean Reciprocal Rank (MRR)

In [11]:
def mean_reciprocal_rank(sim_matrix):
    """Average of 1 / rank of the matching column over all rows.

    For row ``i`` the columns are ordered by descending similarity and the
    1-based position of column ``i`` in that ordering is its rank.
    """
    num_queries = sim_matrix.shape[0]

    def reciprocal_rank(query):
        order = np.argsort(-sim_matrix[query])
        position = np.flatnonzero(order == query)[0]  # 0-based position
        return 1 / (position + 1)

    total = sum((reciprocal_rank(q) for q in range(num_queries)), 0.0)
    return total / num_queries


# Calculate Mean Average Precision (MAP)

In [12]:
def mean_average_precision(sim_matrix):
    """Mean average precision when each row has exactly one relevant column.

    Row ``i``'s only relevant item is column ``i``, so its average precision
    is 1 / (1-based rank of column ``i`` under descending similarity).
    """
    row_count = sim_matrix.shape[0]

    per_query_ap = []
    for idx in range(row_count):
        descending = np.argsort(-sim_matrix[idx])
        hit_rank = np.where(descending == idx)[0][0] + 1
        # single relevant item -> precision at its rank is the whole AP
        per_query_ap.append(1 / hit_rank)

    return sum(per_query_ap, 0.0) / row_count


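Because each query has exactly one relevant image, the average precision of a query reduces to 1 / rank of that image,  
=> MAP == MRR (the two metrics are mathematically identical in this setup).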

In [13]:
# Recall@K at a few cut-offs, printed to four decimal places.
for k in [1, 5, 10]:
    print(f"Recall@{k}: {recall_at_k(sim_matrix, k):.4f}")

# MRR and MAP are printed unrounded; each call re-sorts every row of sim_matrix.
print("MRR:", mean_reciprocal_rank(sim_matrix))
print("MAP:", mean_average_precision(sim_matrix))


Recall@1: 0.2039
Recall@5: 0.4216
Recall@10: 0.5244
MRR: 0.3089963381289362
MAP: 0.3089963381289362
