In [1]:
import pandas as pd
import pickle
import os

# Read the recipe dataset and replace every missing value with an empty string.
df = pd.read_csv(r"D:\food_recommender\data\Food Ingredients and Recipe Dataset with Cleaned Ingredients.csv").fillna("")

# Derive each image path as "<image_folder>/<Image_Name>.jpg".
image_folder = r"D:\food_recommender\data\Food Images"
df["Image_Path"] = [os.path.join(image_folder, f"{name}.jpg") for name in df["Image_Name"]]

# The output folder may not exist yet; creating it twice is harmless.
os.makedirs(r"D:\food_recommender\embeddings", exist_ok=True)

# Metadata key -> dataframe column it is copied from (order is preserved).
meta_columns = {
    "title": "Title",
    "ingredients": "Ingredients",
    "cleaned_ingredients": "Cleaned_Ingredients",
    "instructions": "Instructions",
    "image_path": "Image_Path",
}
meta = {key: df[column].tolist() for key, column in meta_columns.items()}

# Serialize the metadata lists to meta.pkl.
with open(r"D:\food_recommender\embeddings\meta.pkl", "wb") as meta_file:
    pickle.dump(meta, meta_file)

print("meta.pkl created successfully!")



meta.pkl created successfully!


In [3]:
import torch
from transformers import CLIPModel, CLIPProcessor
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

# Load the pretrained CLIP weights together with the matching processor.
clip_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(clip_name)
processor = CLIPProcessor.from_pretrained(clip_name)

texts = df["Cleaned_Ingredients"].tolist()
MAX_LEN = 77  # CLIP limit

# Gradients are never needed for feature extraction, so disable them for the
# whole pass instead of once per text.
text_embs = []
with torch.no_grad():
    for recipe_text in tqdm(texts, desc="Embedding text"):
        # Tokenize one text at a time; over-long inputs are cut to MAX_LEN tokens.
        encoded = processor.tokenizer(
            recipe_text,
            truncation=True,
            max_length=MAX_LEN,
            return_tensors="pt",
        )
        features = model.get_text_features(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
        )
        # Only one text was encoded, so keep the first (only) row as a NumPy vector.
        text_embs.append(features.cpu().numpy()[0])

# Collapse the list of per-text vectors into one array.
text_embs = np.array(text_embs)


print("DONE — text embeddings + meta saved")


Embedding text: 100%|██████████| 13501/13501 [32:13<00:00,  6.98it/s] 


DONE — text embeddings + meta saved


In [5]:
# Make sure the output folder is present before writing into it.
embeddings_dir = r"D:\food_recommender\embeddings"
os.makedirs(embeddings_dir, exist_ok=True)

# Persist the in-memory embedding array as a .npy file.
text_embeddings_file = r"D:\food_recommender\embeddings\text_embeddings.npy"
np.save(text_embeddings_file, text_embs)


In [9]:
import faiss
import numpy as np

# Read the stored text embeddings, then cast them to float32 for indexing.
embeds = np.load(r"D:/food_recommender/embeddings/text_embeddings.npy")
embeds = embeds.astype("float32")

# Build a flat L2 index whose width matches the embedding dimension.
d = embeds.shape[1]  # embedding dimension
index = faiss.IndexFlatL2(d)

# Add the embeddings in the same order as meta.pkl.
index.add(embeds)

# Write the populated index to disk.
faiss.write_index(index, r"D:/food_recommender/embeddings/faiss_index.bin")


In [11]:
import faiss
import numpy as np
import pickle

# Restore the FAISS index from the file written by the indexing cell.
index = faiss.read_index("D:/food_recommender/embeddings/faiss_index.bin")

# Restore the recipe metadata lists.
# NOTE: unpickling can execute arbitrary code; only load a meta.pkl you produced yourself.
with open("D:/food_recommender/embeddings/meta.pkl", "rb") as meta_file:
    meta = pickle.load(meta_file)

def search_recipe(query_emb, k=5):
    """Return metadata for the recipes nearest to a query embedding.

    Reads the module-level ``index`` (searched via ``index.search``) and the
    module-level ``meta`` dict (``"title"``, ``"cleaned_ingredients"`` and
    ``"image_path"`` lists, looked up by the positions the index returns).

    Parameters
    ----------
    query_emb : array-like
        A single query vector; it is converted to float32 and reshaped to
        one row before searching.
    k : int, default 5
        Number of neighbours requested from the index.

    Returns
    -------
    list of dict
        One dict per valid hit, in the order returned by the index, with keys
        ``"index"``, ``"distance"``, ``"title"``, ``"ingredients"`` and
        ``"image"``. May contain fewer than ``k`` entries.
    """
    query = np.asarray(query_emb, dtype="float32").reshape(1, -1)
    distances, indices = index.search(query, k)

    results = []
    for idx, dist in zip(indices[0], distances[0]):
        idx = int(idx)
        if idx < 0:
            # FAISS fills missing neighbours with label -1 (e.g. when the index
            # holds fewer than k vectors). A negative position would silently
            # wrap around and return the *last* recipe, so skip it.
            continue
        results.append({
            "index": idx,
            "distance": float(dist),
            "title": meta["title"][idx],
            "ingredients": meta["cleaned_ingredients"][idx],
            "image": meta["image_path"][idx],
        })

    return results

def get_text_embedding(text, model, processor, max_length=77):
    """Embed one text string and return the result as a NumPy vector.

    Parameters
    ----------
    text : str
        Text to embed; inputs longer than ``max_length`` tokens are truncated.
    model
        Object exposing ``get_text_features(input_ids=..., attention_mask=...)``.
        If it also exposes a ``device`` attribute (as Hugging Face
        ``PreTrainedModel`` does), the token tensors are moved to that device.
    processor
        Object whose ``tokenizer`` attribute is callable and returns a mapping
        with ``"input_ids"`` and ``"attention_mask"`` tensors.
    max_length : int, default 77
        Token limit passed to the tokenizer (77 matches the limit used when the
        dataset embeddings were computed).

    Returns
    -------
    numpy.ndarray
        The first row of the features returned by ``model.get_text_features``.
    """
    tokens = processor.tokenizer(
        text,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )
    input_ids = tokens["input_ids"]
    attention_mask = tokens["attention_mask"]

    # Keep inputs on the same device as the model so the call also works when
    # the model has been moved off the CPU.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)

    with torch.no_grad():
        emb = model.get_text_features(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

    # Tensor.numpy() raises for tensors that live on an accelerator or require
    # grad; detach and copy to host first (same as the dataset embedding loop).
    return emb.detach().cpu().numpy()[0]


# Demo: embed a free-text ingredient query and look up its nearest recipes.
query_text = "chicken garlic soy sauce"
query_emb = get_text_embedding(query_text, model, processor)
results = search_recipe(query_emb, k=5)

# One line per hit: position in the metadata lists, then the recipe title.
for hit in results:
    print(hit["index"], hit["title"])


8805 Sweet Garlic Soy Sauce
3586 Sweet Balsamic Glazed Chicken
1813 Sesame Chicken Drumettes
9015 Sesame Quail Eggs
5348 Stir-Fried Sesame Baby Bok Choy


In [12]:
# Re-read the raw CSV into `df`; unlike the first cell, missing values are not replaced here.
csv_path = r"D:\food_recommender\data\Food Ingredients and Recipe Dataset with Cleaned Ingredients.csv"
df = pd.read_csv(csv_path)

In [13]:
# Preview the first rows of the dataframe that was just re-read from the CSV.
df.head()

Unnamed: 0,Title,Ingredients,Instructions,Image_Name,Cleaned_Ingredients
0,Miso-Butter Roast Chicken With Acorn Squash Pa...,"['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...","Pat chicken dry with paper towels, season all ...",miso-butter-roast-chicken-acorn-squash-panzanella,purpose bread onion oil total cored olive as r...
1,Crispy Salt and Pepper Potatoes,"['2 large egg whites', '1 pound new potatoes (...",Preheat oven to 400°F and line a rimmed baking...,crispy-salt-and-pepper-potatoes-dan-kluger,salt about in thyme kosher new pepper potatoes...
2,Thanksgiving Mac and Cheese,"['1 cup evaporated milk', '1 cup whole milk', ...",Place a rack in middle of oven; preheat to 400...,thanksgiving-mac-and-cheese-erick-williams,onion cheese sharp full elbow macaroni pepper ...
3,Italian Sausage and Bread Stuffing,"['1 (¾- to 1-pound) round Italian loaf, cut in...",Preheat oven to 350°F with rack in middle. Gen...,italian-sausage-and-bread-stuffing-240559,celery turkey heavy ribs cubes oil olive butte...
4,Newton's Law,"['1 teaspoon dark brown sugar', '1 teaspoon ho...",Stir together brown sugar and hot water in a c...,newtons-law-apple-bourbon-cocktail,hot butter water garnish cinnamon apple orange...


In [None]:
# Spot-check: title stored at position 8805, the index of the top hit printed by the search example.
meta["title"][8805]

'Sweet Garlic Soy Sauce'