In [None]:
import os, re, json
from typing import List, Dict, Any, Tuple
import chromadb
from chromadb.utils import embedding_functions
import google.generativeai as genai
from difflib import SequenceMatcher
import pandas as pd
from tqdm import tqdm
import re
import ast

# ==========================================================
DB_PATH = "vector_new_db/recipes"   # your ChromaDB path
MODEL_NAME = "gemini-2.0-flash-lite"
GENAI_API_KEY = os.getenv('GENAI_API_KEY') 

TOP_K_DEFAULT = 3
OUTPUT_DIR = "/content/evaluation_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ==========================================================
chroma_client = chromadb.PersistentClient(path=DB_PATH)
embedder = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="BAAI/bge-m3"
)
collection = chroma_client.get_collection("recipes")

# ==========================================================
genai.configure(api_key=GENAI_API_KEY)
model = genai.GenerativeModel(MODEL_NAME)

# ==========================================================
TOKEN_CLEAN_RE = re.compile(r"[^a-zA-Z0-9\s\-]")
STOP_TOKENS = {"fresh","dried","chopped","sliced","minced","ground","crushed",
    "thinly","thick","thickly","small","medium","large","whole","skinless",
    "boneless","lean","extra","optional","to","taste","of","and","or"}

CANONICAL_MAP = {
    "chicken thigh":"chicken","chicken thighs":"chicken","chicken breast":"chicken",
    "beef rib":"beef ribs","beef ribs":"beef ribs","pork belly":"pork belly",
    "pork ribs":"pork ribs","pork":"pork",
    "tofu":"tofu","tofu skin":"yuba","bean curd skin":"yuba","yuba":"yuba",
    "garlic":"garlic","ginger":"ginger","chili oil":"chili oil",
    "soy sauce":"soy sauce","doubanjiang":"doubanjiang"
}
CONFLICTS = {("tofu","yuba"),("yuba","tofu")}

def _normalize(s:str)->str:
    s = s.lower().strip()
    s = TOKEN_CLEAN_RE.sub(" ", s)
    return re.sub(r"\s+"," ",s).strip()

def _canonicalize(s:str)->str:
    s_norm=_normalize(s)
    for k in sorted(CANONICAL_MAP,key=len,reverse=True):
        if re.search(rf"(?<![a-z0-9]){re.escape(k)}(?![a-z0-9])",s_norm):
            return CANONICAL_MAP[k]
    return s_norm

def _is_conflict(a,b): return (a,b) in CONFLICTS
def _similar(a,b): return SequenceMatcher(None,a,b).ratio()

def match_ing(a,b,th=0.88):
    a,b=_canonicalize(a),_canonicalize(b)
    if _is_conflict(a,b): return False
    if a==b: return True
    if re.search(rf"(?<![a-z0-9]){re.escape(a)}(?![a-z0-9])",b) or re.search(rf"(?<![a-z0-9]){re.escape(b)}(?![a-z0-9])",a):
        return True
    return _similar(a,b)>=th

# ==========================================================
class RecipeModelEvaluator:
    def __init__(self,collection,embedder,model,top_k:int=3):
        self.collection=collection
        self.embedder=embedder
        self.model=model
        self.top_k=top_k

    def _retrieve_recipes(self,query:str)->List[Dict[str,Any]]:
        emb=self.embedder([query])[0]
        res=self.collection.query(query_embeddings=[emb],n_results=self.top_k)
        docs=res.get("documents",[[]])[0]
        metas=res.get("metadatas",[[]])[0]
        out=[]
        for i in range(min(len(docs),self.top_k)):
            m=metas[i]
            out.append({
                "name":m.get("Recipes_name",""),
                "flavor":m.get("Flavor",""),
                "difficulty":m.get("Difficulty",""),
                "time":m.get("Estimated Cooking Time",""),
                "doc":docs[i]
            })
        return out

    def _build_prompt(self,query:str,cands:List[Dict[str,Any]])->str:
        blocks=[]
        for i,c in enumerate(cands,1):
            blocks.append(f"[Candidate {i}]\nName:{c['name']}\nFlavor:{c['flavor']}\nContent:\n{c['doc']}")
        schema={
            "recipe_name":"string",
            "chosen_from":"string",
            "ingredients":["string","..."],
            "steps":["string","..."],
        }
        return f"""
User request:
{query}

Output exactly ONE JSON object following this schema:
{json.dumps(schema,indent=2)}
"""

    def generate_recipe(self,prompt:str)->str:
        cands=self._retrieve_recipes(prompt)
        print(f"📦 Retrieved {len(cands)} candidates from Chroma")
        llm_prompt=self._build_prompt(prompt,cands)
        try:
            resp=self.model.generate_content(llm_prompt)
            return resp.text
        except Exception as e:
            return json.dumps({"recipe_name":"ERROR","chosen_from":"ERROR","ingredients":[],"steps":[],"reasoning":str(e)})

    def parse_json(self,text:str)->Dict[str,Any]:
        try: return json.loads(text)
        except: pass
        m=re.search(r"(\{.*\})",text,flags=re.DOTALL)
        if m:
            try: return json.loads(m.group(1))
            except: pass
        return {"recipe_name":"NO_MATCH","chosen_from":"NO_MATCH","ingredients":[],"steps":[],"reasoning":""}

    def calc_recall(self,user_ings:List[str],gen_ings:List[str])->Dict[str,Any]:
        u=[_normalize(x) for x in user_ings]
        g=[_normalize(x) for x in gen_ings]
        match=[]
        mg=set()
        for ui in u:
            for j,gi in enumerate(g):
                if j in mg: continue
                if match_ing(ui,gi):
                    match.append((ui,gi)); mg.add(j); break
        r=len(match)/len(u) if u else 0
        p=len(match)/len(g) if g else 0
        f=2*r*p/(r+p) if (r+p)>0 else 0
        return {"matched":len(match),"recall":r,"precision":p,"f1":f,"pairs":match}

# ==========================================================
model_eval = RecipeModelEvaluator(collection,embedder,model)


# ===== basic setting =====
df = pd.read_csv("recipes_prompt_label.csv")
df = df.iloc[0:600] # test on first 600 samples
BATCH_SIZE = 50
OUTPUT_DIR = "outputs/baseline"
os.makedirs(OUTPUT_DIR, exist_ok=True)

all_results = []  # store all metrics here

for i in tqdm(range(0, len(df), BATCH_SIZE), desc="Evaluating recipes"):
    batch = df.iloc[i:i + BATCH_SIZE]
    docs = batch["prompt"].tolist()
    ids = batch["ingredients/label"].tolist()

    for j, (prompt, gold_ings_raw) in enumerate(zip(docs, ids)):
        print("\n" + "=" * 80)
        print(f"🧩 Sample {i + j + 1}/{len(df)}")

        print("\n🔍 Prompt:", prompt)
        print("🎯 Ground Truth (raw):", gold_ings_raw)

        if isinstance(gold_ings_raw, str):
            try:
                gold_ings = ast.literal_eval(gold_ings_raw)
                if not isinstance(gold_ings, list):
                    gold_ings = [gold_ings]
            except Exception:
                gold_ings = [x.strip() for x in gold_ings_raw.split(",") if x.strip()]
        else:
            gold_ings = list(gold_ings_raw) if not isinstance(gold_ings_raw, list) else gold_ings_raw

        print("✅ Parsed Ground Truth (list):", gold_ings)

        raw_out = model_eval.generate_recipe(prompt)
        print("\n💬 Gemini Raw Output:\n", raw_out[:800])

        parsed = model_eval.parse_json(raw_out)
        gen_ings = parsed.get("ingredients", [])
        print("\n🥬 Extracted Ingredients:", gen_ings)

        metrics = model_eval.calc_recall(gold_ings, gen_ings)
        print("\n📊 Metrics:\n", json.dumps(metrics, indent=2, ensure_ascii=False))

        metrics_row = {"id": i + j + 1, "prompt": prompt}
        metrics_row.update(metrics)
        metrics_row["raw_output"] = raw_out
        all_results.append(metrics_row)

results_df = pd.DataFrame(all_results)
results_path = os.path.join(OUTPUT_DIR, "metrics_results.csv")
results_df.to_csv(results_path, index=False, encoding="utf-8-sig")

print(f"\n✅ 所有指标已保存到: {results_path}")

Evaluating recipes:   0%|          | 0/12 [00:00<?, ?it/s]


🧩 Sample 1/600

🔍 Prompt: I have pork belly, garlic, and chili oil. I want to make something spicy and Sichuan-style.
🎯 Ground Truth (raw): pork belly, garlic, chili oil
✅ Parsed Ground Truth (list): ['pork belly', 'garlic', 'chili oil']
📦 Retrieved 3 candidates from Chroma

💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Spicy Sichuan Pork Belly with Garlic and Chili Oil",
  "chosen_from": "User Request and Common Sichuan Recipes",
  "ingredients": [
    "500g pork belly, cut into 1-inch cubes",
    "2 tablespoons chili oil, plus extra for serving",
    "4 cloves garlic, minced",
    "1 tablespoon light soy sauce",
    "1 tablespoon dark soy sauce",
    "1 tablespoon Shaoxing wine (or dry sherry)",
    "1 teaspoon Sichuan peppercorns, toasted and ground",
    "1/2 teaspoon sugar",
    "1/4 cup water or chicken broth",
    "2 scallions, thinly sliced (for garnish)",
    "Cooked rice, for serving"
  ],
  "steps": [
    "In a pot, blanch the pork belly in boiling water for 5 minutes. 

Evaluating recipes:   8%|▊         | 1/12 [02:02<22:29, 122.66s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Simple Beef and Broccoli",
  "chosen_from": "User's Ingredients",
  "ingredients": [
    "Beef (sliced for stir-fry)",
    "Broccoli florets",
    "Soy sauce",
    "Cooking oil (e.g., vegetable or canola)",
    "Optional: Cornstarch (for thickening sauce)",
    "Optional: Garlic (minced)"
  ],
  "steps": [
    "Prepare the beef: If desired, toss the beef with a little cornstarch and a splash of soy sauce (optional for a tenderizing effect).",
    "Prepare the broccoli: Wash and cut the broccoli into bite-sized florets.",
    "Stir-fry the beef: Heat cooking oil in a wok or large skillet over high heat. Add the beef and stir-fry until browned (cook in batches if necessary to avoid overcrowding). Set aside.",
    "Stir-fry the broccoli: Add a bit more oil to the w

🥬 Extracted Ingredients: ['Beef (sliced for stir-fry)', 'Broccoli florets', 'Soy sauce', 'Cooking oil (e.g., vegetable or canola)', 'Optional: Cornstarch (for thickening sauce

Evaluating recipes:  17%|█▋        | 2/12 [03:54<19:21, 116.17s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Simple Beef and Pepper Stir-fry",
  "chosen_from": "User Request",
  "ingredients": [
    "Beef (thinly sliced or strips)",
    "Green bell peppers (sliced)",
    "Onion (sliced)",
    "Soy sauce",
    "Cornstarch",
    "Oil (vegetable or canola)",
    "Garlic (minced, optional)",
    "Ginger (minced, optional)",
    "Salt",
    "Black pepper",
    "Cooked rice (for serving)"
  ],
  "steps": [
    "Slice the beef into thin strips if not already prepared. Toss with 1 tablespoon cornstarch, a pinch of salt, and a pinch of black pepper.",
    "Slice the green peppers and onions.",
    "If using, mince garlic and ginger.",
    "Heat oil in a wok or large skillet over high heat until shimmering.",
    "Stir-fry the beef in a single layer until browned. Remove from th

🥬 Extracted Ingredients: ['Beef (thinly sliced or strips)', 'Green bell peppers (sliced)', 'Onion (sliced)', 'Soy sauce', 'Cornstarch', 'Oil (vegetable or canola)', 'Garlic (m

Evaluating recipes:  25%|██▌       | 3/12 [05:53<17:36, 117.36s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Roasted Duck Legs with Orange-Cranberry Glaze",
  "chosen_from": "Festive/Roasted Options",
  "ingredients": [
    "4 duck legs",
    "1 tbsp olive oil",
    "1 tsp salt",
    "1/2 tsp black pepper",
    "1 cup orange juice",
    "1/2 cup fresh cranberries",
    "1/4 cup honey",
    "1 tbsp orange zest",
    "1 sprig fresh rosemary"
  ],
  "steps": [
    "Preheat oven to 375°F (190°C).",
    "Pat the duck legs dry with paper towels. Score the skin of the duck legs in a criss-cross pattern.",
    "Rub the duck legs with olive oil, salt, and pepper.",
    "Place the duck legs in a roasting pan, skin-side up.",
    "Roast for 1 hour.",
    "While the duck is roasting, make the glaze: In a small saucepan, combine orange juice, cranberries, honey, and orange zest. Br

🥬 Extracted Ingredients: ['4 duck legs', '1 tbsp olive oil', '1 tsp salt', '1/2 tsp black pepper', '1 cup orange juice', '1/2 cup fresh cranberries', '1/4 cup honey', '1 tbsp 

Evaluating recipes:  33%|███▎      | 4/12 [07:45<15:22, 115.30s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Quick Tofu and Bok Choy Stir-fry",
  "chosen_from": "Ingredients on Hand",
  "ingredients": [
    "1 block firm tofu, pressed and cubed",
    "1 head bok choy, chopped",
    "1 tbsp soy sauce",
    "1 tsp sesame oil",
    "1 clove garlic, minced",
    "1/2 inch ginger, minced (optional)",
    "1 tbsp vegetable oil",
    "Pinch of red pepper flakes (optional)"
  ],
  "steps": [
    "Heat vegetable oil in a wok or large skillet over medium-high heat.",
    "Add tofu and stir-fry until golden brown, about 5-7 minutes.",
    "Add garlic and ginger (if using) and stir-fry for 30 seconds until fragrant.",
    "Add bok choy and stir-fry until wilted, about 2-3 minutes.",
    "Stir in soy sauce, sesame oil, and red pepper flakes (if using).",
    "Stir-fry for another m

🥬 Extracted Ingredients: ['1 block firm tofu, pressed and cubed', '1 head bok choy, chopped', '1 tbsp soy sauce', '1 tsp sesame oil', '1 clove garlic, minced', '1/2 inch ginge

Evaluating recipes:  42%|████▏     | 5/12 [09:37<13:19, 114.21s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Tangy Beef and Chili Stir-fry",
  "chosen_from": "User request",
  "ingredients": [
    "1 lb beef, thinly sliced (sirloin or flank steak recommended)",
    "1 tbsp cornstarch",
    "1 tbsp soy sauce",
    "1 tbsp black vinegar",
    "1 tbsp chili garlic sauce (or more, to taste)",
    "1 tbsp vegetable oil",
    "1 red bell pepper, sliced",
    "1 green bell pepper, sliced",
    "1 onion, sliced",
    "2 cloves garlic, minced",
    "1 inch ginger, grated",
    "Cooked rice, for serving"
  ],
  "steps": [
    "In a bowl, toss the sliced beef with cornstarch and soy sauce.",
    "In a separate small bowl, whisk together the black vinegar and chili garlic sauce.",
    "Heat vegetable oil in a wok or large skillet over high heat until shimmering.",
    "Add the bee

🥬 Extracted Ingredients: ['1 lb beef, thinly sliced (sirloin or flank steak recommended)', '1 tbsp cornstarch', '1 tbsp soy sauce', '1 tbsp black vinegar', '1 tbsp chili garli

Evaluating recipes:  50%|█████     | 6/12 [11:34<11:30, 115.11s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Simple Soy Sauce Tofu",
  "chosen_from": "Necessity!",
  "ingredients": [
    "Tofu",
    "Soy sauce"
  ],
  "steps": [
    "Open the tofu and drain any excess water (you can press it gently with paper towels if you want it firmer, but it's fine if you don't).",
    "Slice the tofu into bite-sized pieces.",
    "Place the tofu pieces on a plate or in a bowl.",
    "Drizzle soy sauce generously over the tofu.",
    "Eat immediately or let it sit for a few minutes to absorb more soy sauce (optional)."
  ]
}
```

🥬 Extracted Ingredients: ['Tofu', 'Soy sauce']

📊 Metrics:
 {
  "matched": 2,
  "recall": 1.0,
  "precision": 1.0,
  "f1": 1.0,
  "pairs": [
    [
      "tofu",
      "tofu"
    ],
    [
      "soy sauce",
      "soy sauce"
    ]
  ]
}

🧩 Sample 301/600

🔍 Prompt: I’ve got pork belly, chili oil, and garlic. I want something bold, spicy, and comforting.
🎯 Ground Truth (raw): pork belly, chili oil, garlic
✅ Parsed Ground Truth (lis

Evaluating recipes:  58%|█████▊    | 7/12 [13:27<09:31, 114.31s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Chilled Tofu with Ginger-Scallion Dressing",
  "chosen_from": "User request",
  "ingredients": [
    "1 block extra-firm tofu",
    "2 tablespoons soy sauce",
    "1 tablespoon rice vinegar",
    "1 tablespoon sesame oil",
    "1 tablespoon grated fresh ginger",
    "2 scallions, thinly sliced",
    "Optional: Chili flakes for garnish",
    "Optional: Toasted sesame seeds for garnish"
  ],
  "steps": [
    "Press the tofu to remove excess water. Wrap the tofu in paper towels and place a heavy object (like a book or pan) on top for at least 30 minutes.",
    "While the tofu is pressing, make the dressing: In a small bowl, whisk together the soy sauce, rice vinegar, sesame oil, and grated ginger.",
    "Slice the pressed tofu into bite-sized cubes or rectangles.",

🥬 Extracted Ingredients: ['1 block extra-firm tofu', '2 tablespoons soy sauce', '1 tablespoon rice vinegar', '1 tablespoon sesame oil', '1 tablespoon grated fresh ginger', '2 

Evaluating recipes:  67%|██████▋   | 8/12 [15:16<07:30, 112.65s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Quick Beef and Broccoli with Oyster Sauce",
  "chosen_from": "Ingredients available",
  "ingredients": [
    "Beef (thinly sliced)",
    "Broccoli florets",
    "Oyster sauce",
    "Soy sauce",
    "Cornstarch",
    "Cooking oil (e.g., vegetable or canola)",
    "Garlic (minced)",
    "Ginger (minced, optional)",
    "Cooked rice (for serving)"
  ],
  "steps": [
    "If using frozen beef, thaw it slightly. In a small bowl, whisk together 1 tbsp soy sauce, 1 tsp cornstarch, and a pinch of black pepper. Toss the beef in the mixture.",
    "Heat a tablespoon of cooking oil in a wok or large skillet over medium-high heat. Stir-fry the beef until browned, about 2-3 minutes. Remove from the skillet and set aside.",
    "Add another tablespoon of oil to the skillet. St

🥬 Extracted Ingredients: ['Beef (thinly sliced)', 'Broccoli florets', 'Oyster sauce', 'Soy sauce', 'Cornstarch', 'Cooking oil (e.g., vegetable or canola)', 'Garlic (minced)', 

Evaluating recipes:  75%|███████▌  | 9/12 [17:00<05:30, 110.15s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Quick Chili Oil Tofu",
  "chosen_from": "Pantry staples and user's provided ingredients",
  "ingredients": [
    "Firm or extra-firm tofu",
    "Chili oil",
    "Soy sauce (or tamari)",
    "Optional: Scallions or green onions, thinly sliced"
  ],
  "steps": [
    "Press the tofu: Place the tofu block between paper towels and weigh it down (a heavy book or plate works). Press for at least 5 minutes to remove excess water.",
    "Slice the tofu: Cut the pressed tofu into cubes or slabs, depending on your preference.",
    "Pan-fry the tofu (optional, for extra texture): Heat a little chili oil (or neutral oil) in a non-stick pan over medium-high heat. Add the tofu and pan-fry, flipping occasionally, until golden brown and slightly crispy on the outside. If you're

🥬 Extracted Ingredients: ['Firm or extra-firm tofu', 'Chili oil', 'Soy sauce (or tamari)', 'Optional: Scallions or green onions, thinly sliced']

📊 Metrics:
 {
  "matched": 2,

Evaluating recipes:  83%|████████▎ | 10/12 [18:43<03:35, 107.73s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Simple Pork and Mushroom Stir-fry",
  "chosen_from": "User Request",
  "ingredients": [
    "1 lb pork (cut into bite-sized pieces)",
    "8 oz mushrooms (sliced)",
    "1/4 cup soy sauce",
    "1 tbsp oil (vegetable or canola)",
    "1 clove garlic (minced)",
    "1/2 tsp ground ginger (optional)",
    "Cooked rice, for serving (optional)"
  ],
  "steps": [
    "Heat oil in a wok or large skillet over medium-high heat.",
    "Add pork and stir-fry until browned and cooked through.",
    "Add mushrooms and garlic to the pan. Stir-fry until mushrooms are softened, about 3-5 minutes.",
    "Stir in soy sauce and ginger (if using).",
    "Cook for another minute, allowing the sauce to thicken slightly.",
    "Serve over rice, if desired."
  ]
}
```

🥬 Extracted Ingredients: ['1 lb pork (cut into bite-sized pieces)', '8 oz mushrooms (sliced)', '1/4 cup soy sauce', '1 tbsp oil (vegetable or canola)', '1 clove garlic (minced)', '1/2 tsp grou

Evaluating recipes:  92%|█████████▏| 11/12 [20:31<01:47, 107.97s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Spicy Tofu and Mushroom Stir-Fry",
  "chosen_from": "User Request",
  "ingredients": [
    "1 block firm tofu, pressed and cubed",
    "8 oz mushrooms, sliced",
    "2 tablespoons chili oil",
    "1 tablespoon soy sauce",
    "1 teaspoon cornstarch",
    "1 clove garlic, minced",
    "1/2 inch ginger, grated (optional)",
    "Cooked rice, for serving",
    "Green onions, chopped (for garnish, optional)"
  ],
  "steps": [
    "Press the tofu: Wrap the tofu in paper towels and place something heavy on top (like a book or cutting board) for at least 15 minutes to remove excess water.",
    "Prepare the sauce: In a small bowl, whisk together the soy sauce and cornstarch.",
    "Stir-fry the tofu: Heat 1 tablespoon of chili oil in a wok or large skillet over medium-h

🥬 Extracted Ingredients: ['1 block firm tofu, pressed and cubed', '8 oz mushrooms, sliced', '2 tablespoons chili oil', '1 tablespoon soy sauce', '1 teaspoon cornstarch', '1 cl

Evaluating recipes: 100%|██████████| 12/12 [22:18<00:00, 111.52s/it]


💬 Gemini Raw Output:
 ```json
{
  "recipe_name": "Simple Soy-Braised Pork Ribs",
  "chosen_from": "User request",
  "ingredients": [
    "Pork ribs",
    "Soy sauce",
    "Sugar",
    "Water (or chicken broth, optional)",
    "Garlic cloves (optional)",
    "Ginger, peeled and sliced (optional)",
    "Star anise (optional)"
  ],
  "steps": [
    "Rinse the pork ribs and pat them dry.",
    "If desired, brown the ribs in a pan over medium-high heat with a little oil until lightly browned on all sides. This adds flavor, but is optional.",
    "In a pot or Dutch oven, combine the ribs, soy sauce, sugar, and enough water (or broth) to mostly submerge the ribs. If using, add garlic, ginger, and star anise.",
    "Bring the liquid to a simmer over medium heat.  ",
    "Reduce the heat to low, cover the pot, and let t

🥬 Extracted Ingredients: ['Pork ribs', 'Soy sauce', 'Sugar', 'Water (or chicken broth, optional)', 'Garlic cloves (optional)', 'Ginger, peeled and sliced (optional)', 'Star an




In [12]:
results_df.to_csv(results_path, index=False, encoding="utf-8-sig")