In [1]:
from chromaDB import open_or_build_chroma_from_csv, embed_text, ChromaAdapter
import os

In [2]:
# Settings for the FNDDS vector store: the collection name, where Chroma
# persists it on disk, and the text CSV it is built from.
collection_name = "fndds"
persist_dir = "./nutribench/chroma_fndds"
csv_path = "../data/FNDDS/2019-2020 FNDDS - Foods and Beverages - Text.csv"

In [3]:
# Request a rebuild only when nothing exists at `persist_dir` yet; an existing
# path is taken to mean the store was already created.
rebuild = not os.path.exists(persist_dir)
if rebuild:
    print(f"Creating ChromaDB directory {persist_dir}...")
else:
    print(f"ChromaDB directory {persist_dir} already exists. Skipping creation.")

ChromaDB directory ./nutribench/chroma_fndds already exists. Skipping creation.


In [4]:
# Arguments for opening (or, when `rebuild` is True, building) the FNDDS
# collection. `rebuild` is derived earlier from whether `persist_dir` exists;
# override it to True here to force a rebuild.
chroma_build_kwargs = dict(
    csv_path=csv_path,
    persist_dir=persist_dir,
    collection_name=collection_name,
    rebuild=rebuild,
    batch_size=512,
)
collection = open_or_build_chroma_from_csv(**chroma_build_kwargs)

# Wrap the collection together with the embedding function; this adapter is
# what gets handed to DietAI24 as `vectordb`.
vectordb = ChromaAdapter(collection, embed_text)

[Chroma] Reusing existing collection 'fndds' with 5624 items at ./nutribench/chroma_fndds


In [5]:
import os
import csv
import copy
import pandas as pd
from tqdm import tqdm
from time import sleep
from nutribench.DietAI24_OpenAI import DietAI24

In [6]:
# Run configuration: the model to query, the folder holding the benchmark
# inputs, and the CSV that receives the per-item results.
model_name = "gpt-4.1-mini"
path_images_base = "../data/nutribench"
path_results = f"../output/nutribench_dietai24_{model_name}.csv"

# Make sure the results folder exists before anything is written into it.
results_dir = os.path.dirname(path_results)
os.makedirs(results_dir, exist_ok=True)

In [7]:
# Column order of the results CSV (one row is written per recognized food item).
fields = [
    "meal_description",
    "meal_description_MLLM",
    "food_code_MLLM",
    "food_code_description_MLLM",
    "weight_grams_MLLM",
    "weight_reasoning_MLLM",
]

# FNDDS lookup tables: the food list and the portion/weight reference rows.
df_fndds = pd.read_csv("../data/FNDDS/2019-2020 FNDDS At A Glance - Foods and Beverages.csv")
df_portions_weights = pd.read_csv("../data/FNDDS/2019-2020 FNDDS At A Glance - Portions and Weights - Dropna.csv")

# Benchmark meals; only the free-text description column is fed to the model.
df_meals = pd.read_csv(f"{path_images_base}/selected_nutribench_v2.csv")
ls_meal_descriptions = df_meals["meal_description"].tolist()

In [None]:
# Run the DietAI24 text pipeline over every benchmark meal and stream the
# results to `path_results`, one CSV row per recognized food item.
#
# For each meal: recognize foods from the description, then for every assigned
# food code look up its FNDDS portion rows and ask the model for a weight
# estimate (with the portion reference when rows exist, without it otherwise).
#
# NOTE: the file is opened in "w" mode, so re-running this cell discards rows
# written by a previous (possibly interrupted) run.
with open(path_results, "w", newline='', encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fields)
    writer.writeheader()

    for meal_description in tqdm(ls_meal_descriptions, desc="Processing meal descriptions"):
        try:
            # NOTE(review): a recognizer is built for every meal. If DietAI24
            # keeps no per-meal state it could be created once before the
            # loop -- confirm before hoisting.
            recognizer = DietAI24(model_name=model_name, vectordb=vectordb)
            food_response = recognizer.recognize_food_from_text(meal_description)
            # Not used below; kept so it stays available for inspection in
            # later cells, like `food_response`.
            food_codes = food_response.get("food_codes", "")
            meal_description_mllm = food_response.get("normalized_description", "")
            food_assignments = food_response.get("assignments", [])

            if not food_response.get("ok", False):
                print(f"No food codes detected for {meal_description}. Skipping.")
                continue

            for food_assignment in food_assignments:
                raw_code = food_assignment.get("code", "")
                # Normalize to text before validating: the code may arrive as
                # an int or None, and calling a str method on those would raise
                # and abort every remaining item of this meal.
                food_code = "" if raw_code is None else str(raw_code).strip()
                # isdecimal() matches exactly what int() can parse; isdigit()
                # also accepts characters (e.g. superscript digits) int() rejects.
                food_code_int = int(food_code) if food_code.isdecimal() else None
                if food_code_int is None:
                    print(f"Invalid food code {raw_code} for {meal_description}. Skipping.")
                    continue

                food_code_description = food_assignment.get("item", "")
                # Portion rows for this food code; may be empty.
                df_reference = df_portions_weights[df_portions_weights["Food code"] == food_code_int][[
                    "Main food description", "Portion description", "Portion weight (g)"]]

                # Estimate weights and reasoning
                if len(df_reference) > 0:
                    portion_reference_text = recognizer.portion_reference_to_text(df_reference)
                    weights_response = recognizer.estimate_weight_before_eating(
                        meal_text=meal_description,
                        assumed_food=food_code_description,
                        portion_reference_text=portion_reference_text
                    )
                else:
                    # No portion rows for this code: estimate without a reference.
                    weights_response = recognizer.estimate_weight_before_eating(
                        meal_text=meal_description,
                        assumed_food=food_code_description,
                        use_weight_reference=False
                    )

                # Get results, handle missing
                weight_grams = weights_response.get('weight_grams', None)
                weight_reasoning = weights_response.get('reasoning', "")

                row = {
                    "meal_description": meal_description,
                    "meal_description_MLLM": meal_description_mllm,
                    "food_code_MLLM": food_code_int,
                    "food_code_description_MLLM": food_code_description,
                    "weight_grams_MLLM": weight_grams,
                    "weight_reasoning_MLLM": weight_reasoning,
                }
                writer.writerow(row)
                # Push the row out of Python's buffer right away so a killed
                # kernel during this multi-hour run keeps what was finished.
                f.flush()
                sleep(0.2)  # Avoid rate limits
        except Exception as e:
            # Best effort: report the failure and move on to the next meal.
            # Rows already written for this meal stay in the file.
            print(f"Error processing {meal_description}: {e}")
            continue

Processing meal descriptions:   0%|          | 3/1500 [01:00<8:26:57, 20.32s/it]


KeyboardInterrupt: 

In [9]:
# Debug inspection: display the recognition response left in the kernel by the
# last meal the processing loop handled. This relies on leftover loop state,
# so it only works after that loop has run at least one iteration.
food_response

{'ok': True,
 'food_codes': ['27510245', '75506010', '74401010', '71401030'],
 'normalized_description': 'A cheeseburger with mustard and ketchup, and a small order of French fries.',
 'items': [{'label': 'cheeseburger', 'details': ''},
  {'label': 'mustard', 'details': 'packet'},
  {'label': 'ketchup', 'details': 'tablespoon'},
  {'label': 'French fries', 'details': 'small order'}],
 'context_text': 'Food code: 27510245 | The image shows the food category of cheeseburger, on white bun, 1 large patty. Additional details include on bread or roll; not specified subcategory as to grain; third pound size patty.\nFood code: 27510246 | The image shows the food category of cheeseburger, on wheat bun, 1 large patty. Additional details include on bread or roll; multigrain, whole grain or whole wheat; quarter pound or regular size patty.\nFood code: 27510195 | The image shows the food category of cheeseburger, on white bun, 1 small patty. Additional details include homemade; bun, not further spe