In [1]:
import os

from dotenv import load_dotenv
from langchain.chat_models import init_chat_model

load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
model_version_nutrition = os.environ.get("MODEL_VERSION_NUTRITION")
model_version_ingredients = os.environ.get("MODEL_VERSION_INGREDIENTS")

llm_ingredients = init_chat_model(model_version_ingredients, model_provider="openai")
llm_nutrition = init_chat_model(model_version_nutrition, model_provider="openai")


In [2]:
from typing import Optional, List
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate


class Ingredients(BaseModel):
    name: str = Field(description="Name of the ingredient")
    quantity: Optional[str] = Field(None, description="Amount or measurement of the ingredient")

class Dish(BaseModel):
    dish_name: str = Field(description="Name of the dish")
    quantity: Optional[str] = Field(None, description="Serving size or amount of the dish")
    ingredients: List[Ingredients] = Field(description="List of ingredient items for this dish")

class Dishes(BaseModel):
    dishes: List[Dish] = Field(description="List of dishes with their quantities and items")

structured_llm_ingredients = llm_ingredients.with_structured_output(Dishes)

prompt_ingredients = ChatPromptTemplate.from_messages([
    ("system", "You are a professional nutritionist dedicated to analyzing meal information."),
    ("human",
     "For each dish in the meal, return:\n"
     "- dish_name\n"
     "- quantity (serving size)\n"
     "- ingredients: list of all ingredients.\n"
     "For every ingredient item, ALWAYS include a quantity, even if approximate "
     "(e.g. '2 oz', '1 tbsp', 'a handful').\n\n"
     "Meal: {user_input}")
])


chain_ingredients = prompt_ingredients | structured_llm_ingredients


user_input = "A slice of pepperoni pizza and caesar salad"
result_ingredients = chain_ingredients.invoke({"user_input": user_input})


In [3]:
import json
result_ingredients_dict = result_ingredients.model_dump()
print(json.dumps(result_ingredients_dict, indent=2))

{
  "dishes": [
    {
      "dish_name": "Pepperoni Pizza",
      "quantity": "1 slice",
      "ingredients": [
        {
          "name": "Pizza crust",
          "quantity": "1 slice"
        },
        {
          "name": "Tomato sauce",
          "quantity": "3 tbsp"
        },
        {
          "name": "Mozzarella cheese",
          "quantity": "1 oz"
        },
        {
          "name": "Pepperoni slices",
          "quantity": "5 slices"
        }
      ]
    },
    {
      "dish_name": "Caesar Salad",
      "quantity": "1 serving",
      "ingredients": [
        {
          "name": "Romaine lettuce",
          "quantity": "2 cups"
        },
        {
          "name": "Caesar dressing",
          "quantity": "2 tbsp"
        },
        {
          "name": "Parmesan cheese",
          "quantity": "1 oz"
        },
        {
          "name": "Croutons",
          "quantity": "a handful"
        }
      ]
    }
  ]
}


In [6]:
from typing import Optional, List
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate


class Nutrition(BaseModel):
    dish_names: str = Field(description="Name of the food item") 
    calories: Optional[float] = Field(None, description="Calories per serving")
    protein_g: Optional[float] = Field(None, description="Protein in grams")
    carbs_g: Optional[float] = Field(None, description="Carbohydrates in grams")
    fat_g: Optional[float] = Field(None, description="Fat in grams")

class NutritionList(BaseModel):
    dishes: List[Nutrition]

structured_llm_nutrition = llm_nutrition.with_structured_output(NutritionList)

prompt_nutrition = ChatPromptTemplate.from_messages([
    ("system", "You are a professional nutritionist dedicated to analyzing meal information."),
    ("human",
     "For each of these dishes based on their ingredients, return calories, protein_g, carbs_g, and fat_g per serving:\n{dish_names}")
])

chain_nutrition = prompt_nutrition | structured_llm_nutrition


dish_names_list = [d.dish_name for d in (result_ingredients.dishes or []) if getattr(d, "dish_name", None)]
dish_names_str = "\n".join(f"- {n}" for n in dish_names_list) if dish_names_list else "- unknown dish"

result_nutrition = chain_nutrition.invoke({"dish_names": dish_names_str})




In [7]:
import json

result_nutrition_dict = result_nutrition.model_dump()
print(json.dumps(result_nutrition_dict, indent=2))

{
  "dishes": [
    {
      "dish_names": "Pepperoni Pizza",
      "calories": 285.0,
      "protein_g": 12.0,
      "carbs_g": 36.0,
      "fat_g": 12.0
    },
    {
      "dish_names": "Caesar Salad",
      "calories": 350.0,
      "protein_g": 10.0,
      "carbs_g": 9.0,
      "fat_g": 30.0
    }
  ]
}


In [6]:
import pandas as pd

df_pred = pd.read_csv("src/nutribench_estimated.csv")

df_pred = df_pred.rename(columns={
    "carb": "pred_carb",
    "fat": "pred_fat",
    "energy": "pred_energy",
    "protein": "pred_protein"
})

df_pred = df_pred.drop(columns=["country", "serving_type"])
df_pred.head()

Unnamed: 0,meal_description,pred_carb,pred_fat,pred_energy,pred_protein
0,"For breakfast, I ate a plain bun weighing 126 ...",90.0,3.78,448.2,10.08
1,"For lunch, I had 171 grams of boiled fresh gro...",60.0,42.27,682.89,25.74
2,"I’ve got 9 grams of boiled kasepa fish, 34 gra...",40.53,8.05,279.92,7.11
3,I had a breakfast consisting of 51g of raw whi...,128.06,8.88,645.15,16.53
4,"For dinner, I had 105g of raw maize flour alon...",81.23,4.99,397.49,8.9


In [7]:
from datasets import load_dataset

dataset = load_dataset("dongx1997/NutriBench")

df_ref = dataset["train"].to_pandas()
df_ref = df_ref.drop(columns=["country", "serving_type"])

df_ref.head()

Unnamed: 0,meal_description,carb,fat,energy,protein
0,"For breakfast, I ate a plain bun weighing 126 ...",90.8,4.2,439.0,9.6
1,"For lunch, I had 171 grams of boiled fresh gro...",97.8,37.8,806.0,27.9
2,"I’ve got 9 grams of boiled kasepa fish, 34 gra...",51.2,14.2,363.0,8.7
3,I had a breakfast consisting of 51g of raw whi...,163.8,8.9,792.0,19.0
4,"For dinner, I had 105g of raw maize flour alon...",81.1,4.6,390.0,8.4


In [9]:
merged_df = pd.merge(df_pred, df_ref, on="meal_description", how="inner")

merged_df.head()

Unnamed: 0,meal_description,pred_carb,pred_fat,pred_energy,pred_protein,carb,fat,energy,protein
0,"For breakfast, I ate a plain bun weighing 126 ...",90.0,3.78,448.2,10.08,90.8,4.2,439.0,9.6
1,"For lunch, I had 171 grams of boiled fresh gro...",60.0,42.27,682.89,25.74,97.8,37.8,806.0,27.9
2,"I’ve got 9 grams of boiled kasepa fish, 34 gra...",40.53,8.05,279.92,7.11,51.2,14.2,363.0,8.7
3,I had a breakfast consisting of 51g of raw whi...,128.06,8.88,645.15,16.53,163.8,8.9,792.0,19.0
4,"For a snack, I had 48g of boiled white navy be...",49.27,9.54,324.54,13.88,127.3,13.0,703.0,25.8
