Goal: separate the ingredients in the FoodKG table into a list of ingredients

In [1]:
import json
import os

import pandas as pd
from dotenv import load_dotenv

from llm_util import process_food_kg_df, assemble_food_kg_df

load_dotenv()  # Take environment variables from .env
api_key = os.getenv("OPENAI_API_KEY")

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:1234/v1",  # local model
    api_key=api_key
)


In [2]:
df = pd.read_csv("data/raw/nourish_public_FoodKG.csv")

In [3]:
df.head(5)

Unnamed: 0,id,title,ingredients,directions,link,source,ner
0,2229479,Double Chocolate Chewy Cookies,"['3/4 cup softened salted butter', '1 1/2 cup ...","['Heat oven to 350F.', 'Cream butter and sugar...",cookpad.com/us/recipes/339167-double-chocolate...,Recipes1M,"['butter', 'sugar', 'eggs', 'vanilla', 'flour'..."
1,2229480,"""Old Fashioned Potato Soup - ""Homemade"" Potato...","['6 Potatoes, peeled, cubes', '2 Leeks, washed...",['Put all ingredients except milk and chives i...,online-cookbook.com/goto/cook/rpage/0003A6,Recipes1M,"['Potatoes', 'Leeks', 'Onions', 'Ham bone', 'C..."
2,2229492,Persian pasta with spiced chickpeas and pistac...,"['250 g (8.8oz) spaghetti', '1 tin of chickpea...","['Boil a large pot of water for your pasta.', ...",www.lovefood.com/guide/recipes/17869/persian-p...,Recipes1M,"['chickpeas', 'onion', 'tomatoes', 'clove of g..."
3,2229494,Shawn & Lauras Kitchen Sink Feijoada,"['1 12 tablespoons garlic (chopped)', '1 shall...","['Chop garlic, shallot, green onions, parsley ...",www.food.com/recipe/shawn-lauras-kitchen-sink-...,Recipes1M,"['garlic', 'shallot', 'Italian parsley', 'oreg..."
4,2229498,The 4:2:1 Golden Ratio for Always Delicious Ta...,"['4 Eggs (large)', '2 tbsp Sugar', '1 tsp Salt...","['Break the eggs into a bowl.', 'Put all the i...",cookpad.com/us/recipes/145525-the-421-golden-r...,Recipes1M,"['Eggs', 'Sugar', 'Salt', 'Mayonnaise']"


In [4]:
model = "qwen/qwen3-4b-2507"
# model="openai/gpt-oss-20b",

process_food_kg_df(
    df=df,
    model=model,
    client=client,
    restart=True,  # reset the checkpoints
    batch_size=100,
    stop_at=1000
)

Wrote part_0_99.csv (0:100) — checkpoint=99
Wrote part_100_199.csv (100:200) — checkpoint=199
Wrote part_200_299.csv (200:300) — checkpoint=299
Wrote part_300_399.csv (300:400) — checkpoint=399
Wrote part_400_499.csv (400:500) — checkpoint=499
Wrote part_500_599.csv (500:600) — checkpoint=599
Wrote part_600_699.csv (600:700) — checkpoint=699
Wrote part_700_799.csv (700:800) — checkpoint=799
Wrote part_800_899.csv (800:900) — checkpoint=899
Wrote part_900_999.csv (900:1000) — checkpoint=999
Done.


In [5]:
final_df = assemble_food_kg_df()

In [6]:
len(final_df)

1000

In [7]:
# CSV can’t store native Python lists (they are objects), so use JSON dump
ingredients_table = pd.DataFrame({
    "id": final_df["id"],
    "title": final_df["title"],
    "ingredients": final_df["ingredients"],  # new, kept upon request
    "ingredients_normalized": final_df["ingredients_normalized"].apply(json.dumps),
}).sort_values("id")
# Create a directory if it doesn't exist and save

In [8]:
ingredients_table.head(5)

Unnamed: 0,id,title,ingredients,ingredients_normalized
0,0,No-Bake Nut Cookies,"['1 c. firmly packed brown sugar', '1/2 c. eva...","""['brown sugar', 'evaporated milk', 'vanilla',..."
1,1,Jewell Ball'S Chicken,"['1 small jar chipped beef, cut up', '4 boned ...","""['chipped beef', 'chicken breast', 'cream of ..."
2,2,Creamy Corn,"['2 (16 oz.) pkg. frozen corn', '1 (8 oz.) pkg...","""['corn', 'cream cheese', 'butter', 'garlic po..."
3,3,Chicken Funny,"['1 large whole chicken', '2 (10 1/2 oz.) cans...","""['chicken', 'chicken gravy', 'mushroom soup',..."
4,4,Reeses Cups(Candy),"['1 c. peanut butter', '3/4 c. graham cracker ...","""['peanut butter', 'graham cracker crumb', 'bu..."


In [9]:
os.makedirs("data/output", exist_ok=True)
ingredients_table.to_csv("data/output/processed_ingredients.csv", index=False)

In [10]:
final_df.head()

Unnamed: 0,id,title,ingredients,ingredients_normalized
0,0,No-Bake Nut Cookies,"['1 c. firmly packed brown sugar', '1/2 c. eva...","['brown sugar', 'evaporated milk', 'vanilla', ..."
1,1,Jewell Ball'S Chicken,"['1 small jar chipped beef, cut up', '4 boned ...","['chipped beef', 'chicken breast', 'cream of m..."
2,2,Creamy Corn,"['2 (16 oz.) pkg. frozen corn', '1 (8 oz.) pkg...","['corn', 'cream cheese', 'butter', 'garlic pow..."
3,3,Chicken Funny,"['1 large whole chicken', '2 (10 1/2 oz.) cans...","['chicken', 'chicken gravy', 'mushroom soup', ..."
4,4,Reeses Cups(Candy),"['1 c. peanut butter', '3/4 c. graham cracker ...","['peanut butter', 'graham cracker crumb', 'but..."
