In [4]:
import os
import time

import pandas as pd
import requests

# USDA FoodData Central API key.
# SECURITY: a credential committed to source should be treated as leaked.
# Set the USDA_API_KEY environment variable to supply your own key; the
# literal below is kept only as a backward-compatible fallback and should be
# rotated and then removed from the file.
USDA_API_KEY = (
    os.environ.get("USDA_API_KEY")
    or "0tJC81LJtbQIkk4mlAmp8t8Q9TrsbyLp4DiM46bL"
)

# Destination CSV and the number of unique foods to gather before stopping.
OUTPUT_FILE = "usda_1000_nutrition_data.csv"
DESIRED_COUNT = 1000

# Search terms to get diverse foods
SEARCH_TERMS = [
    "chili", "waffles", "sandwich", "biryani", "idli", "paratha", "udon", "kimchi",
    "paneer", "pho", "dumplings", "risotto", "gnocchi", "miso", "lamb", "okra", "plantain",
    "ramen", "muffin", "pudding", "samosa", "falafel", "cornbread", "tortilla", "brussels sprouts",
    "clams", "lobster", "macaroni", "beet", "turnip", "radish", "barley", "oats", "kale",
    "cabbage", "leek", "cucumber", "squash", "eggplant", "jackfruit", "guava", "papaya",
    "pomegranate", "fig", "date", "almond", "cashew", "pecan", "walnut", "hazelnut",
]

def _nutrient_values(food):
    """Map nutrient name -> value for one food record from the search API."""
    values = {}
    energy_kj = None
    for entry in food.get("foodNutrients") or []:
        if not isinstance(entry, dict):
            continue
        name = entry.get("nutrientName")
        # Incomplete entries (no name or no value) are skipped instead of
        # raising KeyError and losing the whole page.
        if name is None or "value" not in entry:
            continue
        unit = str(entry.get("unitName") or "").upper()
        if name == "Energy" and unit == "KJ":
            # Some records list "Energy" twice (kcal and kJ). Keying on the
            # name alone would let the kJ figure overwrite the kcal one, so
            # the kJ reading is held back and used only as a fallback.
            energy_kj = entry["value"]
            continue
        values[name] = entry["value"]

    if "Energy" not in values and isinstance(energy_kj, (int, float)):
        # Only a kJ figure was supplied: convert it (1 kcal = 4.184 kJ).
        values["Energy"] = round(energy_kj / 4.184, 2)
    return values


def get_nutrition_data(search_term, page=1, *, page_size=25, timeout=30):
    """Fetch one page of USDA FoodData Central search results for a term.

    Args:
        search_term: Free-text query sent to the ``foods/search`` endpoint.
        page: Page number to request.
        page_size: Number of results requested per page (default 25).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts, one per food, with the keys ``description``,
        ``fdcId``, ``energy_kcal``, ``protein_g``, ``fat_g``,
        ``carbohydrates_g``, ``fiber_g`` and ``sugar_g``. A nutrient that is
        missing from a food defaults to 0. An empty list is returned when the
        request fails, the HTTP status is not 200, or the body is not the
        expected JSON object.
    """
    try:
        # Passing `params` lets requests URL-encode the query (spaces, '&',
        # '#', ...) instead of interpolating it raw into the URL.
        response = requests.get(
            "https://api.nal.usda.gov/fdc/v1/foods/search",
            params={
                "query": search_term,
                "pageSize": page_size,
                "pageNumber": page,
                "api_key": USDA_API_KEY,
            },
            timeout=timeout,
        )
    except requests.RequestException:
        # Best-effort, same as the non-200 path: one bad request must not
        # abort the caller's whole collection run.
        return []
    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        return []
    if not isinstance(payload, dict):
        return []

    foods = []
    for food in payload.get("foods") or []:
        nutrients = _nutrient_values(food)
        foods.append({
            "description": food.get("description"),
            "fdcId": food.get("fdcId"),
            "energy_kcal": nutrients.get("Energy", 0),
            "protein_g": nutrients.get("Protein", 0),
            "fat_g": nutrients.get("Total lipid (fat)", 0),
            "carbohydrates_g": nutrients.get("Carbohydrate, by difference", 0),
            "fiber_g": nutrients.get("Fiber, total dietary", 0),
            "sugar_g": nutrients.get("Sugars, total including NLEA", 0),
        })
    return foods

# Collecting data
# `collected` holds the rows in arrival order; `unique_ids` remembers every
# fdcId already stored so a food returned by several terms is kept once.
collected = []
unique_ids = set()

# Visit the (term, page) combinations in order: up to 5 pages per term.
page_requests = ((term, page) for term in SEARCH_TERMS for page in range(1, 6))
for term, page in page_requests:
    for food in get_nutrition_data(term, page):
        food_id = food["fdcId"]
        if food_id in unique_ids:
            continue  # already stored from an earlier term or page
        unique_ids.add(food_id)
        collected.append(food)
        if len(collected) >= DESIRED_COUNT:
            break
    # Re-checked after every page so the run stops without a trailing sleep
    # as soon as the target is met.
    if len(collected) >= DESIRED_COUNT:
        break
    time.sleep(0.5)  # pause between consecutive requests

# One row is appended per counted item, so the tally is the list length.
total_collected = len(collected)

# Save to CSV
df = pd.DataFrame(collected)
df.to_csv(OUTPUT_FILE, index=False)
print(f"✅ Done! Saved {len(df)} unique items to {OUTPUT_FILE}")


✅ Done! Saved 1000 unique items to usda_1000_nutrition_data.csv
