In [2]:
import requests
import pandas as pd
import time
import os
import random

# CalorieNinjas API key, sent as the "X-Api-Key" request header.
# Prefer supplying it through the CALORIENINJAS_API_KEY environment variable so
# the credential does not have to live in the source file; the literal below is
# kept only as a backward-compatible fallback.
# NOTE(review): a key that has been committed to a shared file should be
# treated as exposed — consider rotating it.
API_KEY = os.environ.get(
    "CALORIENINJAS_API_KEY", "qxJvuaIfbci1yVQD0YXHXg==RNGyurSoVTcMA4po"
)
# CSV file the collected rows are written to.
OUTPUT_FILE = "calorieninjas_7000_foods.csv"
# Number of unique food names after which the scraping loop stops.
DESIRED_COUNT = 7000

# Seed vocabulary of plain food names; extend this list to widen coverage.
base_foods = [
    "apple", "banana", "orange", "grapes", "mango", "kiwi", "blueberry",
    "broccoli", "carrot", "spinach", "potato", "chicken", "beef", "pork",
    "bacon", "egg", "milk", "cheese", "pizza", "burger", "fries", "cake",
    "cookie", "ice cream", "salmon", "shrimp", "rice", "bread", "yogurt",
    "tofu", "soup", "chocolate", "smoothie", "granola", "tuna", "pasta",
    "sushi", "burrito", "lentils", "nuts", "avocado", "chickpeas", "beans",
    "honey", "popcorn", "soda", "steak", "mozzarella", "lasagna", "cereal",
    "muffin", "omelet", "sausage", "onion", "garlic", "kale", "zucchini",
    "cauliflower", "peas", "corn", "quinoa", "falafel", "pita", "taco",
]

# Preparation / portion prefixes that are combined with every base food.
modifiers = [
    "raw", "boiled", "grilled", "fried", "steamed", "roasted", "baked",
    "chopped", "sliced", "1 cup of", "100g of", "cooked", "fresh", "frozen",
]

# Pair every food with every modifier as "<modifier> <food>", then randomize
# the order in which the queries will be issued.
search_queries = [f"{mod} {food}" for food in base_foods for mod in modifiers]
random.shuffle(search_queries)

# Lower-cased food names already recorded; used to skip duplicate items.
collected_foods = set()
# Rows gathered since the last flush to OUTPUT_FILE.
collected_data = []
# Number of unique food names recorded so far.
total_collected = 0

# Create the CSV with a header row when it is missing or empty; otherwise
# resume from it. Rows are always appended to this file, so the names it
# already holds are loaded into the de-duplication set — without this a re-run
# would append the same foods again and restart the count from zero.
if not os.path.exists(OUTPUT_FILE) or os.path.getsize(OUTPUT_FILE) == 0:
    pd.DataFrame(columns=[
        "name", "calories", "protein_g", "fat_total_g",
        "carbohydrates_total_g", "fiber_g", "sugar_g"
    ]).to_csv(OUTPUT_FILE, index=False)
else:
    try:
        # dtype=str keeps numeric-looking names from being parsed as numbers.
        existing_names = pd.read_csv(
            OUTPUT_FILE, usecols=["name"], dtype=str
        )["name"]
    except ValueError as e:
        # pandas reports an unparsable file or a missing "name" column as a
        # ValueError (subclass); keep going with an empty set rather than
        # aborting the run.
        print(f"⚠️ Could not read existing names from {OUTPUT_FILE}: {e}")
    else:
        collected_foods.update(
            existing.lower() for existing in existing_names.dropna()
        )
        total_collected = len(collected_foods)

def fetch_nutrition_data(query, timeout=10):
    """Fetch nutrition items for a free-text food query from CalorieNinjas.

    Failures are reported with ``print`` and never raised, so the caller can
    treat an empty result as "nothing to collect for this query".

    Args:
        query: Free-text food description, e.g. ``"100g of rice"``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under ``"items"`` in the JSON response, or an empty
        list when the request fails, the status code is not 200, the body is
        not a JSON object, or ``"items"`` is missing/empty.
    """
    url = "https://api.calorieninjas.com/v1/nutrition"
    headers = {"X-Api-Key": API_KEY}
    try:
        # Passing the query via `params` lets requests percent-encode it
        # (spaces, '&', '#', ...) instead of splicing raw text into the URL;
        # the timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(
            url, headers=headers, params={"query": query}, timeout=timeout
        )
    except requests.RequestException as e:
        print(f"❌ Request error: {e}")
        return []

    if response.status_code != 200:
        print(f"⚠️ API error {response.status_code}: {response.text}")
        return []

    try:
        payload = response.json()
    except ValueError as e:
        # Body was not valid JSON.
        print(f"❌ Request error: {e}")
        return []

    if not isinstance(payload, dict):
        print("❌ Request error: unexpected response payload")
        return []
    return payload.get("items") or []

# Main scraping loop: issue each search query, keep one row per unique
# (case-insensitive) food name, and append rows to the CSV in batches.
for query in search_queries:
    print(f"🔍 Querying: {query}")
    items = fetch_nutrition_data(query)

    # `items` is empty when the request failed or matched nothing; the loop
    # body below is then a no-op, but the throttle at the bottom still runs.
    for item in items or []:
        name = item.get("name")
        if not name or name.lower() in collected_foods:
            continue
        collected_foods.add(name.lower())
        total_collected += 1
        collected_data.append({
            "name": name,
            "calories": item.get("calories"),
            "protein_g": item.get("protein_g"),
            "fat_total_g": item.get("fat_total_g"),
            "carbohydrates_total_g": item.get("carbohydrates_total_g"),
            "fiber_g": item.get("fiber_g"),
            "sugar_g": item.get("sugar_g")
        })
        print(f"✅ [{total_collected}] Collected: {name}")

    # Save to CSV once at least 100 rows are pending. Column order is pinned
    # explicitly so appended rows always line up with the file's header.
    if len(collected_data) >= 100:
        pd.DataFrame(collected_data, columns=[
            "name", "calories", "protein_g", "fat_total_g",
            "carbohydrates_total_g", "fiber_g", "sugar_g"
        ]).to_csv(OUTPUT_FILE, mode='a', header=False, index=False)
        collected_data = []
        print("💾 Saved batch to CSV")

    if total_collected >= DESIRED_COUNT:
        break

    # Throttle after every request — including failed or empty ones — so an
    # API error (e.g. rate limiting) does not trigger back-to-back retries.
    time.sleep(0.5)

# Final save of any rows still pending after the loop ends.
if collected_data:
    pd.DataFrame(collected_data, columns=[
        "name", "calories", "protein_g", "fat_total_g",
        "carbohydrates_total_g", "fiber_g", "sugar_g"
    ]).to_csv(OUTPUT_FILE, mode='a', header=False, index=False)

print(f"\n🎉 DONE! Total unique food items collected: {total_collected}")
print(f"📁 File saved to: {os.path.abspath(OUTPUT_FILE)}")


🔍 Querying: steamed chicken
✅ [1] Collected: steamed chicken
🔍 Querying: cooked egg
✅ [2] Collected: egg
🔍 Querying: raw lasagna
✅ [3] Collected: lasagna
🔍 Querying: 100g of egg
🔍 Querying: baked carrot
✅ [4] Collected: carrot
🔍 Querying: 1 cup of grapes
✅ [5] Collected: grapes
🔍 Querying: grilled beans
✅ [6] Collected: beans
🔍 Querying: baked garlic
✅ [7] Collected: garlic
🔍 Querying: cooked kale
✅ [8] Collected: kale
🔍 Querying: chopped omelet
✅ [9] Collected: omelet
🔍 Querying: grilled beef
✅ [10] Collected: beef
🔍 Querying: grilled mango
✅ [11] Collected: mango
🔍 Querying: fried falafel
✅ [12] Collected: falafel
🔍 Querying: 1 cup of egg
🔍 Querying: 100g of steak
✅ [13] Collected: steak
🔍 Querying: baked egg
🔍 Querying: grilled cheese
✅ [14] Collected: grilled cheese
🔍 Querying: raw soup
✅ [15] Collected: soup
🔍 Querying: roasted spinach
✅ [16] Collected: spinach
🔍 Querying: boiled tofu
✅ [17] Collected: tofu
🔍 Querying: frozen falafel
🔍 Querying: 100g of pizza
✅ [18] Collected: piz