In [4]:
import requests
import pandas as pd
import time

# Base URL for OpenFoodFacts API
BASE_URL = "https://world.openfoodfacts.org/cgi/search.pl"

# Parameters for the API request (the page number is added per request)
params = {
    "search_terms": "milk",
    "page_size": 100,        # Items per page
    "json": 1,
    "fields": "product_name,nutriments,brands,code",
}

# How many pages to scrape
NUM_PAGES = 5

# Seconds to wait for the server before abandoning a request; without a
# timeout `requests` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Output column name -> key inside a product's "nutriments" dict
NUTRIENT_COLUMNS = {
    "Calories (kcal)": "energy-kcal_100g",
    "Proteins (g)": "proteins_100g",
    "Fat (g)": "fat_100g",
    "Carbohydrates (g)": "carbohydrates_100g",
    "Sugar (g)": "sugars_100g",
    "Salt (g)": "salt_100g",
}

# Fixed column order, so the CSV header is written even when nothing is fetched
COLUMNS = ["Product Name", "Brand", *NUTRIENT_COLUMNS, "Code"]


def product_to_row(product):
    """Flatten one product record from the API into a row dict.

    Args:
        product: Dict for a single entry of the response's "products" list.

    Returns:
        Dict keyed by ``COLUMNS``. Missing, null or empty name/brand become
        "N/A"; missing nutriment values and code stay ``None``.
    """
    # `or` (rather than a .get default) also covers keys that are present
    # but hold None or an empty string.
    nutriments = product.get("nutriments") or {}
    row = {
        "Product Name": product.get("product_name") or "N/A",
        "Brand": product.get("brands") or "N/A",
    }
    for column, key in NUTRIENT_COLUMNS.items():
        row[column] = nutriments.get(key)
    row["Code"] = product.get("code")
    return row


def fetch_products(num_pages=NUM_PAGES, delay=1.0):
    """Download up to ``num_pages`` pages of search results.

    Stops at the first failed request, non-200 status, invalid JSON body or
    empty page, and returns whatever was collected up to that point so a
    late failure does not discard earlier pages.

    Args:
        num_pages: Number of result pages to request, starting at page 1.
        delay: Seconds to sleep between consecutive requests.

    Returns:
        List of row dicts as produced by ``product_to_row``.
    """
    rows = []
    for page in range(1, num_pages + 1):
        print(f"🔄 Fetching page {page}...")
        # Build a fresh dict per request instead of mutating the shared params.
        query = {**params, "page": page}

        try:
            response = requests.get(BASE_URL, params=query, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"❌ Failed to fetch page {page}. Error: {exc}")
            break

        if response.status_code != 200:
            print(f"❌ Failed to fetch page {page}. Status code: {response.status_code}")
            break

        try:
            data = response.json()
        except ValueError as exc:  # body was not valid JSON
            print(f"❌ Page {page} did not return valid JSON: {exc}")
            break

        products = data.get("products") or []
        if not products:
            # No more results: further requests would only waste time.
            print(f"ℹ️ No products on page {page}; stopping early.")
            break

        rows.extend(product_to_row(product) for product in products)

        if page < num_pages:
            time.sleep(delay)  # Be polite to the API

    return rows


# In a notebook cell or when run as a script `__name__` is "__main__", so the
# scrape runs and `all_products`, `df` and `csv_filename` are still created as
# globals; merely importing this code no longer triggers network access.
if __name__ == "__main__":
    # Storage for products
    all_products = fetch_products()

    # Convert to DataFrame
    df = pd.DataFrame(all_products, columns=COLUMNS)

    # Save to CSV
    csv_filename = "openfoodfacts_food_data.csv"
    df.to_csv(csv_filename, index=False)

    print(f"\n✅ Data saved to '{csv_filename}'. Total products collected: {len(df)}")


🔄 Fetching page 1...
🔄 Fetching page 2...
🔄 Fetching page 3...
🔄 Fetching page 4...
🔄 Fetching page 5...

✅ Data saved to 'openfoodfacts_food_data.csv'. Total products collected: 500
