In [3]:
import requests
import pandas as pd
import time

# Search endpoint that every page request is sent to.
BASE_URL = "https://world.openfoodfacts.org/cgi/search.pl"

# Query-string arguments shared by all requests. "fields" restricts the
# response to the attributes that end up in the output table.
params = dict(
    search_terms="vegetables",
    page_size=50,
    json=1,
    fields="product_name,nutriments,brands,code,ingredients_text,serving_suggestions",
)

# Number of result pages to request (pages are numbered from 1).
NUM_PAGES = 5

# Accumulator: one flat dict per product, across all fetched pages.
all_products = list()

# Download NUM_PAGES pages of search results into all_products.
# Each page gets up to `retries` attempts; if a page still fails, the whole
# download stops and whatever was collected so far is kept.
for page in range(1, NUM_PAGES + 1):
    success = False
    retries = 3
    # The page number is the same for every attempt, so set it once per page.
    params["page"] = page

    for attempt in range(retries):
        print(f"🔄 Fetching page {page}, attempt {attempt + 1}...")
        try:
            response = requests.get(BASE_URL, params=params, timeout=10)
            if response.status_code == 200:
                data = response.json()
                # `or []` / `or {}` also cover keys that are present but null.
                products = data.get("products") or []

                # Stage the rows locally and commit them only once the whole
                # page has been parsed: if anything raises part-way through,
                # nothing has been appended yet, so the retry cannot produce
                # duplicate products.
                page_rows = []
                for product in products:
                    nutriments = product.get("nutriments") or {}
                    page_rows.append({
                        "Product Name": product.get("product_name", "N/A"),
                        "Brand": product.get("brands", "N/A"),
                        "Calories (kcal)": nutriments.get("energy-kcal_100g"),
                        "Proteins (g)": nutriments.get("proteins_100g"),
                        "Fat (g)": nutriments.get("fat_100g"),
                        "Carbohydrates (g)": nutriments.get("carbohydrates_100g"),
                        "Sugar (g)": nutriments.get("sugars_100g"),
                        "Salt (g)": nutriments.get("salt_100g"),
                        "Code": product.get("code"),
                        "Ingredients Text": product.get("ingredients_text", "N/A"),
                        "Serving Suggestions": product.get("serving_suggestions", "N/A")
                    })
                all_products.extend(page_rows)
                success = True
                break

            print(f"❌ Status code {response.status_code}")
        except Exception as e:
            # Best-effort scraping: network errors, timeouts and malformed
            # JSON are all reported and retried rather than aborting the cell.
            print(f"⚠️ Error on attempt {attempt + 1}: {e}")

        # Back off after ANY failed attempt (bad status code or exception)
        # so retries do not hit the server back-to-back; there is nothing to
        # wait for after the final attempt.
        if attempt < retries - 1:
            time.sleep(2)

    if not success:
        print(f"❌ Failed to fetch page {page} after {retries} attempts.")
        break

    # Pause between pages to stay polite to the public API.
    time.sleep(1)

# Build a table from the collected product records and write it to disk
# (without the index column), then report how many rows were saved.
df = pd.DataFrame(data=all_products)
df.to_csv(
    path_or_buf="openfoodfacts_vegetables_with_pseudo_recipes.csv",
    index=False,
)
print(f"\n✅ Saved {len(df)} products to 'openfoodfacts_vegetables_with_pseudo_recipes.csv'")


🔄 Fetching page 1, attempt 1...
🔄 Fetching page 2, attempt 1...
🔄 Fetching page 3, attempt 1...
🔄 Fetching page 4, attempt 1...
🔄 Fetching page 5, attempt 1...

✅ Saved 250 products to 'openfoodfacts_vegetables_with_pseudo_recipes.csv'


In [1]:
pip install googletrans==4.0.0-rc1




In [2]:
import pandas as pd
from googletrans import Translator

# Load the scraped products.
# "Code" holds product barcodes, which are identifiers rather than numbers:
# reading the column as text keeps leading zeros (and avoids float
# conversion when some codes are missing) that numeric inference would lose.
df = pd.read_csv(
    "openfoodfacts_vegetables_with_pseudo_recipes.csv",
    dtype={"Code": str},
)

# Translation client shared by every translate() call.
translator = Translator()

# Translate Ingredients Text
def translate(text):
    """Translate ``text`` to English, falling back to the input on failure.

    Args:
        text: The string to translate. Non-string values (e.g. missing
            values) and blank strings are returned unchanged without
            contacting the translation service.

    Returns:
        The English translation, or ``text`` itself when it was not
        translatable or the translation call raised an error.
    """
    if not isinstance(text, str) or not text.strip():
        return text
    try:
        return translator.translate(text, dest='en').text
    except Exception:
        # Best-effort: keep the original text if the service fails.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long translation run can be interrupted.
        return text

# Run each ingredient list through translate(); na_action="ignore" leaves
# missing values untouched instead of passing them to the function.
df["Ingredients Text"] = df["Ingredients Text"].map(translate, na_action="ignore")

# Write the translated table to a new CSV (no index column) and confirm.
df.to_csv(path_or_buf="vegetables_ingredients_translated.csv", index=False)
print("✅ Translated file saved as 'vegetables_ingredients_translated.csv'")


✅ Translated file saved as 'vegetables_ingredients_translated.csv'
