In [1]:
pip install requests pandas



In [2]:
import requests
import pandas as pd
import time

# === Your Edamam API credentials here ===
APP_ID = '59456ca4'
APP_KEY = 'ebeb4beaff17d1a14fcec62a904206a3'

# === EXTENDED SEARCH TERMS ===
SEARCH_TERMS = [
    # Ingredients
    'chicken drumsticks', 'chicken nuggets', 'roast chicken breast', 'chicken casserole',
    'beef burgers', 'beef ribs', 'beef brisket', 'steak', 'sirloin steak',
    'pork', 'pork chops', 'pork ribs', 'pork tenderloin', 'bacon',
    'turkey', 'ground turkey', 'turkey breast', 'roast turkey',
    'duck', 'duck breast', 'roast duck', 'confit duck',
    'lamb', 'lamb chops', 'lamb stew', 'ground lamb',
    'fish', 'cod', 'tuna', 'trout', 'tilapia',
    'crab', 'lobster', 'scallops', 'oysters', 'clams',
    'eggs', 'boiled eggs', 'scrambled eggs', 'omelet', 'poached eggs',
    'beans', 'black beans', 'kidney beans', 'chickpeas', 'white beans',
    'cheese', 'cheddar cheese', 'mozzarella', 'parmesan', 'goat cheese',
    'nuts', 'almonds', 'cashews', 'walnuts', 'pecans',
    'seeds', 'chia seeds', 'flax seeds', 'sunflower seeds', 'pumpkin seeds',

    # World Dishes
    'biryani', 'paella', 'ratatouille', 'shakshuka', 'moussaka',
    'bibimbap', 'pho', 'banh mi', 'laksa', 'tamales',
    'empanadas', 'ceviche', 'arepas', 'gnocchi', 'risotto',
    'crepes', 'coq au vin', 'bouillabaisse', 'cassoulet',
    'goulash', 'pierogi', 'sauerbraten', 'jollof rice', 'egusi soup',
    'poutine', 'chow mein', 'lo mein', 'hot pot', 'dim sum',
    'falafel', 'shawarma', 'tabbouleh', 'kofta', 'hummus bowl',

    # Cooking Methods
    'roasted vegetables', 'grilled meat', 'baked potatoes', 'boiled eggs',
    'fried chicken', 'steamed vegetables', 'poached fish', 'slow cooked beef',
    'air fryer recipes', 'instant pot recipes', 'one pot meals', 'sheet pan dinners',

    # Diets
    'whole30', 'diabetic friendly', 'heart healthy', 'low sodium', 'high protein',
    'low fat', 'high fiber', 'anti-inflammatory recipes', 'pcos diet',
    'renal diet', 'dash diet', 'fodmap recipes', 'low sugar dessert',

    # Occasions
    'holiday dinner', 'thanksgiving turkey', 'christmas cookies', 'easter brunch',
    'valentine’s dinner', 'birthday cake', 'summer bbq', 'picnic food',
    'meal prep', 'office lunch', 'kids lunch', 'school snacks',
    'family dinner', 'romantic dinner', 'weeknight dinner', 'lazy meals',

    # Breakfast
    'french toast', 'pancakes', 'waffles', 'granola', 'muffins',
    'breakfast sandwich', 'breakfast casserole', 'breakfast tacos', 'overnight oats',

    # Desserts
    'brownies', 'cheesecake', 'apple pie', 'banana bread', 'ice cream',
    'cupcakes', 'fudge', 'pudding recipes', 'lemon bars', 'fruit tarts',

    # Seasonal
    'summer salad', 'winter soup', 'fall stew', 'spring vegetables',
    'cold pasta salad', 'hot chili', 'light dinner', 'comfort food',

    # Beverages
    'green smoothie', 'protein shake', 'fruit smoothie', 'vegetable juice', 'detox drink',
    'mocktail recipes', 'iced tea', 'bubble tea', 'chai latte', 'cold brew coffee',

    # Grains & Bread
    'sourdough bread', 'cornbread', 'flatbread', 'naan bread', 'bagels',
    'rice dishes', 'wild rice', 'brown rice', 'couscous', 'bulgur',
    'barley soup', 'farro salad', 'millet recipes', 'amaranth bowl',

    # Trendy
    'avocado recipes', 'cauliflower rice', 'zoodles', 'stuffed peppers', 'buddha bowl',
    'poke bowl', 'cloud bread', 'chaffle recipes', 'cabbage soup', 'egg muffins'
]

# Settings
TOTAL_RECIPES_TARGET = 10000  # Can be increased or decreased
RECIPES_PER_TERM = TOTAL_RECIPES_TARGET // len(SEARCH_TERMS)
RECIPES_PER_REQUEST = 100
BASE_URL = 'https://api.edamam.com/api/recipes/v2'

# Function to fetch recipes
def fetch_recipes(query, total_recipes):
    all_recipes = []
    next_url = f"{BASE_URL}?type=public&q={query}&app_id={APP_ID}&app_key={APP_KEY}&random=true"

    fetched = 0
    request_count = 0
    requests_needed = (total_recipes + RECIPES_PER_REQUEST - 1) // RECIPES_PER_REQUEST

    while fetched < total_recipes:
        if request_count >= requests_needed:
            break

        response = requests.get(next_url)
        if response.status_code != 200:
            print(f"Error for '{query}': {response.status_code}")
            break

        data = response.json()
        hits = data.get('hits', [])
        if not hits:
            break

        for hit in hits:
            recipe = hit['recipe']
            all_recipes.append({
                'title': recipe.get('label', ''),
                'ingredients': '; '.join(recipe.get('ingredientLines', [])),
                'calories': recipe['totalNutrients'].get('ENERC_KCAL', {}).get('quantity', 0),
                'fat': recipe['totalNutrients'].get('FAT', {}).get('quantity', 0),
                'protein': recipe['totalNutrients'].get('PROCNT', {}).get('quantity', 0),
                'carbs': recipe['totalNutrients'].get('CHOCDF', {}).get('quantity', 0),
                'url': recipe.get('url', ''),
                'servings': recipe.get('yield', 1)
            })
            fetched += 1
            if fetched >= total_recipes:
                break

        next_link = data.get('_links', {}).get('next', {}).get('href')
        if not next_link:
            break
        next_url = next_link

        request_count += 1
        time.sleep(6)  # Respect API rate limits

    print(f"Fetched {fetched} recipes for '{query}'")
    return all_recipes

# Save to CSV
def save_to_csv(recipes, filename):
    df = pd.DataFrame(recipes)
    df.to_csv(filename, index=False)
    print(f"Saved {len(df)} recipes to {filename}")

# Main Script
if __name__ == "__main__":
    for term in SEARCH_TERMS:
        print(f"\n--- Starting '{term}' ---")
        data = fetch_recipes(term, RECIPES_PER_TERM)
        save_to_csv(data, f"{term.replace(' ', '_')}_recipes.csv")

    print("\nAll scraping tasks completed!")



--- Starting 'chicken drumsticks' ---
Fetched 20 recipes for 'chicken drumsticks'
Saved 20 recipes to chicken_drumsticks_recipes.csv

--- Starting 'chicken nuggets' ---
Fetched 20 recipes for 'chicken nuggets'
Saved 20 recipes to chicken_nuggets_recipes.csv

--- Starting 'roast chicken breast' ---
Fetched 20 recipes for 'roast chicken breast'
Saved 20 recipes to roast_chicken_breast_recipes.csv

--- Starting 'chicken casserole' ---
Fetched 20 recipes for 'chicken casserole'
Saved 20 recipes to chicken_casserole_recipes.csv

--- Starting 'beef burgers' ---
Fetched 20 recipes for 'beef burgers'
Saved 20 recipes to beef_burgers_recipes.csv

--- Starting 'beef ribs' ---
Fetched 20 recipes for 'beef ribs'
Saved 20 recipes to beef_ribs_recipes.csv

--- Starting 'beef brisket' ---
Fetched 20 recipes for 'beef brisket'
Saved 20 recipes to beef_brisket_recipes.csv

--- Starting 'steak' ---
Fetched 20 recipes for 'steak'
Saved 20 recipes to steak_recipes.csv

--- Starting 'sirloin steak' ---
Fe

In [4]:
import pandas as pd
import glob
import os

# Path where your CSV files are stored (adjust if needed)
csv_folder = '.'  # current directory

# Pattern matching all recipe CSV files
csv_files = glob.glob(os.path.join(csv_folder, '*_recipes.csv'))

# List to hold each dataframe
dfs = []

for file in csv_files:
    try:
        df = pd.read_csv(file)
        # Check if the DataFrame is not empty before appending
        if not df.empty:
            dfs.append(df)
        else:
            print(f"Skipping empty file: {file}")
    except pd.errors.EmptyDataError:
        # Handle the specific error if a file is truly empty or only has headers
        print(f"Skipping empty or header-only file: {file}")
    except Exception as e:
        # Catch any other potential errors during reading
        print(f"Error reading file {file}: {e}")


# Check if there are any dataframes to concatenate
if dfs:
    # Combine all dataframes into one
    combined_df = pd.concat(dfs, ignore_index=True)

    # Optional: remove duplicates if any
    combined_df.drop_duplicates(inplace=True)

    # Save the combined dataframe
    combined_df.to_csv('combined_recipes.csv', index=False)

    print(f"Combined {len(csv_files)} files into 'combined_recipes.csv' with {len(combined_df)} rows (skipped empty/invalid files).")
else:
    print("No valid dataframes found to combine.")

Skipping empty or header-only file: ./cupcakes_recipes.csv
Skipping empty or header-only file: ./breakfast_sandwich_recipes.csv
Skipping empty or header-only file: ./light_dinner_recipes.csv
Skipping empty or header-only file: ./cold_pasta_salad_recipes.csv
Skipping empty or header-only file: ./banana_bread_recipes.csv
Skipping empty or header-only file: ./breakfast_casserole_recipes.csv
Skipping empty or header-only file: ./pudding_recipes_recipes.csv
Skipping empty or header-only file: ./summer_salad_recipes.csv
Skipping empty or header-only file: ./fall_stew_recipes.csv
Skipping empty or header-only file: ./pancakes_recipes.csv
Skipping empty or header-only file: ./pcos_diet_recipes.csv
Skipping empty or header-only file: ./overnight_oats_recipes.csv
Skipping empty or header-only file: ./green_smoothie_recipes.csv
Skipping empty or header-only file: ./cheesecake_recipes.csv
Skipping empty or header-only file: ./lazy_meals_recipes.csv
Skipping empty or header-only file: ./granola_rec