In [None]:
# Initial prompt - for the project nutrifusion, I need to scrap 3 types of datasets. At first I need to scrap recipe and nutrients from edamam.
# Final prompt - In the given code give as much as possible search terms so that i can scrap maximum amount of data

In [1]:
pip install requests pandas



In [2]:
import requests
import pandas as pd
import time

# === Your Edamam API credentials here ===
APP_ID = '59456ca4'
APP_KEY = 'ebeb4beaff17d1a14fcec62a904206a3'

# === Extended Search Terms to maximize the data scraping ===
SEARCH_TERMS = [
    # Ingredients
    'chicken', 'chicken breast', 'chicken thighs', 'ground chicken', 'roast chicken', 'grilled chicken', 'chicken soup', 'chicken wings',
    'beef', 'ground beef', 'beef stew', 'roast beef', 'beef tacos', 'beef stir fry',
    'salmon', 'grilled salmon', 'baked salmon', 'salmon fillets', 'salmon steak', 'salmon sushi',
    'tofu', 'fried tofu', 'tofu stir fry', 'tofu scramble', 'tofu curry',
    'mushrooms', 'portobello mushrooms', 'shiitake mushrooms', 'mushroom soup',
    'lentils', 'red lentils', 'green lentils', 'lentil stew', 'lentil curry',
    'shrimp', 'shrimp stir fry', 'shrimp tacos', 'shrimp pasta', 'shrimp soup',
    'avocado', 'avocado toast', 'avocado salad', 'avocado smoothie', 'guacamole',
    'quinoa', 'quinoa salad', 'quinoa stir fry', 'quinoa bowl', 'quinoa soup',

    # Cuisines
    'italian', 'italian pasta', 'italian pizza', 'italian soups', 'italian lasagna', 'italian meatballs',
    'mexican', 'mexican tacos', 'mexican burritos', 'mexican chili', 'mexican enchiladas',
    'indian', 'indian curry', 'indian rice', 'indian lentils', 'indian naan', 'indian biryani',
    'thai', 'thai curry', 'thai noodles', 'thai salad', 'pad thai',
    'french', 'french soup', 'french baguette', 'french cuisine', 'french toast',
    'japanese', 'sushi', 'ramen', 'sushi rolls', 'teriyaki chicken',
    'mediterranean', 'mediterranean salad', 'mediterranean diet', 'mediterranean hummus',
    'korean', 'korean bbq', 'kimchi', 'korean noodles', 'korean soup',

    # Diets
    'vegan', 'vegan soup', 'vegan curry', 'vegan salad', 'vegan dessert',
    'vegetarian', 'vegetarian pizza', 'vegetarian pasta', 'vegetarian stew', 'vegetarian lasagna',
    'gluten free', 'gluten free pizza', 'gluten free pasta', 'gluten free bread',
    'keto', 'keto dinner', 'keto salad', 'keto chicken', 'keto snacks',
    'paleo', 'paleo soup', 'paleo diet', 'paleo salad',
    'low carb', 'low carb dinner', 'low carb salad', 'low carb pizza',
    'dairy free', 'dairy free salad', 'dairy free cheese', 'dairy free cake',

    # Meal Types
    'breakfast', 'breakfast bowl', 'breakfast smoothie', 'breakfast burrito',
    'lunch', 'lunch salad', 'lunch bowl', 'lunch sandwiches',
    'dinner', 'dinner casserole', 'dinner salad', 'dinner pizza',
    'snack', 'snack bars', 'snack bites', 'snack dips',
    'dessert', 'dessert cake', 'dessert cookies', 'dessert pudding',
    'appetizer', 'appetizer salads', 'appetizer dips', 'appetizer recipes',
    'side dish', 'side salad', 'side vegetables', 'side casserole',

    # Dish Types
    'pasta', 'pasta salad', 'pasta dishes', 'pasta soup',
    'pizza', 'pizza dough', 'pizza toppings', 'pizza recipes',
    'soup', 'soup recipes', 'chicken soup', 'vegetable soup',
    'salad', 'green salad', 'fruit salad', 'pasta salad',
    'curry', 'vegetable curry', 'chicken curry', 'beef curry',
    'stir fry', 'chicken stir fry', 'vegetable stir fry', 'shrimp stir fry',
    'sandwich', 'vegan sandwich', 'chicken sandwich', 'vegetarian sandwich'
]

# Settings
TOTAL_RECIPES_TARGET = 8000  # You can change this based on how many recipes you want
RECIPES_PER_TERM = TOTAL_RECIPES_TARGET // len(SEARCH_TERMS)
RECIPES_PER_REQUEST = 100
BASE_URL = 'https://api.edamam.com/api/recipes/v2'

# Function to fetch recipes for a given query
def fetch_recipes(query, total_recipes):
    all_recipes = []
    next_url = f"{BASE_URL}?type=public&q={query}&app_id={APP_ID}&app_key={APP_KEY}&random=true"

    fetched = 0
    request_count = 0
    requests_needed = (total_recipes + RECIPES_PER_REQUEST - 1) // RECIPES_PER_REQUEST

    while fetched < total_recipes:
        if request_count >= requests_needed:
            break

        # Making the API request
        response = requests.get(next_url)
        if response.status_code != 200:
            print(f"Error for '{query}': {response.status_code}")
            break

        data = response.json()
        hits = data.get('hits', [])
        if not hits:
            break

        # Extracting recipe data
        for hit in hits:
            recipe = hit['recipe']
            all_recipes.append({
                'title': recipe.get('label', ''),
                'ingredients': '; '.join(recipe.get('ingredientLines', [])),
                'calories': recipe['totalNutrients'].get('ENERC_KCAL', {}).get('quantity', 0),
                'fat': recipe['totalNutrients'].get('FAT', {}).get('quantity', 0),
                'protein': recipe['totalNutrients'].get('PROCNT', {}).get('quantity', 0),
                'carbs': recipe['totalNutrients'].get('CHOCDF', {}).get('quantity', 0),
                'url': recipe.get('url', ''),
                'servings': recipe.get('yield', 1)
            })
            fetched += 1
            if fetched >= total_recipes:
                break

        # Handling pagination if there's more data
        next_link = data.get('_links', {}).get('next', {}).get('href')
        if not next_link:
            break
        next_url = next_link

        request_count += 1
        # Respect rate limits: wait for 6 seconds between requests
        time.sleep(6)

    print(f"Fetched {fetched} recipes for '{query}'")
    return all_recipes

# Function to save recipes to CSV
def save_to_csv(recipes, filename):
    df = pd.DataFrame(recipes)
    df.to_csv(filename, index=False)
    print(f"Saved {len(df)} recipes to {filename}")

# Main execution
if __name__ == "__main__":
    for term in SEARCH_TERMS:
        print(f"\n--- Starting '{term}' ---")
        # Fetch recipes for each search term
        data = fetch_recipes(term, RECIPES_PER_TERM)
        # Save recipes to CSV
        save_to_csv(data, f"{term.replace(' ', '_')}_recipes.csv")

    print("\nAll scraping tasks completed!")


--- Starting 'chicken' ---
Fetched 20 recipes for 'chicken'
Saved 20 recipes to chicken_recipes.csv

--- Starting 'chicken breast' ---
Fetched 20 recipes for 'chicken breast'
Saved 20 recipes to chicken_breast_recipes.csv

--- Starting 'chicken thighs' ---
Error for 'chicken thighs': 429
Fetched 0 recipes for 'chicken thighs'
Saved 0 recipes to chicken_thighs_recipes.csv

--- Starting 'ground chicken' ---
Fetched 20 recipes for 'ground chicken'
Saved 20 recipes to ground_chicken_recipes.csv

--- Starting 'roast chicken' ---
Error for 'roast chicken': 429
Fetched 0 recipes for 'roast chicken'
Saved 0 recipes to roast_chicken_recipes.csv

--- Starting 'grilled chicken' ---
Fetched 20 recipes for 'grilled chicken'
Saved 20 recipes to grilled_chicken_recipes.csv

--- Starting 'chicken soup' ---
Error for 'chicken soup': 429
Fetched 0 recipes for 'chicken soup'
Saved 0 recipes to chicken_soup_recipes.csv

--- Starting 'chicken wings' ---
Fetched 20 recipes for 'chicken wings'
Saved 20 reci

In [3]:
import zipfile
import os

# Create a zip file containing all *_recipes.csv files
with zipfile.ZipFile('edamam_csvs.zip', 'w') as zipf:
    for file in os.listdir():
        if file.endswith('_recipes.csv'):
            zipf.write(file)

print("All CSVs zipped to 'edamam_csvs.zip'")

All CSVs zipped to 'edamam_csvs.zip'


In [4]:
from google.colab import files
files.download('edamam_csvs.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>