In [3]:
import requests
import pandas as pd

def get_meals_by_category(category='Beef'):
    """Get meals list by category from TheMealDB API"""
    url = f"https://www.themealdb.com/api/json/v1/1/filter.php?c={category}"
    response = requests.get(url)
    return response.json().get('meals', [])

def get_meal_details(meal_id):
    """Get full details of a specific meal"""
    url = f"https://www.themealdb.com/api/json/v1/1/lookup.php?i={meal_id}"
    response = requests.get(url)
    meals = response.json().get('meals', [])
    return meals[0] if meals else {}

def extract_ingredients(meal):
    """Extract ingredients and measurements as a dictionary"""
    ingredients = {}
    for i in range(1, 21):  # TheMealDB supports up to 20 ingredients
        ing = meal.get(f"strIngredient{i}")
        meas = meal.get(f"strMeasure{i}")
        if ing and ing.strip():
            ingredients[ing.strip()] = meas.strip() if meas else ''
    return ingredients

def scrape_meals(category='Beef', limit=10):
    """Scrape multiple meals and return as a DataFrame"""
    meals = get_meals_by_category(category)
    meal_data = []

    for meal in meals[:limit]:
        details = get_meal_details(meal['idMeal'])
        data = {
            'MealID': meal['idMeal'],
            'Meal': details.get('strMeal'),
            'Category': details.get('strCategory'),
            'Area': details.get('strArea'),
            'Instructions': details.get('strInstructions'),
            'Tags': details.get('strTags'),
            'YouTube': details.get('strYoutube'),
            'Ingredients': extract_ingredients(details)
        }
        meal_data.append(data)

    return pd.DataFrame(meal_data)

# Example usage:
df_recipes = scrape_meals(category='Chicken', limit=5)
df_recipes.to_csv("themealdb_recipes.csv", index=False)
print("✅ Scraped and saved recipes to 'themealdb_recipes.csv'")

✅ Scraped and saved recipes to 'themealdb_recipes.csv'


In [None]:
import requests
import pandas as pd
import time

def get_all_categories():
    url = "https://www.themealdb.com/api/json/v1/1/list.php?c=list"
    response = requests.get(url)
    return [item['strCategory'] for item in response.json().get('meals', [])]

def get_meals_by_category(category):
    url = f"https://www.themealdb.com/api/json/v1/1/filter.php?c={category}"
    response = requests.get(url)
    return response.json().get('meals', [])

def get_meal_details(meal_id):
    url = f"https://www.themealdb.com/api/json/v1/1/lookup.php?i={meal_id}"
    response = requests.get(url)
    meals = response.json().get('meals', [])
    return meals[0] if meals else {}

def extract_ingredients(meal):
    ingredients = {}
    for i in range(1, 21):
        ing = meal.get(f"strIngredient{i}")
        meas = meal.get(f"strMeasure{i}")
        if ing and ing.strip():
            ingredients[ing.strip()] = meas.strip() if meas else ''
    return ingredients

def scrape_all_recipes(minimum=100):
    all_data = []
    seen_ids = set()
    categories = get_all_categories()

    for category in categories:
        meals = get_meals_by_category(category)
        for meal in meals:
            meal_id = meal['idMeal']
            if meal_id not in seen_ids:
                details = get_meal_details(meal_id)
                recipe = {
                    'MealID': meal_id,
                    'Meal': details.get('strMeal'),
                    'Category': details.get('strCategory'),
                    'Area': details.get('strArea'),
                    'Instructions': details.get('strInstructions'),
                    'Tags': details.get('strTags'),
                    'YouTube': details.get('strYoutube'),
                    'Ingredients': extract_ingredients(details)
                }
                all_data.append(recipe)
                seen_ids.add(meal_id)
                time.sleep(0.2)  # Be polite to the API server

                if len(all_data) >= minimum:
                    break
        if len(all_data) >= minimum:
            break

    return pd.DataFrame(all_data)

# Scrape and save at least 120 recipes
df_all_recipes = scrape_all_recipes(minimum=120)
df_all_recipes.to_csv("all_mealdb_recipes_120+.csv", index=False)
print(f"✅ Scraped {len(df_all_recipes)} recipes and saved to 'all_mealdb_recipes_120+.csv'")

In [1]:
import requests
import pandas as pd
import time

def get_all_categories():
    url = "https://www.themealdb.com/api/json/v1/1/list.php?c=list"
    response = requests.get(url)
    return [item['strCategory'] for item in response.json().get('meals', [])]

def get_meals_by_category(category):
    url = f"https://www.themealdb.com/api/json/v1/1/filter.php?c={category}"
    response = requests.get(url)
    return response.json().get('meals', [])

def get_meal_details(meal_id):
    url = f"https://www.themealdb.com/api/json/v1/1/lookup.php?i={meal_id}"
    response = requests.get(url)
    meals = response.json().get('meals', [])
    return meals[0] if meals else {}

def extract_ingredients(meal):
    ingredients = {}
    for i in range(1, 21):
        ing = meal.get(f"strIngredient{i}")
        meas = meal.get(f"strMeasure{i}")
        if ing and ing.strip():
            ingredients[ing.strip()] = meas.strip() if meas else ''
    return ingredients

def scrape_all_available_recipes():
    all_data = []
    seen_ids = set()
    categories = get_all_categories()

    for category in categories:
        meals = get_meals_by_category(category)
        for meal in meals:
            meal_id = meal['idMeal']
            if meal_id not in seen_ids:
                details = get_meal_details(meal_id)
                recipe = {
                    'MealID': meal_id,
                    'Meal': details.get('strMeal'),
                    'Category': details.get('strCategory'),
                    'Area': details.get('strArea'),
                    'Instructions': details.get('strInstructions'),
                    'Tags': details.get('strTags'),
                    'YouTube': details.get('strYoutube'),
                    'Ingredients': extract_ingredients(details)
                }
                all_data.append(recipe)
                seen_ids.add(meal_id)
                time.sleep(0.2)  # Be polite to the API server

    return pd.DataFrame(all_data)

# Scrape all available recipes (~300+)
df_all = scrape_all_available_recipes()
df_all.to_csv("TheMealDB_all_recipes.csv", index=False)
print(f"✅ Scraped {len(df_all)} unique recipes and saved to 'TheMealDB_all_recipes.csv'")

✅ Scraped 304 unique recipes and saved to 'TheMealDB_all_recipes.csv'


In [2]:
import requests
import pandas as pd
import time

API_KEY = "your_usda_api_key_here"  # Replace this with your actual API key

def search_usda_ingredient(query):
    """Search for an ingredient on USDA FoodData Central"""
    url = f"https://api.nal.usda.gov/fdc/v1/foods/search"
    params = {
        "query": query,
        "pageSize": 1,
        "api_key": API_KEY
    }
    r = requests.get(url, params=params)
    results = r.json().get("foods", [])
    return results[0] if results else None

def get_nutrients(fdc_id):
    """Get nutrient details for a specific food item"""
    url = f"https://api.nal.usda.gov/fdc/v1/food/{fdc_id}"
    params = {"api_key": API_KEY}
    r = requests.get(url, params=params)
    data = r.json()

    nutrients = {}
    for item in data.get("foodNutrients", []):
        name = item.get("nutrientName", "").lower()
        amount = item.get("value", 0)
        unit = item.get("unitName", "")
        if name in ["energy", "protein", "total lipid (fat)", "carbohydrate, by difference"]:
            nutrients[name] = f"{amount} {unit}"
    return nutrients

def estimate_recipe_nutrition(ingredients_dict):
    """Estimate total nutrition from all ingredients in a recipe"""
    recipe_nutrition = {
        'calories_kcal': 0,
        'protein_g': 0,
        'fat_g': 0,
        'carbs_g': 0
    }

    for ingredient in ingredients_dict:
        try:
            item = search_usda_ingredient(ingredient)
            if not item: continue

            nutrients = get_nutrients(item["fdcId"])
            # Convert to standard grams for rough estimates (100g servings)
            recipe_nutrition['calories_kcal'] += float(nutrients.get("energy", "0 kcal").split()[0])
            recipe_nutrition['protein_g'] += float(nutrients.get("protein", "0 g").split()[0])
            recipe_nutrition['fat_g'] += float(nutrients.get("total lipid (fat)", "0 g").split()[0])
            recipe_nutrition['carbs_g'] += float(nutrients.get("carbohydrate, by difference", "0 g").split()[0])

            time.sleep(0.3)  # Avoid rate limits
        except Exception as e:
            print(f"Error with ingredient '{ingredient}': {e}")

    return recipe_nutrition

# Load your previously scraped CSV
df = pd.read_csv("TheMealDB_all_recipes.csv")
df['EstimatedCalories_kcal'] = 0.0
df['EstimatedProtein_g'] = 0.0
df['EstimatedFat_g'] = 0.0
df['EstimatedCarbs_g'] = 0.0

# Ingredients column is a stringified dict, so convert it
import ast
df['Ingredients'] = df['Ingredients'].apply(ast.literal_eval)

for idx, row in df.iterrows():
    nutrition = estimate_recipe_nutrition(row['Ingredients'])
    df.at[idx, 'EstimatedCalories_kcal'] = nutrition['calories_kcal']
    df.at[idx, 'EstimatedProtein_g'] = nutrition['protein_g']
    df.at[idx, 'EstimatedFat_g'] = nutrition['fat_g']
    df.at[idx, 'EstimatedCarbs_g'] = nutrition['carbs_g']
    print(f"Processed: {row['Meal']}")

# Save enriched dataset
df.to_csv("TheMealDB_with_nutrition.csv", index=False)
print("✅ Nutrition values added and saved to 'TheMealDB_with_nutrition.csv'")


Processed: Beef and Mustard Pie
Processed: Beef and Oyster pie
Processed: Beef Asado
Processed: Beef Banh Mi Bowls with Sriracha Mayo, Carrot & Pickled Cucumber
Processed: Beef Bourguignon
Processed: Beef Brisket Pot Roast
Processed: Beef Caldereta
Processed: Beef Dumpling Stew
Processed: Beef Lo Mein
Processed: Beef Mechado
Processed: Beef Rendang
Processed: Beef stroganoff
Processed: Beef Sunday Roast
Processed: Beef Wellington
Processed: Big Mac
Processed: Bistek
Processed: Bitterballen (Dutch meatballs)
Processed: Braised Beef Chilli
Processed: Cevapi Sausages
Processed: Chivito uruguayo
Processed: Corned Beef and Cabbage
Processed: Croatian Bean Stew
Processed: Croatian lamb peka
Processed: Egyptian Fatteh
Processed: Golabki (cabbage roll)
Processed: Irish stew
Processed: Jamaican Beef Patties
Processed: Ma Po Tofu
Processed: Massaman Beef curry
Processed: Minced Beef Pie
Processed: Montreal Smoked Meat
Processed: Moussaka
Processed: Mulukhiyah
Processed: Oxtail with broad beans
P

In [None]:
import requests
import pandas as pd
import time

# Replace with your own credentials
APP_ID = 'your_app_id'
APP_KEY = 'your_app_key'

def get_recipes(query, max_results=50):
    url = 'https://api.edamam.com/api/recipes/v2'
    params = {
        'type': 'public',
        'q': query,
        'app_id': APP_ID,
        'app_key': APP_KEY,
        'random': 'true'
    }
    recipes = []
    seen_uris = set()

    while len(recipes) < max_results:
        response = requests.get(url, params=params)
        data = response.json()
        hits = data.get('hits', [])
        for hit in hits:
            recipe_data = hit['recipe']
            uri = recipe_data['uri']
            if uri in seen_uris:
                continue
            seen_uris.add(uri)

            recipe = {
                'Label': recipe_data['label'],
                'Source': recipe_data['source'],
                'URL': recipe_data['url'],
                'Servings': recipe_data['yield'],
                'Calories': recipe_data['calories'],
                'TotalWeight': recipe_data['totalWeight'],
                'CuisineType': recipe_data.get('cuisineType', []),
                'MealType': recipe_data.get('mealType', []),
                'DishType': recipe_data.get('dishType', []),
                'Ingredients': '; '.join([i['text'] for i in recipe_data['ingredients']]),
                'Nutrients': recipe_data['totalNutrients']
            }
            recipes.append(recipe)

            if len(recipes) >= max_results:
                break

        if '_links' in data and 'next' in data['_links']:
            url = data['_links']['next']['href']
            params = None  # URL already contains all parameters
            time.sleep(1)
        else:
            break

    return recipes

def flatten_nutrients(nutrients_dict):
    flattened = {}
    for k, v in nutrients_dict.items():
        flattened[f"{k}_{v.get('label')}"] = v.get('quantity')
    return flattened

def build_dataframe(recipes):
    records = []
    for r in recipes:
        base = {
            'Label': r['Label'],
            'Source': r['Source'],
            'URL': r['URL'],
            'Servings': r['Servings'],
            'Calories': r['Calories'],
            'TotalWeight': r['TotalWeight'],
            'CuisineType': ', '.join(r['CuisineType']),
            'MealType': ', '.join(r['MealType']),
            'DishType': ', '.join(r['DishType']),
            'Ingredients': r['Ingredients']
        }
        nutrients = flatten_nutrients(r['Nutrients'])
        base.update(nutrients)
        records.append(base)
    return pd.DataFrame(records)

# Run the scraper
recipes = get_recipes("chicken", max_results=50)
df = build_dataframe(recipes)
df.to_csv("Edamam_chicken_recipes_with_nutrients.csv", index=False)
print(f"✅ Saved {len(df)} recipes with nutrient info to 'Edamam_chicken_recipes_with_nutrients.csv'")


In [4]:
import pandas as pd

# Load both recipe files
df_120 = pd.read_csv("themealdb_recipes.csv")
df_302 = pd.read_csv("TheMealDB_all_recipes.csv")

# Concatenate both datasets
df_combined = pd.concat([df_120, df_302], ignore_index=True)

# Drop duplicates based on MealID
df_combined = df_combined.drop_duplicates(subset='MealID')

# Save the merged dataset
df_combined.to_csv("TheMealDB_merged_recipes.csv", index=False)

print(f"✅ Merged dataset saved with {len(df_combined)} unique recipes.")


✅ Merged dataset saved with 304 unique recipes.


In [5]:
import requests
import pandas as pd
import time

API_KEY = "your_usda_api_key"

def search_foods(query, max_results=20):
    url = "https://api.nal.usda.gov/fdc/v1/foods/search"
    params = {
        "api_key": API_KEY,
        "query": query,
        "pageSize": max_results
    }
    response = requests.get(url, params=params)
    return response.json().get("foods", [])

def get_food_details(fdc_id):
    url = f"https://api.nal.usda.gov/fdc/v1/food/{fdc_id}"
    params = {"api_key": API_KEY}
    response = requests.get(url, params=params)
    return response.json()

def build_food_nutrient_dataset(query, max_results=20):
    results = []
    foods = search_foods(query, max_results=max_results)
    for food in foods:
        fdc_id = food["fdcId"]
        details = get_food_details(fdc_id)
        nutrients = {n["nutrientName"]: n["value"] for n in details.get("foodNutrients", [])}
        record = {
            "FDC_ID": fdc_id,
            "Description": details.get("description"),
            "Brand": details.get("brandOwner", ""),
            **nutrients
        }
        results.append(record)
        time.sleep(0.2)
    return pd.DataFrame(results)

# Example: get nutrient data for 20 common foods related to "chicken"
df = build_food_nutrient_dataset("chicken", max_results=20)
df.to_csv("USDA_chicken_ingredients_nutrients.csv", index=False)
print(f"✅ Saved {len(df)} items with nutrient values to 'USDA_chicken_ingredients_nutrients.csv'")


✅ Saved 0 items with nutrient values to 'USDA_chicken_ingredients_nutrients.csv'


In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

def get_tasty_recipe_links(pages=10):
    base_url = "https://tasty.co/page/"
    recipe_links = []

    for page in range(1, pages + 1):
        url = base_url + str(page)
        res = requests.get(url)
        soup = BeautifulSoup(res.content, "html.parser")
        for link in soup.find_all("a", href=True):
            href = link["href"]
            if href.startswith("/recipe/") and href not in recipe_links:
                recipe_links.append("https://tasty.co" + href)
        time.sleep(1)
    return list(set(recipe_links))

def scrape_tasty_recipe(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.content, "html.parser")

    title = soup.find("h1").text.strip() if soup.find("h1") else ""
    ingredients = [li.text.strip() for li in soup.select(".ingredients__section li")]
    instructions = [step.text.strip() for step in soup.select(".prep-steps li")]
    nutrition = soup.find("div", class_="nutrition")

    return {
        "Title": title,
        "Ingredients": "; ".join(ingredients),
        "Instructions": " | ".join(instructions),
        "Nutrition": nutrition.text.strip() if nutrition else "N/A",
        "URL": url
    }

def scrape_tasty_recipes(max_recipes=300):
    links = get_tasty_recipe_links(pages=30)
    print(f"🔗 Found {len(links)} recipe links.")
    all_recipes = []

    for i, link in enumerate(links[:max_recipes]):
        print(f"Scraping ({i+1}/{max_recipes}): {link}")
        try:
            recipe = scrape_tasty_recipe(link)
            all_recipes.append(recipe)
        except Exception as e:
            print(f"❌ Failed to scrape: {e}")
        time.sleep(1)

    return pd.DataFrame(all_recipes)

# Run it
df = scrape_tasty_recipes(max_recipes=300)
df.to_csv("tasty_300_recipes_with_nutrition.csv", index=False)
print(f"✅ Saved {len(df)} recipes to 'tasty_300_recipes_with_nutrition.csv'")


🔗 Found 0 recipe links.
✅ Saved 0 recipes to 'tasty_300_recipes_with_nutrition.csv'


In [7]:
pip install recipe-scrapers pandas


Collecting recipe-scrapers
  Downloading recipe_scrapers-15.7.1-py3-none-any.whl.metadata (6.2 kB)
Collecting extruct>=0.17.0 (from recipe-scrapers)
  Downloading extruct-0.18.0-py2.py3-none-any.whl.metadata (36 kB)
Collecting isodate>=0.6.1 (from recipe-scrapers)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting lxml-html-clean (from extruct>=0.17.0->recipe-scrapers)
  Downloading lxml_html_clean-0.4.2-py3-none-any.whl.metadata (2.4 kB)
Collecting rdflib>=6.0.0 (from extruct>=0.17.0->recipe-scrapers)
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting pyrdfa3 (from extruct>=0.17.0->recipe-scrapers)
  Downloading pyRdfa3-3.6.4-py3-none-any.whl.metadata (3.4 kB)
Collecting mf2py (from extruct>=0.17.0->recipe-scrapers)
  Downloading mf2py-2.0.1-py3-none-any.whl.metadata (5.4 kB)
Collecting w3lib (from extruct>=0.17.0->recipe-scrapers)
  Downloading w3lib-2.3.1-py3-none-any.whl.metadata (2.3 kB)
Collecting html-text>=0.5.1 (from extruct>=0.17.0->

In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from recipe_scrapers import scrape_me

def get_recipe_links(category_url, max_recipes=300):
    recipe_links = []
    page = 1
    while len(recipe_links) < max_recipes:
        url = f"{category_url}?page={page}"
        print(f"Fetching: {url}")
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to fetch page {page}")
            break
        soup = BeautifulSoup(response.content, 'html.parser')
        links = soup.select('a.card__titleLink')
        if not links:
            print("No more recipes found.")
            break
        for link in links:
            href = link.get('href')
            if href and href.startswith('https://www.allrecipes.com/recipe/'):
                recipe_links.append(href)
                if len(recipe_links) >= max_recipes:
                    break
        page += 1
        time.sleep(1)  # Be polite to the server
    return recipe_links

def scrape_recipe(url):
    try:
        scraper = scrape_me(url)
        return {
            'Title': scraper.title(),
            'Total Time': scraper.total_time(),
            'Yields': scraper.yields(),
            'Ingredients': scraper.ingredients(),
            'Instructions': scraper.instructions(),
            'Nutrients': scraper.nutrients() if hasattr(scraper, 'nutrients') else None,
            'URL': url
        }
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

def main():
    category_url = 'https://www.allrecipes.com/recipes/78/breakfast-and-brunch/'
    recipe_urls = get_recipe_links(category_url, max_recipes=300)
    print(f"Found {len(recipe_urls)} recipe URLs.")

    data = []
    for idx, url in enumerate(recipe_urls, 1):
        print(f"Scraping ({idx}/{len(recipe_urls)}): {url}")
        recipe = scrape_recipe(url)
        if recipe:
            data.append(recipe)
        time.sleep(1)  # Be polite to the server

    df = pd.DataFrame(data)
    df.to_csv('allrecipes_recipes.csv', index=False)
    print(f"✅ Saved {len(df)} recipes to 'allrecipes_recipes.csv'")

if __name__ == "__main__":
    main()


Fetching: https://www.allrecipes.com/recipes/78/breakfast-and-brunch/?page=1
Failed to fetch page 1
Found 0 recipe URLs.
✅ Saved 0 recipes to 'allrecipes_recipes.csv'


In [9]:
# STEP 1: Install required packages
!pip install requests beautifulsoup4 pandas

# STEP 2: Import necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import time

# STEP 3: Get recipe URLs from Tasty browse pages
def get_recipe_urls(pages=30):
    recipe_urls = set()
    for page in range(1, pages + 1):
        print(f"🔎 Fetching page {page}...")
        try:
            url = f"https://tasty.co/browse?page={page}"
            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
            for a in soup.find_all('a', href=True):
                href = a['href']
                if href.startswith("/recipe/"):
                    full_url = "https://tasty.co" + href.split("?")[0]
                    recipe_urls.add(full_url)
            time.sleep(1)
        except Exception as e:
            print(f"❌ Failed on page {page}: {e}")
    return list(recipe_urls)

# STEP 4: Extract recipe data from JSON-LD
def extract_recipe_data(url):
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        scripts = soup.find_all('script', type='application/ld+json')
        for script in scripts:
            try:
                data = json.loads(script.string)
                if isinstance(data, list):
                    for entry in data:
                        if entry.get('@type') == 'Recipe':
                            return entry
                elif data.get('@type') == 'Recipe':
                    return data
            except json.JSONDecodeError:
                continue
    except Exception as e:
        print(f"⚠️ Error scraping {url}: {e}")
    return None

# STEP 5: Scrape all data
recipe_urls = get_recipe_urls(pages=30)  # You can increase pages for more
print(f"✅ Found {len(recipe_urls)} unique recipe URLs.")

recipes = []
for idx, url in enumerate(recipe_urls):
    print(f"📦 Scraping {idx+1}/{len(recipe_urls)}: {url}")
    data = extract_recipe_data(url)
    if data:
        recipes.append({
            "Title": data.get("name", ""),
            "Ingredients": "\n".join(data.get("recipeIngredient", [])),
            "Instructions": data.get("recipeInstructions", ""),
            "Nutrition": data.get("nutrition", {}),
            "URL": url
        })
    time.sleep(1)  # Be polite

# STEP 6: Save to CSV
df = pd.DataFrame(recipes)
df.to_csv("tasty_recipes.csv", index=False)
print(f"\n✅ Scraped {len(df)} recipes and saved to 'tasty_recipes.csv'")
df.head()


🔎 Fetching page 1...
🔎 Fetching page 2...
🔎 Fetching page 3...
🔎 Fetching page 4...
🔎 Fetching page 5...
🔎 Fetching page 6...
🔎 Fetching page 7...
🔎 Fetching page 8...
🔎 Fetching page 9...
🔎 Fetching page 10...
🔎 Fetching page 11...
🔎 Fetching page 12...
🔎 Fetching page 13...
🔎 Fetching page 14...
🔎 Fetching page 15...
🔎 Fetching page 16...
🔎 Fetching page 17...
🔎 Fetching page 18...
🔎 Fetching page 19...
🔎 Fetching page 20...
🔎 Fetching page 21...
🔎 Fetching page 22...
🔎 Fetching page 23...
🔎 Fetching page 24...
🔎 Fetching page 25...
🔎 Fetching page 26...
🔎 Fetching page 27...
🔎 Fetching page 28...
🔎 Fetching page 29...
🔎 Fetching page 30...
✅ Found 0 unique recipe URLs.

✅ Scraped 0 recipes and saved to 'tasty_recipes.csv'


In [10]:
import requests
import pandas as pd
import time

# Replace with your actual app_id and app_key
app_id = 'your_app_id'
app_key = 'your_app_key'

# Define health conditions and corresponding Edamam health labels
health_conditions = {
    'diabetes': ['low-sugar'],
    'hypertension': ['low-sodium'],
    'celiac': ['gluten-free'],
    'kidney_disease': ['low-potassium'],
    'heart_disease': ['low-fat']
}

# Initialize an empty list to store recipe data
all_recipes = []

# Iterate over each health condition
for condition, labels in health_conditions.items():
    for label in labels:
        print(f"Fetching recipes for {condition} with label {label}...")
        url = 'https://api.edamam.com/search'
        params = {
            'q': '',  # Empty query to get a broad range of recipes
            'app_id': app_id,
            'app_key': app_key,
            'health': label,
            'from': 0,
            'to': 100  # Adjust as needed; max 100 per request
        }
        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            for hit in data['hits']:
                recipe = hit['recipe']
                all_recipes.append({
                    'Title': recipe.get('label'),
                    'Ingredients': recipe.get('ingredientLines'),
                    'Calories': recipe.get('calories'),
                    'Total Weight': recipe.get('totalWeight'),
                    'Diet Labels': recipe.get('dietLabels'),
                    'Health Labels': recipe.get('healthLabels'),
                    'Cuisine Type': recipe.get('cuisineType'),
                    'Meal Type': recipe.get('mealType'),
                    'Dish Type': recipe.get('dishType'),
                    'URL': recipe.get('url')
                })
        else:
            print(f"Failed to fetch recipes for {condition} with label {label}. Status code: {response.status_code}")
        time.sleep(1)  # To respect API rate limits

# Convert the list to a DataFrame
df = pd.DataFrame(all_recipes)

# Save to CSV
df.to_csv('nutrifusion_recipes.csv', index=False)
print("Recipes saved to 'nutrifusion_recipes.csv'")


Fetching recipes for diabetes with label low-sugar...
Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
Fetching recipes for hypertension with label low-sodium...
Failed to fetch recipes for hypertension with label low-sodium. Status code: 404
Fetching recipes for celiac with label gluten-free...
Failed to fetch recipes for celiac with label gluten-free. Status code: 404
Fetching recipes for kidney_disease with label low-potassium...
Failed to fetch recipes for kidney_disease with label low-potassium. Status code: 404
Fetching recipes for heart_disease with label low-fat...
Failed to fetch recipes for heart_disease with label low-fat. Status code: 404
Recipes saved to 'nutrifusion_recipes.csv'


In [11]:
# STEP 0: Install dependencies (for Google Colab or local)
!pip install requests pandas

# STEP 1: Import libraries
import requests
import pandas as pd
import time

# STEP 2: Add your Edamam credentials here
app_id = '59456ca4'  # <-- Replace this
app_key = 'ebeb4beaff17d1a14fcec62a904206a3'  # <-- Replace this

# STEP 3: Define health conditions and supported Edamam labels
health_conditions = {
    'diabetes': ['low-sugar', 'sugar-conscious'],
    'hypertension': ['low-fat-abs'],
    'celiac': ['gluten-free'],
    'kidney_disease': ['kidney-friendly', 'low-potassium'],
    'heart_disease': ['low-fat-abs', 'no-oil-added']
}

# STEP 4: Create an empty list for all recipes
all_recipes = []

# STEP 5: Loop through conditions and fetch recipes
for condition, labels in health_conditions.items():
    for label in labels:
        print(f"🔍 Fetching recipes for {condition} using label: {label}")

        for start in range(0, 100, 20):  # Edamam returns max 100; 20 per batch
            url = 'https://api.edamam.com/search'
            params = {
                'q': '',  # General query for variety
                'app_id': app_id,
                'app_key': app_key,
                'health': label,
                'from': start,
                'to': start + 20
            }

            response = requests.get(url, params=params)
            if response.status_code == 200:
                data = response.json()
                for hit in data['hits']:
                    recipe = hit['recipe']
                    all_recipes.append({
                        'Condition': condition,
                        'Title': recipe.get('label'),
                        'Ingredients': recipe.get('ingredientLines'),
                        'Calories': recipe.get('calories'),
                        'Total Weight': recipe.get('totalWeight'),
                        'Diet Labels': recipe.get('dietLabels'),
                        'Health Labels': recipe.get('healthLabels'),
                        'Cuisine Type': recipe.get('cuisineType'),
                        'Meal Type': recipe.get('mealType'),
                        'Dish Type': recipe.get('dishType'),
                        'URL': recipe.get('url')
                    })
            else:
                print(f"❌ Failed to fetch recipes for {condition} with label {label}. Status code: {response.status_code}")
            time.sleep(1)  # To avoid hitting rate limit

# STEP 6: Save results
df = pd.DataFrame(all_recipes)
df.to_csv('nutrifusion_recipes.csv', index=False)

# STEP 7: Output summary
print(f"✅ Scraped {len(df)} recipes and saved to 'nutrifusion_recipes.csv'")
df.head()


🔍 Fetching recipes for diabetes using label: low-sugar
❌ Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
❌ Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
❌ Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
❌ Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
❌ Failed to fetch recipes for diabetes with label low-sugar. Status code: 404
🔍 Fetching recipes for diabetes using label: sugar-conscious
❌ Failed to fetch recipes for diabetes with label sugar-conscious. Status code: 404
❌ Failed to fetch recipes for diabetes with label sugar-conscious. Status code: 404
❌ Failed to fetch recipes for diabetes with label sugar-conscious. Status code: 404
❌ Failed to fetch recipes for diabetes with label sugar-conscious. Status code: 404
❌ Failed to fetch recipes for diabetes with label sugar-conscious. Status code: 404
🔍 Fetching recipes for hypertension using label: low-fat-abs
❌ Failed to f

In [13]:
# Step 1: Install required packages
!pip install requests pandas

# Step 2: Import libraries
import requests
import pandas as pd
import time

# Step 3: Set your Spoonacular API key
api_key = '816057ea03c2490ab4329875887ce57b'  # <-- Replace with your key

# Step 4: Function to fetch detailed recipe info by ID
def get_recipe_info(recipe_id):
    url = f"https://api.spoonacular.com/recipes/{recipe_id}/information"
    params = {"includeNutrition": True, "apiKey": api_key}
    res = requests.get(url, params=params)
    if res.status_code == 200:
        return res.json()
    return None

# Step 5: Fetch 300+ recipes using bulk search
all_recipes = []
offset = 0
while len(all_recipes) < 300:
    url = "https://api.spoonacular.com/recipes/complexSearch"
    params = {
        "apiKey": api_key,
        "number": 50,
        "offset": offset,
        "addRecipeNutrition": True,
    }
    res = requests.get(url, params=params)
    data = res.json()

    if 'results' not in data:
        print("❌ Error fetching data:", data)
        break

    for item in data['results']:
        recipe = get_recipe_info(item['id'])
        if recipe:
            all_recipes.append({
                'ID': recipe['id'],
                'Title': recipe['title'],
                'ReadyInMinutes': recipe.get('readyInMinutes'),
                'Servings': recipe.get('servings'),
                'Vegetarian': recipe.get('vegetarian'),
                'Vegan': recipe.get('vegan'),
                'GlutenFree': recipe.get('glutenFree'),
                'DairyFree': recipe.get('dairyFree'),
                'Ingredients': [ing['original'] for ing in recipe.get('extendedIngredients', [])],
                'Calories': recipe.get('nutrition', {}).get('nutrients', [{}])[0].get('amount'),
                'Protein': next((n['amount'] for n in recipe['nutrition']['nutrients'] if n['title'] == 'Protein'), None),
                'Fat': next((n['amount'] for n in recipe['nutrition']['nutrients'] if n['title'] == 'Fat'), None),
                'Carbohydrates': next((n['amount'] for n in recipe['nutrition']['nutrients'] if n['title'] == 'Carbohydrates'), None),
                'Source URL': recipe.get('sourceUrl')
            })
    offset += 50
    time.sleep(1)  # Respect API rate limits

# Step 6: Save to CSV
df = pd.DataFrame(all_recipes)
df.to_csv("nutrifusion_recipes.csv", index=False)
print(f"✅ Saved {len(df)} recipes to 'nutrifusion_recipes.csv'")
df.head()




KeyError: 'title'

In [14]:
# Step 1: Install required packages
!pip install requests pandas

# Step 2: Import libraries
import requests
import pandas as pd
import time

# Step 3: Set your Spoonacular API key
api_key = '816057ea03c2490ab4329875887ce57b'  # <-- Replace with your key

# Step 4: Function to fetch detailed recipe info by ID
def get_recipe_info(recipe_id):
    url = f"https://api.spoonacular.com/recipes/{recipe_id}/information"
    params = {"includeNutrition": True, "apiKey": api_key}
    res = requests.get(url, params=params)
    if res.status_code == 200:
        return res.json()
    return None

# Step 5: Fetch 300+ recipes using bulk search
all_recipes = []
offset = 0
while len(all_recipes) < 300:
    url = "https://api.spoonacular.com/recipes/complexSearch"
    params = {
        "apiKey": api_key,
        "number": 50,
        "offset": offset,
        "addRecipeNutrition": True,
    }
    res = requests.get(url, params=params)
    data = res.json()

    if 'results' not in data:
        print("❌ Error fetching data:", data)
        break

    for item in data['results']:
        recipe = get_recipe_info(item['id'])
        if recipe:
            # Access nutrients safely
            nutrients = recipe.get('nutrition', {}).get('nutrients', [])

            all_recipes.append({
                'ID': recipe['id'],
                'Title': recipe['title'],
                'ReadyInMinutes': recipe.get('readyInMinutes'),
                'Servings': recipe.get('servings'),
                'Vegetarian': recipe.get('vegetarian'),
                'Vegan': recipe.get('vegan'),
                'GlutenFree': recipe.get('glutenFree'),
                'DairyFree': recipe.get('dairyFree'),
                'Ingredients': [ing['original'] for ing in recipe.get('extendedIngredients', [])],
                'Calories': next((n.get('amount') for n in nutrients if n.get('title') == 'Calories'), None), # Also update calories for consistency
                'Protein': next((n.get('amount') for n in nutrients if n.get('title') == 'Protein'), None),
                'Fat': next((n.get('amount') for n in nutrients if n.get('title') == 'Fat'), None),
                'Carbohydrates': next((n.get('amount') for n in nutrients if n.get('title') == 'Carbohydrates'), None),
                'Source URL': recipe.get('sourceUrl')
            })
    offset += 50
    time.sleep(1)  # Respect API rate limits

# Step 6: Save to CSV
df = pd.DataFrame(all_recipes)
df.to_csv("nutrifusion_recipes.csv", index=False)
print(f"✅ Saved {len(df)} recipes to 'nutrifusion_recipes.csv'")
df.head()

❌ Error fetching data: {'status': 'failure', 'code': 402, 'message': 'Your daily points limit of 150 has been reached. Please upgrade your plan to continue using the API.'}
✅ Saved 117 recipes to 'nutrifusion_recipes.csv'


Unnamed: 0,ID,Title,ReadyInMinutes,Servings,Vegetarian,Vegan,GlutenFree,DairyFree,Ingredients,Calories,Protein,Fat,Carbohydrates,Source URL
0,715415,Red Lentil Soup with Chicken and Turnips,55,8,False,False,True,True,"[additional toppings: diced avocado, micro gre...",,,,,https://www.pinkwhen.com/red-lentil-soup-with-...
1,716406,Asparagus and Pea Soup: Real Convenience Food,20,2,True,True,True,True,[1 bag of frozen organic asparagus (preferably...,,,,,https://fullbellysisters.blogspot.com/2011/03/...
2,644387,Garlicky Kale,45,2,True,True,True,True,"[3 tablespoons balsamic vinegar, 1 clove garli...",,,,,https://www.foodista.com/recipe/J2FTJBF7/garli...
3,715446,Slow Cooker Beef Stew,490,6,False,False,True,True,"[1 14.5oz can of Beef Broth, 2 large carrots, ...",,,,,https://www.pinkwhen.com/slow-cooker-beef-stew...
4,782601,Red Kidney Bean Jambalaya,45,6,True,True,True,True,"[2/3 cup dried brown rice (2 cups cooked), 2 m...",,,,,https://www.foodandspice.com/2016/05/red-kidne...


In [None]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}

def get_recipe_links(base_url, max_links=10):
    print("Fetching recipe links...")
    links = set()
    page = 1

    while len(links) < max_links:
        url = f"{base_url}?page={page}"
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.content, 'html.parser')

        for a in soup.select('a.card__titleLink'):
            href = a.get('href')
            if href and '/recipe/' in href:
                links.add(href)
                if len(links) >= max_links:
                    break
        page += 1
        time.sleep(1)

    return list(links)

def scrape_allrecipes(url):
    try:
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.content, 'html.parser')
        script_tag = soup.find('script', type='application/ld+json')
        if not script_tag:
            return None

        data = json.loads(script_tag.string)
        if isinstance(data, list):
            data = data[0]
        nutrition = data.get('nutrition', {})
        ingredients = data.get('recipeIngredient', [])

        return {
            "Name": data.get('name'),
            "Ingredients": ", ".join(ingredients),
            "Calories": nutrition.get('calories'),
            "Fat": nutrition.get('fatContent'),
            "Carbohydrates": nutrition.get('carbohydrateContent'),
            "Protein": nutrition.get('proteinContent'),
            "URL": url
        }
    except Exception as e:
        print(f"Failed to scrape {url}: {e}")
        return None

# 🔗 Set category page to fetch from (e.g. "dinner", "chicken", etc.)
base_url = "https://www.allrecipes.com/recipes/201/meat-and-poultry/chicken/"  # you can change this
max_recipes = 10  # number of recipes to scrape

recipe_urls = get_recipe_links(base_url, max_recipes)
recipes = []

for url in recipe_urls:
    print(f"Scraping: {url}")
    data = scrape_allrecipes(url)
    if data:
        recipes.append(data)
    time.sleep(1)

df = pd.DataFrame(recipes)
df.to_excel("allrecipes_nutrifusion_dataset.xlsx", index=False)

print("✅ Done! Data saved to 'allrecipes_nutrifusion_dataset.xlsx'.")


Fetching recipe links...
