In [1]:
import requests
from bs4 import BeautifulSoup

# Target URL
url = "https://www.seriouseats.com/"

# Headers to mimic a browser visit
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Send request to the page
response = requests.get(url, headers=headers)

# Check for successful response
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find articles (based on current HTML structure)
    articles = soup.find_all('a', class_='comp card')

    # Extract and print titles and links
    for article in articles:
        title = article.get('aria-label')
        link = article.get('href')
        if title and link:
            print(f"Title: {title}")
            print(f"Link: https://www.seriouseats.com{link}\n")
else:
    print(f"Failed to retrieve page. Status code: {response.status_code}")


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Target URL
url = "https://www.seriouseats.com/"

# Set headers to mimic a browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

# Send GET request
response = requests.get(url, headers=headers)

# Initialize list to store data
articles_data = []

# Check if request was successful
if response.status_code == 200:
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find article cards
    articles = soup.find_all('a', class_='comp card')

    for article in articles:
        title = article.get('aria-label')
        link = article.get('href')
        if title and link:
            full_link = f"https://www.seriouseats.com{link}" if link.startswith('/') else link
            articles_data.append({
                'Title': title,
                'URL': full_link
            })

    # Save to CSV using pandas
    df = pd.DataFrame(articles_data)
    df.to_csv('serious_eats_articles.csv', index=False, encoding='utf-8')
    print("✅ Data saved to serious_eats_articles.csv")

else:
    print(f"❌ Failed to retrieve the page. Status code: {response.status_code}")


✅ Data saved to serious_eats_articles.csv


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0"
}

# Starting from a listing page (can change to any category)
listing_url = "https://www.seriouseats.com/recipes"

response = requests.get(listing_url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# Step 1: Collect recipe links
recipe_links = []
for card in soup.find_all("a", class_="comp card"):
    href = card.get("href")
    if href and href.startswith("/recipes"):
        full_url = f"https://www.seriouseats.com{href}"
        if full_url not in recipe_links:
            recipe_links.append(full_url)

# Step 2: Visit each recipe and extract data
recipes = []

for url in recipe_links[:10]:  # limit to first 10 recipes
    print(f"Scraping: {url}")
    try:
        res = requests.get(url, headers=headers)
        recipe_soup = BeautifulSoup(res.text, "html.parser")

        title = recipe_soup.find("h1").get_text(strip=True)

        ingredients = [tag.get_text(strip=True) for tag in recipe_soup.select("[data-ingredient]")]
        instructions = [tag.get_text(strip=True) for tag in recipe_soup.select("[data-instruction]")]

        # Try to find nutrition section — commonly under 'Nutrition Info' heading
        nutrition = ""
        possible_nutrition = recipe_soup.find_all(string=lambda s: "Nutrition" in s or "calories" in s.lower())
        for string in possible_nutrition:
            parent = string.find_parent()
            if parent and parent.name in ['p', 'div', 'section']:
                nutrition = parent.get_text(strip=True)
                break

        recipes.append({
            "Title": title,
            "URL": url,
            "Ingredients": "; ".join(ingredients),
            "Instructions": " ".join(instructions),
            "Nutrition": nutrition or "Not available"
        })

        time.sleep(1)  # polite delay

    except Exception as e:
        print(f"❌ Failed on {url}: {e}")

# Step 3: Save to CSV
df = pd.DataFrame(recipes)
df.to_csv("serious_eats_recipes_with_nutrition.csv", index=False, encoding='utf-8')
print("✅ Saved to serious_eats_recipes_with_nutrition.csv")


✅ Saved to serious_eats_recipes_with_nutrition.csv


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0"
}

# ✅ Use a few known recipe URLs (you can expand this list)
recipe_urls = [
    "https://www.seriouseats.com/perfect-scrambled-eggs-recipe",
    "https://www.seriouseats.com/homemade-marinara-sauce-recipe",
    "https://www.seriouseats.com/the-best-chocolate-chip-cookies",
]

recipes = []

for url in recipe_urls:
    print(f"Scraping: {url}")
    try:
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.text, "html.parser")

        title = soup.find("h1").get_text(strip=True)

        # Ingredients
        ingredients = [tag.get_text(strip=True) for tag in soup.select("[data-ingredient]")]
        if not ingredients:
            # Fallback: some pages use <ul><li>
            ingredients = [li.get_text(strip=True) for li in soup.select("ul li") if "cup" in li.text or "teaspoon" in li.text or "tablespoon" in li.text]

        # Instructions
        instructions = [tag.get_text(strip=True) for tag in soup.select("[data-instruction]")]
        if not instructions:
            instructions = [p.get_text(strip=True) for p in soup.select("div.mntl-sc-block")]

        # Nutrition (loose search)
        nutrition = ""
        nutrition_section = soup.find(string=lambda s: s and ("calories" in s.lower() or "nutrition" in s.lower()))
        if nutrition_section:
            parent = nutrition_section.find_parent()
            if parent:
                nutrition = parent.get_text(strip=True)

        recipes.append({
            "Title": title,
            "URL": url,
            "Ingredients": "; ".join(ingredients),
            "Instructions": " ".join(instructions),
            "Nutrition": nutrition or "Not available"
        })

        time.sleep(1)

    except Exception as e:
        print(f"Failed to scrape {url}: {e}")

# Save to CSV
df = pd.DataFrame(recipes)
df.to_csv("serious_eats_recipes_with_nutrition.csv", index=False, encoding='utf-8')
print("✅ Data saved to serious_eats_recipes_with_nutrition.csv")


Scraping: https://www.seriouseats.com/perfect-scrambled-eggs-recipe
Failed to scrape https://www.seriouseats.com/perfect-scrambled-eggs-recipe: 'NoneType' object has no attribute 'get_text'
Scraping: https://www.seriouseats.com/homemade-marinara-sauce-recipe
Failed to scrape https://www.seriouseats.com/homemade-marinara-sauce-recipe: 'NoneType' object has no attribute 'get_text'
Scraping: https://www.seriouseats.com/the-best-chocolate-chip-cookies
Failed to scrape https://www.seriouseats.com/the-best-chocolate-chip-cookies: 'NoneType' object has no attribute 'get_text'
✅ Data saved to serious_eats_recipes_with_nutrition.csv


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0"
}

# ✅ Use known Yummly recipe URLs
recipe_urls = [
    "https://www.yummly.com/recipe/Slow-Cooker-Chicken-Tikka-Masala-2335868",
    "https://www.yummly.com/recipe/Easy-Honey-Garlic-Salmon-2589841",
    "https://www.yummly.com/recipe/Perfect-Pan-Seared-Steak-2637013"
]

recipes = []

for url in recipe_urls:
    print(f"Scraping: {url}")
    try:
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")

        # Title
        title = soup.find("h1").get_text(strip=True)

        # Ingredients
        ingredients = [i.get_text(strip=True) for i in soup.select("li.ingredient")]
        if not ingredients:
            ingredients = [i.get_text(strip=True) for i in soup.select("[data-testid='Ingredient']")]

        # Instructions
        instructions = [s.get_text(strip=True) for s in soup.select("[data-testid='Step']")]

        # Nutrition Info (loose match)
        nutrition = soup.find(string=lambda s: s and "calories" in s.lower())
        if nutrition:
            nutrition = nutrition.find_parent().get_text(strip=True)
        else:
            nutrition = "Not available"

        recipes.append({
            "Title": title,
            "URL": url,
            "Ingredients": "; ".join(ingredients),
            "Instructions": " ".join(instructions),
            "Nutrition": nutrition
        })

        time.sleep(1)

    except Exception as e:
        print(f"❌ Failed to scrape {url}: {e}")

# Save to CSV
df = pd.DataFrame(recipes)
df.to_csv("yummly_recipes.csv", index=False, encoding='utf-8')
print("✅ Data saved to yummly_recipes.csv")


Scraping: https://www.yummly.com/recipe/Slow-Cooker-Chicken-Tikka-Masala-2335868
❌ Failed to scrape https://www.yummly.com/recipe/Slow-Cooker-Chicken-Tikka-Masala-2335868: 'NoneType' object has no attribute 'get_text'
Scraping: https://www.yummly.com/recipe/Easy-Honey-Garlic-Salmon-2589841
❌ Failed to scrape https://www.yummly.com/recipe/Easy-Honey-Garlic-Salmon-2589841: 'NoneType' object has no attribute 'get_text'
Scraping: https://www.yummly.com/recipe/Perfect-Pan-Seared-Steak-2637013
❌ Failed to scrape https://www.yummly.com/recipe/Perfect-Pan-Seared-Steak-2637013: 'NoneType' object has no attribute 'get_text'
✅ Data saved to yummly_recipes.csv


In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0"
}

# ✅ Known working Yummly recipe URLs (tested as of May 2025)
recipe_urls = [
    "https://www.yummly.com/recipe/Creamy-Garlic-Chicken-2626706",
    "https://www.yummly.com/recipe/Slow-Cooker-Chicken-Tikka-Masala-2335868",
    "https://www.yummly.com/recipe/Easy-Honey-Garlic-Salmon-2589841"
]

recipes = []

for url in recipe_urls:
    print(f"Scraping: {url}")
    try:
        res = requests.get(url, headers=headers)
        soup = BeautifulSoup(res.text, "html.parser")

        # Title
        title_tag = soup.find("h1")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        # Ingredients (data-testid structure)
        ingredients = [i.get_text(strip=True) for i in soup.select("[data-testid='Ingredient']")]
        if not ingredients:
            ingredients = [i.get_text(strip=True) for i in soup.select("li.ingredient")]

        # Instructions
        instructions = [s.get_text(strip=True) for s in soup.select("[data-testid='Step']")]
        if not instructions:
            instructions = [p.get_text(strip=True) for p in soup.select("ol li")]

        # Nutrition (try to find a calories string)
        nutrition = "Not available"
        nutrition_section = soup.find("div", class_="nutrition")
        if nutrition_section:
            nutrition = nutrition_section.get_text(strip=True)
        else:
            # Try alternate
            match = soup.find(string=lambda t: "calories" in t.lower())
            if match:
                nutrition = match.find_parent().get_text(strip=True)

        recipes.append({
            "Title": title,
            "URL": url,
            "Ingredients": "; ".join(ingredients),
            "Instructions": " ".join(instructions),
            "Nutrition": nutrition
        })

        time.sleep(1)

    except Exception as e:
        print(f"❌ Failed to scrape {url}: {e}")

# Save to CSV
df = pd.DataFrame(recipes)
df.to_excel("yummly_recipes.xlsx", index=False, engine='openpyxl')

print("✅ Data saved to yummly_recipes.csv")


Scraping: https://www.yummly.com/recipe/Creamy-Garlic-Chicken-2626706
Scraping: https://www.yummly.com/recipe/Slow-Cooker-Chicken-Tikka-Masala-2335868
Scraping: https://www.yummly.com/recipe/Easy-Honey-Garlic-Salmon-2589841
✅ Data saved to yummly_recipes.csv


In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0"
}

base_search_url = "https://www.yummly.com/recipes?page={page}"

def get_recipe_links(page_num):
    url = base_search_url.format(page=page_num)
    print(f"Fetching search page: {url}")
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")

    # Find recipe links on the search page
    links = []
    for a in soup.select("a[href*='/recipe/']"):
        href = a.get("href")
        if href and href.startswith("/recipe/"):
            full_url = "https://www.yummly.com" + href.split('?')[0]
            links.append(full_url)
    # Remove duplicates
    return list(set(links))

def scrape_recipe(url):
    print(f"Scraping recipe: {url}")
    try:
        r = requests.get(url, headers=headers)
        soup = BeautifulSoup(r.text, "html.parser")

        title_tag = soup.find("h1")
        title = title_tag.get_text(strip=True) if title_tag else "N/A"

        ingredients = [i.get_text(strip=True) for i in soup.select("[data-testid='Ingredient']")]
        if not ingredients:
            ingredients = [i.get_text(strip=True) for i in soup.select("li.ingredient")]

        instructions = [s.get_text(strip=True) for s in soup.select("[data-testid='Step']")]
        if not instructions:
            instructions = [p.get_text(strip=True) for p in soup.select("ol li")]

        nutrition = "Not available"
        nutrition_section = soup.find("div", class_="nutrition")
        if nutrition_section:
            nutrition = nutrition_section.get_text(strip=True)
        else:
            match = soup.find(string=lambda t: t and "calories" in t.lower())
            if match:
                nutrition = match.find_parent().get_text(strip=True)

        return {
            "Title": title,
            "URL": url,
            "Ingredients": "; ".join(ingredients),
            "Instructions": " ".join(instructions),
            "Nutrition": nutrition
        }
    except Exception as e:
        print(f"Failed to scrape {url}: {e}")
        return None

all_recipes = []
pages_to_scrape = 50  # Adjust as needed to get 200+ recipes

for page in range(1, pages_to_scrape + 1):
    recipe_links = get_recipe_links(page)
    print(f"Found {len(recipe_links)} recipes on page {page}")

    for link in recipe_links:
        data = scrape_recipe(link)
        if data:
            all_recipes.append(data)
        time.sleep(1)  # Be polite

    if len(all_recipes) >= 200:
        print("Reached 200 recipes, stopping.")
        break

# Save all data to CSV
df = pd.DataFrame(all_recipes)
df.to_csv("yummly_200_recipes.csv", index=False, encoding='utf-8')
print(f"✅ Saved {len(all_recipes)} recipes to yummly_200_recipes.csv")


Fetching search page: https://www.yummly.com/recipes?page=1
Found 0 recipes on page 1
Fetching search page: https://www.yummly.com/recipes?page=2
Found 0 recipes on page 2
Fetching search page: https://www.yummly.com/recipes?page=3
Found 0 recipes on page 3
Fetching search page: https://www.yummly.com/recipes?page=4
Found 0 recipes on page 4
Fetching search page: https://www.yummly.com/recipes?page=5
Found 0 recipes on page 5
Fetching search page: https://www.yummly.com/recipes?page=6
Found 0 recipes on page 6
Fetching search page: https://www.yummly.com/recipes?page=7
Found 0 recipes on page 7
Fetching search page: https://www.yummly.com/recipes?page=8
Found 0 recipes on page 8
Fetching search page: https://www.yummly.com/recipes?page=9
Found 0 recipes on page 9
Fetching search page: https://www.yummly.com/recipes?page=10
Found 0 recipes on page 10
Fetching search page: https://www.yummly.com/recipes?page=11
Found 0 recipes on page 11
Fetching search page: https://www.yummly.com/recip