In [1]:
import requests
from bs4 import BeautifulSoup, SoupStrainer

In [7]:
base_url = "https://minimalistbaker.com/recipe-index/"
page_param = "?fwp_paged="

# Function to get recipe links from a single page
def extract_links_from_page(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    recipe_links = []

    # Find all recipe links
    for link in soup.find_all("a", class_="post-summary__image", href=True):
        recipe_links.append(link['href'])

    return recipe_links

In [8]:
# Main function to scrape all pages
def scrape_all_recipe_links(total_pages):
    all_recipe_links = []
    for page in range(1, total_pages+1):
        page_url = f"{base_url}{page_param}{page}"
        print(f"Scraping: {page_url}")
        links = extract_links_from_page(page_url)
        all_recipe_links.extend(links)

    return all_recipe_links


pages_total = 86

recipe_links = scrape_all_recipe_links(pages_total)
print(f"Total recipe links extracted: {len(recipe_links)}")
              
for link in recipe_links:
    print(link)

Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=1
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=2
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=3
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=4
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=5
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=6
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=7
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=8
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=9
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=10
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=11
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=12
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=13
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=14
Scraping: https://minimalistbaker.com/recipe-index/?fwp_paged=15
Scraping: https://minimalistbaker.

In [9]:
{len(recipe_links)}

{1719}

In [10]:
# The function to scrape the title and paragraphs
def scrape_recipe_details(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the title (h1 inside the header)
    title = soup.find("h1", class_="entry-title").get_text(strip=True) if soup.find("h1", class_="entry-title") else "No title found"

    # Extract all paragraphs (p tags) under the content section
    paragraphs = []
    content_section = soup.find("div", class_="entry-content jpibfi_container")
    if content_section:
        for p in content_section.find_all("p"):
            paragraphs.append(p.get_text(strip=True))

    # Return the scraped data
    return {
        "title": title,
        "paragraphs": paragraphs
    }

In [None]:
# Function to scrape title from a single URL
def scrape_title(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Extract the title (h1 inside the header)
        title = soup.find("h1", class_="entry-title").get_text(strip=True) if soup.find("h1", class_="entry-title") else "No title found"
        return title
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None

# Iterate through the list and scrape each URL
titles = []
for link in recipe_links:
    print(f"Scraping: {link}")
    title = scrape_title(link)
    if title:
        titles.append(title)

# Print the scraped titles
for i, title in enumerate(titles, start=1):
    print(f"{i}. {title}")

Scraping: https://minimalistbaker.com/creamy-vegan-white-bean-chili/
Scraping: https://minimalistbaker.com/easy-winter-fruit-salad/
Scraping: https://minimalistbaker.com/chocolate-tahini-caramels-no-candy-thermometer/
Scraping: https://minimalistbaker.com/chocolate-pecan-shortbread-cookies-vegan-gf/
Scraping: https://minimalistbaker.com/garlic-herb-roasted-delicata-squash/
Scraping: https://minimalistbaker.com/cranberry-orange-scones-vegan-gf/
Scraping: https://minimalistbaker.com/easy-gluten-free-cornbread-1-bowl/
Scraping: https://minimalistbaker.com/1-pot-spicy-pumpkin-tomato-soup/
Scraping: https://minimalistbaker.com/cozy-curry-noodle-soup-thai-inspired/
Scraping: https://minimalistbaker.com/fluffy-pumpkin-oat-cookies/
Scraping: https://minimalistbaker.com/vegan-pumpkin-spice-frosting/
Scraping: https://minimalistbaker.com/roasted-fall-vegetable-salad-with-white-beans/
Scraping: https://minimalistbaker.com/apple-butter-snickerdoodle-cookies/
Scraping: https://minimalistbaker.com/v

In [17]:
titles[111]

'Spicy Mushroom Black Bean Fritters'

In [20]:
# Scrape content from the paragraphs
## Function to scrape a paragraph from a single url
def scrape_paragraph(url):
    try:
        response = requests.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract the paragraphs
        paragraphs = soup.find_all("p")
        content = " ".join([para.get_text(strip=True) for para in paragraphs])

        return content
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None, None

link_url = "https://minimalistbaker.com/chicken-kofta-kebabs/"  
paragraph = scrape_paragraph(link_url)
paragraph

'Minimalist Baker Disclosure: This post may contain affiliate links which provide us a small commission when used for purchase. We\'re grateful for your support! Grilling season is here, and today we’re bringing you an EASY, flavorful addition to your grilling rotation: kofta kebabs! These Middle Eastern-inspired kebabs feature ground chicken, herbs, spices, and aromatics arranged on skewers and grilled to perfection. They’re elegant yet simple and are especially delicious served withlemon rice,hummus,tahini sauce,tabbouleh,grilled veggies, and/or pita. Just1 bowl,10 ingredients, and25 minutesrequired! Let’s make kofta kebabs! Kofta, also spelledkefta,kafta, orkofterefers to meatball- or meatloaf-style dishes made from ground meat mixed with spices and other ingredients. They can be shaped into balls, patties, or cylinders. There are many variations of the dish, and it’s enjoyed in regions ranging from the Middle East to North Africa, South Asia, and beyond. The first recipes for kofta

In [21]:
# Combined codes to scrap both title and paragraph contents

def scrape_recipe_info(url):
    try:
        response = requests.get(url)
        response.raise_for_status()

        # Parse the content with BeautifulSoup 
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract the title
        title = soup.find("h1", class_="entry-title").get_text(strip=True) if soup.find("h1", class_="entry-title") else "No title found"
        
        # Extract all paragraphs in the page
        paragraphs = soup.find_all("p")
        content = " ".join([para.get_text(strip=True) for para in paragraphs])

        return title, content
    
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None, None
    
# Iterate through the list of URLs and scrape each 
recipes_info = []
for link in recipe_links:
    print(f"Scraping: {link}")
    title, content = scrape_recipe_info(link)
    if title and content:
        recipes_info.append({"title": title, "content": content})



Scraping: https://minimalistbaker.com/creamy-vegan-white-bean-chili/
Scraping: https://minimalistbaker.com/easy-winter-fruit-salad/
Scraping: https://minimalistbaker.com/chocolate-tahini-caramels-no-candy-thermometer/
Scraping: https://minimalistbaker.com/chocolate-pecan-shortbread-cookies-vegan-gf/
Scraping: https://minimalistbaker.com/garlic-herb-roasted-delicata-squash/
Scraping: https://minimalistbaker.com/cranberry-orange-scones-vegan-gf/
Scraping: https://minimalistbaker.com/easy-gluten-free-cornbread-1-bowl/
Scraping: https://minimalistbaker.com/1-pot-spicy-pumpkin-tomato-soup/
Scraping: https://minimalistbaker.com/cozy-curry-noodle-soup-thai-inspired/
Scraping: https://minimalistbaker.com/fluffy-pumpkin-oat-cookies/
Scraping: https://minimalistbaker.com/vegan-pumpkin-spice-frosting/
Scraping: https://minimalistbaker.com/roasted-fall-vegetable-salad-with-white-beans/
Scraping: https://minimalistbaker.com/apple-butter-snickerdoodle-cookies/
Scraping: https://minimalistbaker.com/v

In [22]:
## Print the results
for i, recipe in enumerate(recipes_info, start=1):
    print(f"\nRecipe {i}: {recipe['title']}")
    print(f"Content: {recipe['content'][:500]}...")


Recipe 1: Creamy Vegan White Bean Chili
Content: Minimalist Baker Disclosure: This post may contain affiliate links which provide us a small commission when used for purchase. We're grateful for your support! We love aclassic (tomato-based) chili, but a CREAMY chili? It’s hard to say no to! Mix up your chili rotation with this creamy vegan white bean chili with spicy green chiles, sweet corn, and nutrient-packed spinach or kale. It’s a cozy, subtly spiced, nourishing meal that comes together in just30 minutes. Let us show you how it’s done! Th...

Recipe 2: Easy Winter Fruit Salad (1 Bowl!)
Content: Minimalist Baker Disclosure: This post may contain affiliate links which provide us a small commission when used for purchase. We're grateful for your support! Winter fruits aresurvivors, making it through the coldest days of the year. We had to show them some appreciation. Enter this simple, vibrant winter fruit salad! It almost feelstoosimple to be a recipe (1 bowl,5 ingredients,15 minut

In [23]:
import json
# Save the scraped data into JSON file
with open("scraped_recipes.json", "w", encoding="utf-8") as file:
    json.dump(recipes_info, file, ensure_ascii=False, indent=4)

print("Scraping completed. Data saved to 'scraped_recipes.json'.")

Scraping completed. Data saved to 'scraped_recipes.json'.
