In [10]:
import requests
from bs4 import BeautifulSoup

def get_recipe_links(base_url):
    recipe_links = []
    page = 1  # Starting page number
    max_pages = 5  # Prevent infinite loops by limiting the number of pages
    iterations = 0

    while True:
        if iterations >= max_pages:
            print("Reached maximum pages. Exiting to prevent infinite loop.")
            break

        current_url = f"{base_url}?query-9-page={page}"
        try:
            response = requests.get(current_url)
            response.raise_for_status()  
            soup = BeautifulSoup(response.text, 'html.parser')
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {current_url}: {e}")
            break  # Exit if there was an error

        found_links = False
        # Find all recipe links
        for link in soup.find_all('a', href=True):
            if '/autumnrosewood.wordpress.com/' in link['href']:
                absolute_url = f"{link['href']}"
                if absolute_url not in recipe_links:  # Avoid duplicates
                    recipe_links.append(absolute_url)
                    found_links = True

        print(f"Fetched {len(recipe_links)} links from {current_url}")

        # Increment page for the next load
        page += 1  
        iterations += 1  

        # Check if we found any new links
        if not found_links:
            print("No new links found. Exiting.")
            break

    return recipe_links  

if __name__ == "__main__":
    base_url = 'https://autumnrosewood.wordpress.com/blog-posts/'
    recipe_links = get_recipe_links(base_url)

    for url in recipe_links:
        print(url)

    # Save the links to a file
    with open('recipe_links.txt', 'w', encoding='utf-8') as f:
        for url in recipe_links:
            f.write(url + '\n')

    print(f"Total recipe links collected: {len(recipe_links)}")


Fetched 16 links from https://autumnrosewood.wordpress.com/blog-posts/?query-9-page=1
Fetched 22 links from https://autumnrosewood.wordpress.com/blog-posts/?query-9-page=2
Fetched 28 links from https://autumnrosewood.wordpress.com/blog-posts/?query-9-page=3
Fetched 32 links from https://autumnrosewood.wordpress.com/blog-posts/?query-9-page=4
Fetched 32 links from https://autumnrosewood.wordpress.com/blog-posts/?query-9-page=5
No new links found. Exiting.
https://autumnrosewood.wordpress.com/blog-posts/
https://autumnrosewood.wordpress.com/category/cakes/
https://autumnrosewood.wordpress.com/category/breads/
https://autumnrosewood.wordpress.com/category/cookies/
https://autumnrosewood.wordpress.com/category/pies/
https://autumnrosewood.wordpress.com/category/breakfast/
https://autumnrosewood.wordpress.com/books/
https://autumnrosewood.wordpress.com/about/
https://autumnrosewood.wordpress.com/2024/09/26/farmhouse-granola-bars-a-slice-of-home-in-every-bite/
https://autumnrosewood.wordpres

In [11]:
links_string = """
https://autumnrosewood.wordpress.com/2024/09/26/farmhouse-granola-bars-a-slice-of-home-in-every-bite/
https://autumnrosewood.wordpress.com/2024/09/19/cozy-up-with-my-familys-cottagecore-chili-recipe/
https://autumnrosewood.wordpress.com/2024/09/16/my-go-to-playlist-for-cozy-baking-days/
https://autumnrosewood.wordpress.com/2024/09/15/cozy-autumn-vibes-a-playlist-to-embrace-the-season/
https://autumnrosewood.wordpress.com/2024/09/13/pumpkin-pancakes-my-favorite-cozy-fall-breakfast/
https://autumnrosewood.wordpress.com/2024/09/05/embrace-the-cozy-magic-of-autumn-cottagecore-activities-to-try/
https://autumnrosewood.wordpress.com/wp-admin/site-editor.php?postType=wp_template&postId=pub/nook//page
https://wordpress.com/abuse/?report_url=https://autumnrosewood.wordpress.com/blog-posts/
https://autumnrosewood.wordpress.com/2024/09/05/english-peach-scones-a-taste-of-peach-season/
https://autumnrosewood.wordpress.com/2024/09/03/cozy-up-with-butternut-squash-and-apple-soup/
https://autumnrosewood.wordpress.com/2024/08/16/exciting-news-launch-of-a-cottage-in-the-meadow/
https://autumnrosewood.wordpress.com/2024/08/06/embrace-the-cozy-comfort-of-easy-apple-crisp/
https://autumnrosewood.wordpress.com/2024/08/06/creating-a-cozy-cottagecore-kitchen-essential-items-for-rustic-charm/
https://autumnrosewood.wordpress.com/2024/07/30/welcome-to-share-a-poem/
https://autumnrosewood.wordpress.com/2024/07/29/build-your-own-herbal-medicine-cabinet-top-9-must-have-herbs-and-their-uses/
https://autumnrosewood.wordpress.com/2024/07/24/nanas-country-pancakes/
https://autumnrosewood.wordpress.com/2024/07/14/strawberry-bread-pudding-a-cozy-cottagecore-delight/
https://autumnrosewood.wordpress.com/2024/07/13/nurture-your-hair-naturally-the-benefits-of-lavender-rosemary-and-peppermint/
https://autumnrosewood.wordpress.com/2024/07/13/cottagecore-living-ideas-for-a-cozy-whimsical-life/
https://autumnrosewood.wordpress.com/2024/07/12/cottagecore-picnic-essentials-a-guide-to-the-perfect-outdoor-escape/
https://autumnrosewood.wordpress.com/2024/07/12/a-taste-of-summer-homemade-blueberry-muffins/
https://autumnrosewood.wordpress.com/2024/07/12/indulge-in-rustic-charm-with-cottagecore-fudgy-brownies/
https://autumnrosewood.wordpress.com/2024/07/09/homemade-banana-bread/
https://autumnrosewood.wordpress.com/2024/07/09/embracing-the-sweet-simplicity-honey-lavender-cookies/
"""
links = [link.strip() for link in links_string.strip().split('\n') if link.strip()]

formatted_links = ',\n'.join([f'"{link}"' for link in links])

print(formatted_links)

"https://autumnrosewood.wordpress.com/2024/09/26/farmhouse-granola-bars-a-slice-of-home-in-every-bite/",
"https://autumnrosewood.wordpress.com/2024/09/19/cozy-up-with-my-familys-cottagecore-chili-recipe/",
"https://autumnrosewood.wordpress.com/2024/09/16/my-go-to-playlist-for-cozy-baking-days/",
"https://autumnrosewood.wordpress.com/2024/09/15/cozy-autumn-vibes-a-playlist-to-embrace-the-season/",
"https://autumnrosewood.wordpress.com/2024/09/13/pumpkin-pancakes-my-favorite-cozy-fall-breakfast/",
"https://autumnrosewood.wordpress.com/2024/09/05/embrace-the-cozy-magic-of-autumn-cottagecore-activities-to-try/",
"https://autumnrosewood.wordpress.com/wp-admin/site-editor.php?postType=wp_template&postId=pub/nook//page",
"https://wordpress.com/abuse/?report_url=https://autumnrosewood.wordpress.com/blog-posts/",
"https://autumnrosewood.wordpress.com/2024/09/05/english-peach-scones-a-taste-of-peach-season/",
"https://autumnrosewood.wordpress.com/2024/09/03/cozy-up-with-butternut-squash-and-appl

In [16]:
import requests
from bs4 import BeautifulSoup
import os



def scrape_images(url, folder_name='images_autum_rose_wood'):
    headers = {        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
} #Make it apper its coming from a real browser
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')

    img_tags = soup.find_all('img')

    for img in img_tags:
        img_url = img.get('src')
        
        if img_url.startswith('/'):
            img_url = url + img_url
        
        try:
            img_data = requests.get(img_url, headers=headers).content
            img_name = os.path.join(folder_name, img_url.split('/')[-1])
            
            with open(img_name, 'wb') as f:
                f.write(img_data)
            print(f"Downloaded: {img_name}")
        except Exception as e:
            print(f"Could not download {img_url}. Reason: {e}")

urls = ["https://autumnrosewood.wordpress.com/2024/09/26/farmhouse-granola-bars-a-slice-of-home-in-every-bite/",
"https://autumnrosewood.wordpress.com/2024/09/19/cozy-up-with-my-familys-cottagecore-chili-recipe/",
"https://autumnrosewood.wordpress.com/2024/09/16/my-go-to-playlist-for-cozy-baking-days/",
"https://autumnrosewood.wordpress.com/2024/09/15/cozy-autumn-vibes-a-playlist-to-embrace-the-season/",
"https://autumnrosewood.wordpress.com/2024/09/13/pumpkin-pancakes-my-favorite-cozy-fall-breakfast/",
"https://autumnrosewood.wordpress.com/2024/09/05/embrace-the-cozy-magic-of-autumn-cottagecore-activities-to-try/",
"https://autumnrosewood.wordpress.com/wp-admin/site-editor.php?postType=wp_template&postId=pub/nook//page",
"https://wordpress.com/abuse/?report_url=https://autumnrosewood.wordpress.com/blog-posts/",
"https://autumnrosewood.wordpress.com/2024/09/05/english-peach-scones-a-taste-of-peach-season/",
"https://autumnrosewood.wordpress.com/2024/09/03/cozy-up-with-butternut-squash-and-apple-soup/",
"https://autumnrosewood.wordpress.com/2024/08/16/exciting-news-launch-of-a-cottage-in-the-meadow/",
"https://autumnrosewood.wordpress.com/2024/08/06/embrace-the-cozy-comfort-of-easy-apple-crisp/",
"https://autumnrosewood.wordpress.com/2024/08/06/creating-a-cozy-cottagecore-kitchen-essential-items-for-rustic-charm/",
"https://autumnrosewood.wordpress.com/2024/07/30/welcome-to-share-a-poem/",
"https://autumnrosewood.wordpress.com/2024/07/29/build-your-own-herbal-medicine-cabinet-top-9-must-have-herbs-and-their-uses/",
"https://autumnrosewood.wordpress.com/2024/07/24/nanas-country-pancakes/",
"https://autumnrosewood.wordpress.com/2024/07/14/strawberry-bread-pudding-a-cozy-cottagecore-delight/",
"https://autumnrosewood.wordpress.com/2024/07/13/nurture-your-hair-naturally-the-benefits-of-lavender-rosemary-and-peppermint/",
"https://autumnrosewood.wordpress.com/2024/07/13/cottagecore-living-ideas-for-a-cozy-whimsical-life/",
"https://autumnrosewood.wordpress.com/2024/07/12/cottagecore-picnic-essentials-a-guide-to-the-perfect-outdoor-escape/",
"https://autumnrosewood.wordpress.com/2024/07/12/a-taste-of-summer-homemade-blueberry-muffins/",
"https://autumnrosewood.wordpress.com/2024/07/12/indulge-in-rustic-charm-with-cottagecore-fudgy-brownies/",
"https://autumnrosewood.wordpress.com/2024/07/09/homemade-banana-bread/",
"https://autumnrosewood.wordpress.com/2024/07/09/embracing-the-sweet-simplicity-honey-lavender-cookies/"]

for url in urls:
    scrape_images(url)


Could not download https://autumnrosewood.wordpress.com/wp-content/uploads/2024/09/black-white-minimalist-aesthetic-cleaning-list-1-1.png?w=576. Reason: [Errno 22] Invalid argument: 'images_autum_rose_wood\\black-white-minimalist-aesthetic-cleaning-list-1-1.png?w=576'
