In [5]:
import requests
from bs4 import BeautifulSoup
import os

def scrape_images(url, folder, max_images=150, timeout=10):
    """Download the photo-card images found on a listing page.

    The page at ``url`` is fetched once and parsed with BeautifulSoup. Every
    ``div.hz-photo-card.hz-track-me`` is inspected for an
    ``a.hz-photo-card__ratio-box > img.hz-photo-card__img`` element, and the
    image behind its ``src`` attribute is saved as ``image_<n>.jpg`` in
    ``folder``. Only images present in the initial HTML are seen; anything the
    page loads later via JavaScript is not.

    Args:
        url: Address of the listing page to scrape.
        folder: Destination directory; created if it does not exist.
        max_images: Upper bound on the number of images saved.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched or answers with an HTTP error status. Failures on
            individual images are reported with ``print`` and skipped.
    """
    from urllib.parse import urljoin

    os.makedirs(folder, exist_ok=True)
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page and saving nothing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all divs with the specific class for photo cards
    photo_cards = soup.find_all('div', class_='hz-photo-card hz-track-me')

    count = 0
    for card in photo_cards:
        # Stop walking the cards once the quota is filled.
        if count >= max_images:
            break

        # Find the <a> tag with class 'hz-photo-card__ratio-box' inside the card
        a_tag = card.find('a', class_='hz-photo-card__ratio-box')
        if not a_tag:
            continue

        # Find the <img> tag inside the <a> tag
        img_tag = a_tag.find('img', class_='hz-photo-card__img')
        if not img_tag:
            continue

        # Only the 'src' attribute is used as the image URL
        src = img_tag.get('src')
        if not src:
            continue

        try:
            # Resolve relative / protocol-relative sources against the page URL;
            # absolute URLs pass through unchanged.
            img_response = requests.get(urljoin(url, src), timeout=timeout)
            # Do not store an HTTP error body under a .jpg name.
            img_response.raise_for_status()
            with open(os.path.join(folder, f"image_{count}.jpg"), 'wb') as f:
                f.write(img_response.content)
            count += 1
        except Exception as e:
            print(f"Could not download {src}: {e}")

    print(f"Downloaded {count} images to {folder}")


# Scrape the Houzz "modern kitchen ideas" listing page into data/raw/kitchen/modern.
# max_images is left at the function's default.
scrape_images('https://www.houzz.com/photos/modern-kitchen-ideas-phbr1-bp~t_709~s_2105', 'data/raw/kitchen/modern')


Downloaded 19 images to data/raw/kitchen/modern


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import StaleElementReferenceException
import time
import requests
import os

def scrape_pinterest_images(url, folder, name_img, max_images=300,
                            driver_path='C:/Program Files/chromedriver-win64/chromedriver.exe',
                            max_idle_scrolls=5, timeout=10):
    """Scroll a page in headless Chrome and download the images it reveals.

    The page is scrolled to the bottom repeatedly. After each scroll, every
    ``<img>`` under ``div[data-test-id='deeplink-wrapper']`` is inspected and
    any ``src`` not seen before is downloaded to
    ``<folder>/<name_img>_<n>.jpg``.

    Args:
        url: Page to open in the browser.
        folder: Destination directory; created if it does not exist.
        name_img: Filename prefix for the saved images.
        max_images: Stop after this many images have been saved.
        driver_path: Path to the ChromeDriver executable.
        max_idle_scrolls: Give up after this many consecutive scrolls that
            surface no new image URL (or fail to query the page). Without
            this bound the loop would never end once the page runs dry.
        timeout: Seconds to wait for each image HTTP request.

    Notes:
        A URL is recorded as seen before its download is attempted, so an
        image whose download fails is not retried. The browser is always
        shut down, even when an unexpected exception propagates.
    """
    os.makedirs(folder, exist_ok=True)

    # Set up Selenium WebDriver
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # Run in headless mode (no browser window)

    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)

    images = set()
    count = 0
    idle_scrolls = 0

    try:
        driver.get(url)
        time.sleep(3)  # Wait for the page to load fully

        while count < max_images and idle_scrolls < max_idle_scrolls:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Wait for more images to load

            try:
                img_elements = driver.find_elements(
                    By.CSS_SELECTOR, "div[data-test-id='deeplink-wrapper'] img"
                )
            except Exception as e:
                print(f"Error while fetching images: {e}")
                # Count the failure so a persistent error cannot loop forever.
                idle_scrolls += 1
                continue

            found_new = False
            for img in img_elements:
                if count >= max_images:
                    break

                try:
                    img_url = img.get_attribute('src')
                except StaleElementReferenceException:
                    # The DOM node was replaced while we were iterating.
                    print("Encountered stale element. Skipping this image.")
                    continue

                if not img_url or img_url in images:
                    continue

                images.add(img_url)
                found_new = True
                try:
                    img_response = requests.get(img_url, timeout=timeout)
                    # Do not store an HTTP error body under a .jpg name.
                    img_response.raise_for_status()
                    with open(os.path.join(folder, f"{name_img}_{count}.jpg"), 'wb') as f:
                        f.write(img_response.content)
                    print(f"Downloaded image {count + 1}: {img_url}")
                    count += 1
                except Exception as e:
                    print(f"Could not download {img_url}: {e}")

            # Reset the stall counter whenever the scroll produced something new.
            idle_scrolls = 0 if found_new else idle_scrolls + 1
    finally:
        # Always release the browser process, including on errors/interrupts.
        driver.quit()

    print(f"Downloaded {count} images to {folder}")

# Active run: Pinterest search results for "industrial bedroom design",
# saved under data/raw/bedroom/industrial with the prefix "bedroom_industrial".
scrape_pinterest_images(
    'https://za.pinterest.com/search/pins/?q=industrial%20bedroom%20design&rs=typed',
    'data/raw/bedroom/industrial',
    'bedroom_industrial'
)

# The calls below are disabled; each targets a different search query and
# output folder. Uncomment one to run it.

# scrape_pinterest_images(
#     'https://za.pinterest.com/search/pins/?q=boho%20toilet%20design&rs=typed',
#     'data/raw/bathroom/boho',
#     'bathroom_boho'
# )

# scrape_pinterest_images(
#     'https://za.pinterest.com/search/pins/?q=scandinavian%20toilet%20design&rs=typed',
#     'data/raw/bathroom/scandinavian',
#     'bathroom_scandinavian'
# )

# scrape_pinterest_images(
#     'https://za.pinterest.com/search/pins/?q=minimalist%20toilet%20design&rs=typed',
#     'data/raw/bathroom/minimalist',
#     'bathroom_minimalist'
# )

# scrape_pinterest_images(
#     'https://za.pinterest.com/search/pins/?q=modern%20toilet%20design&rs=typed',
#     'data/raw/bathroom/modern',
#     'bathroom_modern'
# )

Downloaded image 1: https://i.pinimg.com/236x/2c/b8/a9/2cb8a90369a2b44963fe3075d5f1d3d6.jpg
Downloaded image 2: https://i.pinimg.com/236x/2d/7b/16/2d7b16661d59def110874238dc4861fc.jpg
Downloaded image 3: https://i.pinimg.com/236x/84/ea/7e/84ea7e4c6d82dbbbdf62ce012a8c1b4f.jpg
Downloaded image 4: https://i.pinimg.com/236x/8c/f1/18/8cf11801befeb0b1478480b12fe24742.jpg
Downloaded image 5: https://i.pinimg.com/236x/80/34/76/80347678401225bb3da9f751528a1b2d.jpg
Downloaded image 6: https://i.pinimg.com/236x/b7/cf/ba/b7cfba821514f33842143cb52751ba48.jpg
Downloaded image 7: https://i.pinimg.com/236x/51/86/3c/51863c06a06b9c21395ab580a3feb2c4.jpg
Downloaded image 8: https://i.pinimg.com/236x/63/a4/c8/63a4c89b67f6cfb9b236854e805f4aa9.jpg
Downloaded image 9: https://i.pinimg.com/236x/85/58/b4/8558b411dadcffc543a4d8ab4297680a.jpg
Downloaded image 10: https://i.pinimg.com/236x/d1/ab/05/d1ab055c70e42a79a179b9b5b1ae3696.jpg
Downloaded image 11: https://i.pinimg.com/236x/93/6c/c9/936cc929c53102c561fbe7a