In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import pandas as pd
import time

In [2]:
# Setup
options = Options()
# options.add_argument("--headless")  # Turn on after debugging
options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(options=options)

In [8]:
def is_end_of_products(driver):
    try:
        msg_elem = driver.find_element(By.XPATH, "//*[contains(text(), \"Sorry, there are no products available right now\")]")
        return msg_elem.is_displayed()
    except NoSuchElementException:
        return False

In [5]:
def scrape_category_page(driver, url):
    driver.get(url)
    wait = WebDriverWait(driver, 15)
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "body")))

    if is_end_of_products(driver):
        print("🚫 End of category detected (message is visible).")
        return None

    products = driver.find_elements(By.CSS_SELECTOR, ".product-tile")
    page_data = []

    for i, p in enumerate(products):
        try:
            name = p.find_element(By.CSS_SELECTOR, "div.product-tile__name > p").text.strip()
            price = p.find_element(By.CSS_SELECTOR, "div.product-tile__price-container > span").text.strip()
            page_data.append({"name": name, "price": price})
        except Exception as e:
            print(f"⚠️ Skipping product {i}: {e}")

    return page_data



In [6]:
base_url = "https://www.aldi.us/products?page={}"
all_data = []

page = 1
while True:
    print(f"➡ Scraping page {page}")
    url = base_url.format(page)
    page_data = scrape_category_page(driver, url)

    if page_data is None:
        print("✅ Stopping scraper — reached last page.")
        break

    all_data.extend(page_data)
    page += 1


➡ Scraping page 1
➡ Scraping page 2
➡ Scraping page 3
⚠️ Skipping product 23: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=140.0.7339.210); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#staleelementreferenceexception
Stacktrace:
	GetHandleVerifier [0x0x7ff7fbbb1eb5+80197]
	GetHandleVerifier [0x0x7ff7fbbb1f10+80288]
	(No symbol) [0x0x7ff7fb9302fa]
	(No symbol) [0x0x7ff7fb937e5a]
	(No symbol) [0x0x7ff7fb93af6f]
	(No symbol) [0x0x7ff7fb93b02f]
	(No symbol) [0x0x7ff7fb987587]
	(No symbol) [0x0x7ff7fb987f9c]
	(No symbol) [0x0x7ff7fb97a59c]
	(No symbol) [0x0x7ff7fb9b03bf]
	(No symbol) [0x0x7ff7fb97a456]
	(No symbol) [0x0x7ff7fb9b0590]
	(No symbol) [0x0x7ff7fb9d87fb]
	(No symbol) [0x0x7ff7fb9b0153]
	(No symbol) [0x0x7ff7fb978b02]
	(No symbol) [0x0x7ff7fb9798d3]
	GetHandleVerifier [0x0x7ff7fbe6e83d+2949837]
	GetHandleVerifier [0x0x7ff7fbe68c6a+2926330]
	GetHandleVe

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=140.0.7339.210)
Stacktrace:
	GetHandleVerifier [0x0x7ff7fbbb1eb5+80197]
	GetHandleVerifier [0x0x7ff7fbbb1f10+80288]
	(No symbol) [0x0x7ff7fb9302fa]
	(No symbol) [0x0x7ff7fb9082a1]
	(No symbol) [0x0x7ff7fb9b799e]
	(No symbol) [0x0x7ff7fb9d8132]
	(No symbol) [0x0x7ff7fb9b0153]
	(No symbol) [0x0x7ff7fb978b02]
	(No symbol) [0x0x7ff7fb9798d3]
	GetHandleVerifier [0x0x7ff7fbe6e83d+2949837]
	GetHandleVerifier [0x0x7ff7fbe68c6a+2926330]
	GetHandleVerifier [0x0x7ff7fbe886c7+3055959]
	GetHandleVerifier [0x0x7ff7fbbccfee+191102]
	GetHandleVerifier [0x0x7ff7fbbd50af+224063]
	GetHandleVerifier [0x0x7ff7fbbbaf64+117236]
	GetHandleVerifier [0x0x7ff7fbbbb119+117673]
	GetHandleVerifier [0x0x7ff7fbba10a8+11064]
	BaseThreadInitThunk [0x0x7fff56f7e8d7+23]
	RtlUserThreadStart [0x0x7fff57608d9c+44]


In [7]:
df = pd.DataFrame(all_data)
df.to_csv("aldi_all_pages.csv", index=False)
print(f"✅ Scraped {len(df)} products across {page-1} pages.")

✅ Scraped 2054 products across 71 pages.
