In [84]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Scroll settings: how many scroll steps to perform, how far each step moves
# the viewport (in pixels), and how long to pause after each step so newly
# requested content has time to load.
scroll_count = 6
SCROLL_STEP_PX = 1000
SCROLL_PAUSE_SECONDS = 2

driver = webdriver.Chrome()
try:
    # Navigate to the Aritzia Dresses page
    driver.get("https://www.aritzia.com/en/clothing/dresses")

    # Block for up to 15 seconds until elements with class "lazy" are present
    # in the DOM (raises a timeout error otherwise; adjust the limit if needed)
    wait = WebDriverWait(driver, 15)
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "lazy")))

    # Simulate continuous scrolling using JavaScript; the step size is passed
    # to the script as an argument rather than being baked into the string
    for _ in range(scroll_count):
        driver.execute_script("window.scrollBy(0, arguments[0]);", SCROLL_STEP_PX)
        time.sleep(SCROLL_PAUSE_SECONDS)  # Wait for new content to load

    # Get the page source after scrolling
    page_source = driver.page_source
finally:
    # Always close the WebDriver, even if navigation or the wait above failed,
    # so a failed run does not leave a browser process behind
    driver.quit()

# Parse the page source with BeautifulSoup
soup = BeautifulSoup(page_source, "html.parser")

# Find all product containers (modify if the structure changes)
product_containers = soup.find_all("li", class_="ar-product-grid__tile")


def _extract_image_url(image_tag):
    """Return an image URL for a product ``<img>`` tag, or ``"No Image"``.

    Attributes are probed in priority order: ``data-original``, ``src``,
    ``data-mouseout-img``, then the first URL listed in ``data-srcset``.

    Args:
        image_tag: The ``<img>`` tag found inside a product tile, or ``None``
            when the tile has no image element.

    Returns:
        The first non-empty URL found, or the string ``"No Image"`` when the
        tag is missing or none of the probed attributes holds a usable value.
    """
    if image_tag is None:
        return "No Image"

    for attribute in ("data-original", "src", "data-mouseout-img"):
        candidate = image_tag.get(attribute)
        if candidate:
            return candidate

    srcset = image_tag.get("data-srcset")
    if srcset:
        # A srcset is a comma-separated list of "<url> <descriptor>" entries.
        # Whitespace-agnostic split() keeps a leading space from producing
        # an empty URL.
        first_entry = srcset.split(",")[0].split()
        if first_entry:
            return first_entry[0]

    # Fall back to the same sentinel used when there is no <img> at all, so
    # the column never mixes None with the placeholder string.
    return "No Image"


# List to store extracted product data
products = []

# Loop through each product container
for index, product in enumerate(product_containers):
    print(f"\nProcessing product {index + 1}...")  # Debugging info

    try:
        # Title comes from the first <a> tag that carries a `title` attribute
        title_tag = product.find("a", title=True)
        title = title_tag["title"].strip() if title_tag else "No Title"
        print(f"Title: {title}")  # Debugging info

        # Image URL (several attributes are tried; see _extract_image_url)
        image_url = _extract_image_url(product.find("img"))
        print(f"Image URL: {image_url}")  # Debugging info

        # Product link is the href of the same <a> tag used for the title
        product_link = title_tag["href"] if title_tag and title_tag.has_attr("href") else "No Link"
        print(f"Product Link: {product_link}")  # Debugging info

        # Price text from the sales-price span (ensure the class is still correct)
        price_span = product.find("span", class_="js-product__sales-price")
        price = price_span.get_text(strip=True) if price_span else "Price not found"
        print(f"Price: {price}")  # Debugging info

        # Append extracted data
        products.append({"Title": title, "Product Link": product_link, "Image URL": image_url, "Price": price})

    except Exception as e:
        # Best-effort scrape: report the failing tile and continue with the rest
        print(f"Error extracting product {index + 1}: {e}")



Processing product 1...
Title: TECHNIQUE POPLIN DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/s25_a08_116063_1274_on_a.jpg
Price: $168

Processing product 2...
Title: SHIMMER SATIN DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/s25_a08_99544_10252_on_a.jpg
Price: $28.99-$98(Up to −70%)

Processing product 3...
Title: REGAL DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/s25_a08_124641_30751_on_a.jpg
Price: $148

Processing product 4...
Title: CELEBRATE DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/s25_a08_119390_19773_on_a.jpg
Price: $168

Processing product 5...
Title: AUDIENCE SATIN MINI DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/s25_a08_121524_19773_on_a.jpg
Price: $138

Processing product 6...
Title: SHIMMER SATIN TUBE DRESS
Image URL: https://assets.aritzia.com/image/upload/medium/f24_a08_124181_2175_on_a.jpg
Price: $37.99-$128(Up to −70%)

Processing product 7...
Title: BOND DRESS
Image URL

In [85]:
# Build a DataFrame from the list of scraped product records
df = pd.DataFrame(data=products)

# Nothing extracted: tell the user instead of silently continuing
if df.empty:
    print(
        "No products found. The page structure might have changed. Please verify the class names."
    )

    
# # Display the DataFrame
# # tools.display_dataframe_to_user(name="Extracted Products", dataframe=df)

# # Save data to CSV (optional)
# df.to_csv("aritzia_dresses.csv", index=False)
# print("Data saved to aritzia_dresses.csv")


In [86]:
# Preview the first five rows of the scraped product table
df.head(n=5)


Unnamed: 0,Title,Image URL,Price
0,TECHNIQUE POPLIN DRESS,https://assets.aritzia.com/image/upload/medium...,$168
1,SHIMMER SATIN DRESS,https://assets.aritzia.com/image/upload/medium...,$28.99-$98(Up to −70%)
2,REGAL DRESS,https://assets.aritzia.com/image/upload/medium...,$148
3,CELEBRATE DRESS,https://assets.aritzia.com/image/upload/medium...,$168
4,AUDIENCE SATIN MINI DRESS,https://assets.aritzia.com/image/upload/medium...,$138
