In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
from datetime import datetime
import time

# Launch Chrome through Selenium with the "--headless" and "--disable-gpu"
# command-line flags applied; `driver` is the shared browser session used by
# every later navigation in this script.
options = Options()
for chrome_flag in ("--headless", "--disable-gpu"):
    options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=options)

# Search-results URL template; "{}" is filled with the 1-based page number.
search_base = "https://justingredients.co.uk/search?q=herbs&page={}"

# Number of result pages to walk (hard-coded; the site is not asked for it).
total_pages = 9

# (name, url) pairs in first-seen order, one entry per distinct product URL.
product_links = []
seen_links = set()

for page_num in range(1, total_pages + 1):
    search_url = search_base.format(page_num)
    driver.get(search_url)
    # Fixed pause after navigation (presumably to let the listing render
    # before the DOM is queried).
    time.sleep(3)

    product_blocks = driver.find_elements(By.CLASS_NAME, "collection-item")
    print(f"Page {page_num}: Found {len(product_blocks)} product blocks")

    for block in product_blocks:
        # Only the two WebDriver calls can raise here, so keep the try narrow.
        try:
            link = block.get_attribute("href")
            name = block.text.strip()
        except Exception as e:
            print(f"Error parsing block on page {page_num}: {e}")
            continue

        if not (link and name):
            print(f"⚠️ Skipped block with missing link or name on page {page_num}")
            continue

        # The same product can be listed on more than one results page; keep
        # only its first occurrence so the "unique" count below is accurate
        # and no product page is scraped twice.
        if link in seen_links:
            continue
        seen_links.add(link)
        product_links.append((name, link))

print(f"Finished scraping {len(product_links)} unique product links")

# Pound sign, written as an escape so it survives any re-encoding of this file.
POUND = "\u00a3"


def _parse_price_pair(price_text):
    """Return ``(regular_price, sale_price)`` parsed from raw price text.

    The text is split on whitespace; each token has the pound sign and
    thousands separators removed and is kept if it parses as a number.
    Duplicate amounts are collapsed.

    * one distinct amount  -> both values are that amount
    * two distinct amounts -> the higher is the regular price, the lower
      the sale price
    * anything else        -> ``(None, None)``

    Prices are returned as strings formatted like ``"£1.50"``.
    """
    amounts = []
    for token in price_text.split():
        try:
            amounts.append(float(token.replace(POUND, "").replace(",", "")))
        except ValueError:
            # Token is not a number (e.g. a label word); ignore it.
            continue

    amounts = list(dict.fromkeys(amounts))  # de-duplicate, keep order

    if len(amounts) == 1:
        only = f"{POUND}{amounts[0]:.2f}"
        return only, only
    if len(amounts) == 2:
        return f"{POUND}{max(amounts):.2f}", f"{POUND}{min(amounts):.2f}"
    return None, None


# One dict per product variant (a table row that has a "Size" cell).
results = []

try:
    for name, link in product_links:
        # Navigation is inside the try so one unreachable page is reported
        # and skipped instead of aborting the whole run.
        try:
            driver.get(link)
            time.sleep(2)  # fixed pause before querying the page
            rows = driver.find_elements(By.CSS_SELECTOR, 'tr')
        except Exception as e:
            print(f"⚠️ Could not parse {link}: {e}")
            continue

        for row in rows:
            try:
                # find_elements returns [] rather than raising, so rows
                # without a Size cell are skipped without using an exception
                # for control flow.
                size_cells = row.find_elements(By.CSS_SELECTOR, 'td[data-label="Size"]')
                if not size_cells:
                    continue
                price_divs = row.find_elements(By.CSS_SELECTOR, 'td[data-label="Price"] div.price')

                combined_price_text = " ".join(div.text.strip() for div in price_divs).strip()
                size = size_cells[0].text.strip()
            except Exception as e:
                # Best effort per row: report the problem and move on.
                print(f"⚠️ Skipped a row on {link}: {e}")
                continue

            regular_price, sale_price = _parse_price_pair(combined_price_text)

            results.append({
                "name": name,
                "url": link,
                "variant": size,
                "regular_price": regular_price,
                "sale_price": sale_price,
                "scraped_at": datetime.now().isoformat()
            })
finally:
    # Always shut the browser down, even if scraping fails part-way.
    driver.quit()

# Save to CSV.
# The column list is given explicitly so the file still gets a header row
# when nothing was scraped (an empty list would otherwise yield a frame with
# no columns). The order matches the keys of each dict in `results`.
csv_columns = ["name", "url", "variant", "regular_price", "sale_price", "scraped_at"]
df = pd.DataFrame(results, columns=csv_columns)
df.to_csv("herb_prices.csv", index=False)
print(f"Scraped {len(df)} product variants with visible prices")


🔍 Page 1: Found 24 product blocks
🔍 Page 2: Found 24 product blocks
🔍 Page 3: Found 24 product blocks
🔍 Page 4: Found 24 product blocks
🔍 Page 5: Found 24 product blocks
🔍 Page 6: Found 24 product blocks
🔍 Page 7: Found 24 product blocks
🔍 Page 8: Found 24 product blocks
🔍 Page 9: Found 21 product blocks
✅ Finished scraping 213 unique product links
✅ Scraped 643 product variants with visible prices
