In [11]:
import requests
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv
import pandas as pd
import json
from typing import List, Dict

# Load key/value pairs from a local .env file (if one exists) into the process
# environment so that secrets can be read via os.environ instead of being
# written into notebook cells.
load_dotenv()



def fetch_html_with_productlist(url: str, api_key: str, timeout: float = 300) -> tuple:
    """Fetch one page through the Zyte API and return its HTML and product list.

    The request asks Zyte for browser-rendered HTML, scrolls to the bottom of
    the page first, and extracts the product list from that rendered HTML.

    Args:
        url: Address of the listing page to extract.
        api_key: Zyte API key; sent as the HTTP basic-auth username with an
            empty password.
        timeout: Seconds to wait for the HTTP call before giving up. The
            default is deliberately generous because browser rendering can be
            slow; lower it if you prefer to fail fast.

    Returns:
        A ``(browser_html, product_list)`` tuple taken from the ``browserHtml``
        and ``productList`` fields of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (for example a bad key or exhausted quota).
        requests.RequestException: On connection problems or timeout.
        KeyError: If a successful response lacks ``browserHtml`` or
            ``productList``.
    """
    api_response = requests.post(
        "https://api.zyte.com/v1/extract",
        auth=(api_key, ""),
        json={
            "url": url,
            "browserHtml": True,
            "productList": True,
            "productListOptions": {"extractFrom": "browserHtml"},
            "actions": [{"action": "scrollBottom"}],
        },
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=timeout,
    )

    # Surface HTTP failures as such instead of as a KeyError on the error body.
    api_response.raise_for_status()

    # Decode the (potentially large) JSON body only once.
    payload = api_response.json()
    browser_html: str = payload["browserHtml"]
    product_list = payload["productList"]

    return browser_html, product_list


def scrape_multiple_pages(site: Dict, api_key: str, start_page=1, end_page=3) -> List[Dict]:
    """
    Scrape a range of listing pages for one site via the Zyte API.

    Page 1 always uses the site's base ``url``; later pages are built by
    formatting ``pagination_pattern`` with ``page=<number>``. If the site has
    no ``pagination_pattern`` there is no way to address further pages, so the
    base URL is fetched once and the loop stops instead of re-downloading (and
    duplicating) the same page for every remaining page number.

    Each product dict is tagged in place with ``source_site`` and
    ``scraped_from_page``. A failure on one page is reported and skipped so
    that the remaining pages are still attempted.

    Args:
        site (dict): Site config. ``name`` is required; ``url`` and
            ``pagination_pattern`` (a format string containing ``{page}``)
            are read with ``.get``.
        api_key (str): Zyte API key.
        start_page (int): First page number to scrape.
        end_page (int): Last page number to scrape (inclusive).

    Returns:
        list: Combined list of product dicts from all pages that succeeded.
    """
    all_products = []
    print(f"\n📦 Scraping {site['name']} from page {start_page} to {end_page}...\n")

    base_url = site.get("url")
    pagination_pattern = site.get("pagination_pattern")
    base_url_requested = False

    for page_num in range(start_page, end_page + 1):
        # Build URL based on page number
        if page_num == 1 or not pagination_pattern:
            if base_url_requested:
                # No pattern to reach further pages: requesting base_url again
                # would only return the same products a second time.
                print(f"  ⏭️ No pagination pattern; stopping before page {page_num}")
                break
            url = base_url
            base_url_requested = True
        else:
            url = pagination_pattern.format(page=page_num)

        print(f"  → Scraping page {page_num}: {url}")

        try:
            html, products = fetch_html_with_productlist(url, api_key)
            if isinstance(products, dict):
                # The product list payload wraps the items in a "products" key.
                products = products.get("products")
            # Treat a missing/null product list as an empty page.
            products = products or []
            for p in products:
                p["source_site"] = site["name"]
                p["scraped_from_page"] = page_num
            print(f"✅ Found {len(products)} products on page {page_num}")
            all_products.extend(products)
        except Exception as e:
            # Best effort: report the failure and carry on with the next page.
            print(f"❌ Error on page {page_num}: {e}")

    return all_products


    






In [13]:
# Site configurations to scrape. Every entry shares the same brand, minimum
# discount and currency; only the display name, the first-page URL and the
# pagination format string (with a {page} placeholder) differ per shop.
test_sites = [
    {
        "name": site_name,
        "url": first_page_url,
        "pagination_pattern": paged_url_template,
        "brand": "Patagonia",
        "min_discount": 30,
        "currency": "EUR",
    }
    for site_name, first_page_url, paged_url_template in (
        (
            "Patagonia (official site)",
            "https://eu.patagonia.com/at/de/shop/web-specials",
            "https://eu.patagonia.com/at/de/shop/web-specials?p={page}",
        ),
        (
            "Patagonia (Bergfreunde)",
            "https://www.bergfreunde.eu/brands/patagonia/?searchparam=Patagonia",
            "https://www.bergfreunde.eu/brands/patagonia/{page}/?searchparam=Patagonia",
        ),
        (
            "Patagonia (Hardloop)",
            "https://www.hardloop.at/marken/1491-patagonia",
            "https://www.hardloop.at/marken/1491-patagonia?p={page}",
        ),
        (
            "Patagonia (Snowleader)",
            "https://www.snowleader.com/patagonia.html",
            "https://www.snowleader.com/patagonia.html?p={page}",
        ),
    )
]

In [None]:
# Run the scraper for every configured site and collect the products per site name.
#
# The Zyte API key is read from the environment (e.g. a ZYTE_API_KEY entry in a
# local .env file) rather than being written into the notebook, where it would
# be exposed to anyone who can read the file or its history.
# NOTE: the key that was previously hardcoded in this cell should be treated as
# leaked and rotated in the Zyte dashboard.
api_key = os.environ.get("ZYTE_API_KEY")
if not api_key:
    raise RuntimeError(
        "ZYTE_API_KEY is not set; export it or add it to your .env file before running this cell."
    )

all_results = {}
for site in test_sites:
    all_results[site["name"]] = scrape_multiple_pages(site, api_key, start_page=1, end_page=10)





📦 Scraping Patagonia (official site) from page 1 to 10...

  → Scraping page 1: https://eu.patagonia.com/at/de/shop/web-specials
✅ Found 33 products on page 1
  → Scraping page 2: https://eu.patagonia.com/at/de/shop/web-specials?p=2
✅ Found 64 products on page 2
  → Scraping page 3: https://eu.patagonia.com/at/de/shop/web-specials?p=3
✅ Found 68 products on page 3
  → Scraping page 4: https://eu.patagonia.com/at/de/shop/web-specials?p=4
✅ Found 61 products on page 4
  → Scraping page 5: https://eu.patagonia.com/at/de/shop/web-specials?p=5
✅ Found 61 products on page 5
  → Scraping page 6: https://eu.patagonia.com/at/de/shop/web-specials?p=6
✅ Found 61 products on page 6
  → Scraping page 7: https://eu.patagonia.com/at/de/shop/web-specials?p=7
✅ Found 61 products on page 7
  → Scraping page 8: https://eu.patagonia.com/at/de/shop/web-specials?p=8
✅ Found 61 products on page 8
  → Scraping page 9: https://eu.patagonia.com/at/de/shop/web-specials?p=9
✅ Found 61 products on page 9
  → Scra