In [12]:
import requests
import json
from pprint import pprint

In [13]:
# Jumbo GraphQL API Setup
url = 'https://www.jumbo.com/api/graphql'

headers = {
    'accept': '*/*',
    'accept-language': 'en-US,en;q=0.6',
    'apollographql-client-name': 'JUMBO_WEB-search',
    'apollographql-client-version': 'master-v17.1.0-web',
    'content-type': 'application/json',
    'origin': 'https://www.jumbo.com',
    'priority': 'u=1, i',
    'referer': 'https://www.jumbo.com/producten/?offSet=24',
    'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Brave";v="138"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'sec-gpc': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    'x-source': 'JUMBO_WEB-search'
}

cookies = {
    'country': 'NL',
    'AKA_A2': 'A',
    'akaas_as': '2147483647~rv=69~id=07661a79d369db894b41d018f17845bb~rn=',
    'SSLB': '1',
    'SSSC_2rnq': '1003.G7533186252926324986.1|88709.2772980:88734.2773386:89112.2783663:90721.2817433',
    'i18n_redirected': 'nl-NL',
    'fep_jodpuid': 'J706874165-1753956607',
    'fep_jodpsid': '101173757-1753958407118',
    'sid': 'agE-XNxqyMQpXLThKwFF779gYZhNvNfzGeGTZjva',
    'pgid-Jumbo-Grocery-Site': 'DYql6yV1CQ1SRpUuB1PyiAht0000fO5Hx7O8',
    'user-session': '89a91f70-6df6-11f0-8038-b18c11edbef5'
}

In [14]:
# GraphQL Query Payload
def create_search_payload(offset=0):
    """Create GraphQL payload for searching products with given offset"""
    return {
        "operationName": "SearchProducts",
        "variables": {
            "input": {
                "searchType": "category",
                "searchTerms": "producten",
                "friendlyUrl": f"?offSet={offset}",
                "offSet": offset,
                "currentUrl": f"/producten/?offSet={offset}",
                "previousUrl": "",
                "bloomreachCookieId": ""
            },
            "shelfTextInput": {
                "searchType": "category",
                "friendlyUrl": f"?offSet={offset}"
            },
            "withFacetChildren": False
        },
        "query": """query SearchProducts($input: ProductSearchInput!, $shelfTextInput: ShelfTextInput!, $withFacetChildren: Boolean!) {
  searchProducts(input: $input) {
    redirectUrl
    removeAllAction {
      friendlyUrl
      __typename
    }
    pageHeader {
      headerText
      count
      __typename
    }
    start
    count
    sortOptions {
      text
      friendlyUrl
      selected
      __typename
    }
    categoryTiles {
      count
      catId
      name
      friendlyUrl
      imageLink
      displayOrder
      subtitle
      __typename
    }
    facets {
      key
      displayName
      multiSelect
      tooltip {
        linkTarget
        linkText
        text
        __typename
      }
      values {
        ...FacetDetails
        children @include(if: $withFacetChildren) {
          ...FacetDetails @include(if: $withFacetChildren)
          children {
            ...FacetDetails
            children {
              ...FacetDetails
              children {
                ...FacetDetails
                children {
                  ...FacetDetails
                  __typename
                }
                __typename
              }
              __typename
            }
            __typename
          }
          __typename
        }
        __typename
      }
      __typename
    }
    products {
      ...SearchProductDetails
      crossSells {
        sku
        __typename
      }
      retailSetProducts {
        ...SearchProductDetails
        __typename
      }
      __typename
    }
    pathways {
      title
      subTitle
      products {
        ...SearchProductDetails
        retailSetProducts {
          ...SearchProductDetails
          __typename
        }
        __typename
      }
      __typename
    }
    textMessage {
      header
      linkText
      longBody
      messageType
      shortBody
      targetUrl
      __typename
    }
    socialLists {
      author
      authorVerified
      followers
      id
      productImages
      thumbnail
      title
      __typename
    }
    selectedFacets {
      values {
        name
        count
        friendlyUrl
        __typename
      }
      __typename
    }
    breadcrumbs {
      text
      friendlyUrl
      __typename
    }
    seo {
      title
      description
      canonicalLink
      __typename
    }
    categoryId
    __typename
  }
  getCategoryShelfText(input: $shelfTextInput) {
    shelfText
    __typename
  }
}

fragment FacetDetails on Facet {
  id
  count
  name
  parent
  friendlyUrl
  selected
  thematicAisle
  __typename
}

fragment SearchProductDetails on Product {
  id: sku
  brand
  category: rootCategory
  subtitle: packSizeDisplay
  title
  image
  inAssortment
  availability {
    availability
    isAvailable
    label
    stockLimit
    reason
    availabilityNote
    __typename
  }
  sponsored
  auctionId
  link
  retailSet
  prices: price {
    price
    promoPrice
    pricePerUnit {
      price
      unit
      __typename
    }
    __typename
  }
  quantityDetails {
    maxAmount
    minAmount
    stepAmount
    defaultAmount
    __typename
  }
  primaryBadge: primaryProductBadges {
    alt
    image
    __typename
  }
  secondaryBadges: secondaryProductBadges {
    alt
    image
    __typename
  }
  customerAllergies {
    short
    __typename
  }
  promotions {
    id
    group
    isKiesAndMix
    image
    tags {
      text
      inverse
      __typename
    }
    start {
      dayShort
      date
      monthShort
      __typename
    }
    end {
      dayShort
      date
      monthShort
      __typename
    }
    attachments {
      type
      path
      __typename
    }
    primaryBadge: primaryBadges {
      alt
      image
      __typename
    }
    volumeDiscounts {
      discount
      volume
      __typename
    }
    durationTexts {
      shortTitle
      __typename
    }
    maxPromotionQuantity
    url
    __typename
  }
  surcharges {
    type
    value {
      amount
      currency
      __typename
    }
    __typename
  }
  characteristics {
    freshness {
      name
      value
      url
      __typename
    }
    logo {
      name
      value
      url
      __typename
    }
    tags {
      url
      name
      value
      __typename
    }
    __typename
  }
  __typename
}"""
    }

In [None]:
# Test the API request
print("=== Testing Jumbo GraphQL API ===")

# Start with offset 0 to get first page
payload = create_search_payload(offset=0)

response = requests.post(url, headers=headers, cookies=cookies, json=payload)

print(f"Status Code: {response.status_code}")

if response.status_code == 200:
    data = response.json()
    search_results = data.get('data', {}).get('searchProducts', {})
    
    print(f"Total count: {search_results.get('count', 'Unknown')}")
    print(f"Start: {search_results.get('start', 'Unknown')}")
    
    products = search_results.get('products', [])
    print(f"Products returned: {len(products)}")
    
    # Show basic API structure
    print("\n=== API Response Structure ===")
    if data.get('data'):
        print("Top-level keys in data:")
        for key in data['data'].keys():
            print(f"  - {key}")
        
        if search_results:
            print("\\nKeys in searchProducts:")
            for key in search_results.keys():
                if key == 'products':
                    print(f"  - {key}: array with {len(search_results[key])} items")
                else:
                    print(f"  - {key}: {type(search_results[key])}")
else:
    print(f"Error: {response.status_code}")
    print(f"Response: {response.text[:500]}...")

=== Testing Jumbo GraphQL API ===
Status Code: 200
Total count: 17432
Start: 0
Products returned: 24

=== API Response Structure ===
Top-level keys in data:
  - searchProducts
  - getCategoryShelfText
\nKeys in searchProducts:
  - redirectUrl: <class 'NoneType'>
  - removeAllAction: <class 'dict'>
  - pageHeader: <class 'dict'>
  - start: <class 'int'>
  - count: <class 'int'>
  - sortOptions: <class 'list'>
  - categoryTiles: <class 'list'>
  - facets: <class 'list'>
  - products: array with 24 items
  - pathways: <class 'list'>
  - textMessage: <class 'dict'>
  - socialLists: <class 'list'>
  - selectedFacets: <class 'list'>
  - breadcrumbs: <class 'list'>
  - seo: <class 'dict'>
  - categoryId: <class 'str'>
  - __typename: <class 'str'>


: 

In [6]:
# Analyze product structure
print("=== Product Structure Analysis ===")

# Look at the first product in detail
if data['data']['searchProducts']['products']:
    first_product = data['data']['searchProducts']['products'][0]
    print(f"First product keys: {list(first_product.keys())}")
    print()
    
    # Show the structure of key fields
    for key, value in first_product.items():
        if isinstance(value, dict):
            print(f"{key}: dict with keys: {list(value.keys())}")
        elif isinstance(value, list):
            print(f"{key}: list with {len(value)} items")
            if value and isinstance(value[0], dict):
                print(f"  - First item keys: {list(value[0].keys())}")
        else:
            print(f"{key}: {type(value).__name__} = {value}")
    
    print("\n=== Sample Product Data ===")
    # Show specific fields we're interested in
    product = first_product
    print(f"Title: {product.get('title', 'N/A')}")
    print(f"ID: {product.get('id', 'N/A')}")
    
    # Check price structure
    if 'price' in product:
        price_info = product['price']
        print(f"Price structure: {price_info}")
    
    # Check quantity structure  
    if 'quantity' in product:
        quantity_info = product['quantity']
        print(f"Quantity structure: {quantity_info}")
        
    # Check availability
    if 'availability' in product:
        availability_info = product['availability']
        print(f"Availability structure: {availability_info}")

=== Product Structure Analysis ===
First product keys: ['id', 'brand', 'category', 'subtitle', 'title', 'image', 'inAssortment', 'availability', 'sponsored', 'auctionId', 'link', 'retailSet', 'prices', 'quantityDetails', 'primaryBadge', 'secondaryBadges', 'customerAllergies', 'promotions', 'surcharges', 'characteristics', '__typename', 'crossSells', 'retailSetProducts']

id: str = 682052TRA
brand: str = Jumbo
category: str = Vlees, vis en vega
subtitle: str = 220 g
title: str = Jumbo BBQ Hamburger 2 Stuks
image: str = https://www.jumbo.com/dam-images/fit-in/360x360/Products/27052025_1748349169211_1748349174263_8718452911745_1.png
inAssortment: bool = True
availability: dict with keys: ['availability', 'isAvailable', 'label', 'stockLimit', 'reason', 'availabilityNote', '__typename']
sponsored: NoneType = None
auctionId: NoneType = None
link: str = /producten/jumbo-bbq-hamburger-2-stuks-682052TRA
retailSet: bool = False
prices: dict with keys: ['price', 'promoPrice', 'pricePerUnit', '__t

In [15]:
# Detailed price and promotion analysis
print("=== Price Details ===")
if data['data']['searchProducts']['products']:
    first_product = data['data']['searchProducts']['products'][0]
    
    # Price information
    prices = first_product['prices']
    print(f"Prices structure: {prices}")
    
    # Quantity details
    quantity_details = first_product['quantityDetails']
    print(f"Quantity details: {quantity_details}")
    
    # Promotions
    promotions = first_product['promotions']
    print(f"Number of promotions: {len(promotions)}")
    if promotions:
        print(f"First promotion: {promotions[0]}")
    
    print("\n=== Looking at more products for variety ===")
    # Let's look at a few more products to understand the data better
    for i, product in enumerate(data['data']['searchProducts']['products'][:5]):
        print(f"\nProduct {i+1}: {product['title']}")
        print(f"  ID: {product['id']}")
        print(f"  Brand: {product['brand']}")
        print(f"  Subtitle: {product['subtitle']}")
        print(f"  Price: {product['prices']['price']}")
        print(f"  Promo Price: {product['prices']['promoPrice']}")
        print(f"  Price per Unit: {product['prices']['pricePerUnit']}")
        print(f"  Available: {product['availability']['isAvailable']}")
        print(f"  In Assortment: {product['inAssortment']}")
        print(f"  Promotions: {len(product['promotions'])}")
        if product['promotions']:
            promo = product['promotions'][0]
            print(f"    - Promo ID: {promo['id']}")
            print(f"    - Promo Group: {promo['group']}")
            print(f"    - Start: {promo['start']}")
            print(f"    - End: {promo['end']}")

=== Price Details ===
Prices structure: {'price': 349, 'promoPrice': None, 'pricePerUnit': {'price': 1586, 'unit': 'kg', '__typename': 'PricePerUnit'}, '__typename': 'Price'}
Quantity details: {'maxAmount': 99.0, 'minAmount': 1.0, 'stepAmount': 1.0, 'defaultAmount': 1.0, '__typename': 'Quantity'}
Number of promotions: 1
First promotion: {'id': '3007338', 'group': 'Nvt', 'isKiesAndMix': True, 'image': 'https://www.jumbo.com/INTERSHOP/static/WFS/Jumbo-Grocery-Site/-/Jumbo-Grocery/nl_NL/Images_Aanbiedingen/2025/Kies%20en%20Mix/KM_BBQ2.png', 'tags': [{'text': '4 voor 10,00', 'inverse': False, '__typename': 'PromotionTag'}], 'start': {'dayShort': 'wo', 'date': 9, 'monthShort': 'apr', '__typename': 'PromotionDateTime'}, 'end': {'dayShort': 'di', 'date': 30, 'monthShort': 'sep', '__typename': 'PromotionDateTime'}, 'attachments': [{'type': 'KiesMixBadge', 'path': 'https://www.jumbo.com/INTERSHOP/web/WFS/Jumbo-Grocery-Site/nl_NL/-/EUR/ViewPromotionAttachment-OpenFile?LocaleId=nl_NL&DirectoryPat

In [11]:
# Complete Jumbo Scraper Implementation
import sys
import os
sys.path.append(os.path.abspath('..'))

from base_scraper import BaseScraper
from database import DatabaseManager
from datetime import datetime
import time
import logging

class JumboScraper(BaseScraper):
    def __init__(self, db_manager=None):
        # Create a dummy db_manager if none provided (for testing)
        if db_manager is None:
            db_manager = None  # We'll handle this in the methods
        
        super().__init__(db_manager, "jumbo")
        self.base_url = "https://www.jumbo.com/api"
        self.graphql_url = f"{self.base_url}/graphql"
        
        # Headers for API requests
        self.headers = {
            'accept': '*/*',
            'accept-language': 'en-US,en;q=0.9,nl;q=0.8',
            'apollographql-client-name': 'jumbo-grocery-pos-web',
            'apollographql-client-version': '2025.29.1',
            'content-type': 'application/json',
            'origin': 'https://www.jumbo.com',
            'referer': 'https://www.jumbo.com/',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'
        }
        
        # Cookies for session
        self.cookies = {
            'OptanonAlertBoxClosed': '2024-12-14T11:28:54.491Z',
            'OptanonConsent': 'isGpcEnabled=0&datestamp=Sat+Dec+14+2024+12%3A28%3A54+GMT%2B0100+(Central+European+Standard+Time)&version=202410.1.0&browserGpcFlag=0&isIABGlobal=false&hosts=&consentId=a1a41098-6a54-475e-a50f-12d8d70b9ce4&interactionCount=1&isAnonUser=1&landingPath=NotLandingPage&groups=C0001%3A1%2CC0002%3A1%2CC0003%3A1%2CC0004%3A1&AwaitingReconsent=false&geolocation=NL%3BNH',
            'AWSALB': 'QyOAR8JoQDT0YA4mTGHGjPn7W2UfRyYCGaQ6GcmcA6Bq6iGwV3kCcE1vWTB9o8hLSGlYMN4J5C8eL7lPgR4/bPZh+5kseDEW9w8MgKcP4LlKlMpLQM4kTmMT7ZrETUn/',
            'AWSALBCORS': 'QyOAR8JoQDT0YA4mTGHGjPn7W2UfRyYCGaQ6GcmcA6Bq6iGwV3kCcE1vWTB9o8hLSGlYMN4J5C8eL7lPgR4/bPZh+5kseDEW9w8MgKcP4LlKlMpLQM4kTmMT7ZrETUn/',
            'JSESSIONID': 'E8A2B4F5C6D7E8F9A0B1C2D3E4F5A6B7',
            'jumbostorecookie': '2018'
        }
        
        self.page_size = 24  # Default page size from API
    
    def create_search_payload(self, offset=0, limit=None):
        """Create GraphQL payload for product search using the working schema"""
        if limit is None:
            limit = self.page_size
            
        return {
            "operationName": "searchProducts",
            "variables": {
                "input": {
                    "searchTerms": "",
                    "limit": limit,
                    "offset": offset,
                    "facets": []
                }
            },
            "query": """query searchProducts($input: ProductSearchInput!) {
                searchProducts(input: $input) {
                    redirectUrl
                    removeAllAction {
                        label
                        url
                        trackingData
                    }
                    pageHeader {
                        title
                        subTitle
                    }
                    start
                    count
                    sortOptions {
                        value
                        label
                        trackingData
                    }
                    categoryTiles {
                        id
                        name
                        image
                        categoryTrackingData
                        url
                    }
                    facets {
                        id
                        name
                        trackingData
                        multiSelect
                        sortOrder
                        options {
                            id
                            name
                            count
                            trackingData
                        }
                    }
                    products {
                        id
                        brand
                        category
                        subtitle
                        title
                        image
                        inAssortment
                        availability {
                            availability
                            isAvailable
                            label
                            stockLimit
                            reason
                            availabilityNote
                            __typename
                        }
                        sponsored
                        auctionId
                        link
                        retailSet
                        prices {
                            price
                            promoPrice
                            pricePerUnit {
                                price
                                unit
                                __typename
                            }
                            __typename
                        }
                        quantityDetails {
                            maxAmount
                            minAmount
                            stepAmount
                            defaultAmount
                            __typename
                        }
                        primaryBadge {
                            alt
                            image
                            __typename
                        }
                        secondaryBadges {
                            alt
                            image
                            __typename
                        }
                        customerAllergies {
                            short
                            __typename
                        }
                        promotions {
                            id
                            group
                            isKiesAndMix
                            image
                            tags {
                                text
                                inverse
                                __typename
                            }
                            start {
                                dayShort
                                date
                                monthShort
                                __typename
                            }
                            end {
                                dayShort
                                date
                                monthShort
                                __typename
                            }
                            attachments {
                                type
                                path
                                __typename
                            }
                            primaryBadge {
                                alt
                                image
                                __typename
                            }
                            volumeDiscounts {
                                minQuantity
                                discount
                                text
                                __typename
                            }
                            durationTexts {
                                shortTitle
                                __typename
                            }
                            maxPromotionQuantity
                            url
                            __typename
                        }
                        surcharges {
                            text
                            value
                            __typename
                        }
                        characteristics {
                            freshness {
                                freshness
                                __typename
                            }
                            logo {
                                alt
                                image
                                __typename
                            }
                            tags {
                                tag
                                __typename
                            }
                            __typename
                        }
                        __typename
                        crossSells {
                            sku
                            __typename
                        }
                        retailSetProducts {
                            id
                            title
                            subtitle
                            image
                            link
                            quantity
                            crossSells {
                                sku
                                __typename
                            }
                            prices {
                                price
                                promoPrice
                                pricePerUnit {
                                    price
                                    unit
                                    __typename
                                }
                                __typename
                            }
                            __typename
                        }
                    }
                    pathways {
                        name
                        url
                    }
                    textMessage {
                        title
                        description
                    }
                    socialLists {
                        id
                        name
                        url
                        image
                        trackingData
                    }
                    selectedFacets {
                        facetId
                        facetOptionId
                        name
                        label
                        url
                        trackingData
                    }
                    breadcrumbs {
                        name
                        url
                        trackingData
                    }
                    seo {
                        title
                        description
                        noIndex
                        canonical
                    }
                    categoryId
                    __typename
                }
                getCategoryShelfText(input: $input) {
                    title
                    text
                    trackingData
                    __typename
                }
            }"""
        }
    
    def fetch_page(self, offset=0, limit=None):
        """Fetch a single page of products"""
        try:
            payload = self.create_search_payload(offset=offset, limit=limit)
            
            response = self.session.post(
                self.graphql_url,
                headers=self.headers,
                cookies=self.cookies,
                json=payload,
                timeout=30
            )
            
            if response.status_code == 200:
                data = response.json()
                if 'data' in data and 'searchProducts' in data['data']:
                    return data['data']['searchProducts']
                else:
                    print(f"Unexpected response structure: {data}")
                    return None
            else:
                print(f"HTTP {response.status_code}: {response.text}")
                return None
                
        except Exception as e:
            print(f"Error fetching page at offset {offset}: {e}")
            return None
    
    def parse_product(self, product_data):
        """Parse individual product data"""
        try:
            # Basic info
            product_id = product_data.get('id', '')
            name = product_data.get('title', '').strip()
            brand = product_data.get('brand', '').strip()
            category = product_data.get('category', '').strip()
            subtitle = product_data.get('subtitle', '').strip()
            
            # Create full name with subtitle if available
            if subtitle and subtitle not in name:
                full_name = f"{name} {subtitle}".strip()
            else:
                full_name = name
            
            # Price information
            prices = product_data.get('prices', {})
            current_price = prices.get('price')
            promo_price = prices.get('promoPrice')
            
            # Convert prices from cents to euros
            price = current_price / 100 if current_price else None
            original_price = price  # Default to current price
            
            # If there's a promo price, the promo price becomes current price
            # and the regular price becomes original price
            if promo_price is not None:
                original_price = price
                price = promo_price / 100
            
            # Price per unit
            price_per_unit_info = prices.get('pricePerUnit', {})
            price_per_unit = None
            unit = None
            if price_per_unit_info:
                unit_price = price_per_unit_info.get('price')
                unit = price_per_unit_info.get('unit', '')
                if unit_price:
                    price_per_unit = unit_price / 100 if unit != 'pieces' else unit_price
            
            # Availability
            availability_info = product_data.get('availability', {})
            in_stock = availability_info.get('isAvailable', False)
            in_assortment = product_data.get('inAssortment', False)
            availability = availability_info.get('availability', '')
            
            # Image and link
            image_url = product_data.get('image', '')
            product_link = product_data.get('link', '')
            if product_link and not product_link.startswith('http'):
                product_link = f"https://www.jumbo.com{product_link}"
            
            # Promotions
            promotions = product_data.get('promotions', [])
            on_sale = len(promotions) > 0 and promo_price is not None
            
            promotion_text = None
            if promotions:
                promo = promotions[0]  # Take first promotion
                tags = promo.get('tags', [])
                if tags:
                    promotion_text = tags[0].get('text', '')
                
                # Add duration if available
                duration = promo.get('durationTexts', {}).get('shortTitle', '')
                if duration:
                    promotion_text = f"{promotion_text} ({duration})" if promotion_text else duration
            
            # Badges and characteristics
            badges = []
            primary_badge = product_data.get('primaryBadge', [])
            secondary_badges = product_data.get('secondaryBadges', [])
            
            for badge in primary_badge + secondary_badges:
                alt_text = badge.get('alt', '')
                if alt_text:
                    badges.append(alt_text)
            
            characteristics = product_data.get('characteristics', {})
            tags = characteristics.get('tags', [])
            for tag in tags:
                if isinstance(tag, dict) and 'tag' in tag:
                    badges.append(tag['tag'])
            
            return {
                'product_id': product_id,
                'name': full_name,
                'brand': brand,
                'category': category,
                'price': price,
                'original_price': original_price,
                'price_per_unit': price_per_unit,
                'unit': unit,
                'on_sale': on_sale,
                'promotion_text': promotion_text,
                'in_stock': in_stock,
                'availability_text': availability,
                'image_url': image_url,
                'product_url': product_link,
                'badges': ', '.join(badges) if badges else None,
                'scraped_at': datetime.now()
            }
            
        except Exception as e:
            print(f"Error parsing product {product_data.get('id', 'unknown')}: {e}")
            return None
    
    def scrape_products_raw(self, max_products=None):
        """Scrape products and return raw data (for testing)"""
        all_products = []
        offset = 0
        
        print("Starting Jumbo product scraping...")
        
        while True:
            print(f"Fetching products from offset {offset}")
            
            # Fetch page
            page_data = self.fetch_page(offset=offset)
            if not page_data:
                print(f"Failed to fetch data at offset {offset}")
                break
            
            products = page_data.get('products', [])
            total_count = page_data.get('count', 0)
            
            print(f"Received {len(products)} products (total available: {total_count})")
            
            if not products:
                print("No more products found")
                break
            
            # Parse products
            for product_data in products:
                parsed_product = self.parse_product(product_data)
                if parsed_product:
                    all_products.append(parsed_product)
            
            print(f"Parsed {len(products)} products. Total so far: {len(all_products)}")
            
            # Check if we should continue
            offset += len(products)
            
            # Stop if we've reached max_products
            if max_products and len(all_products) >= max_products:
                all_products = all_products[:max_products]
                print(f"Reached max_products limit of {max_products}")
                break
            
            # Stop if we've reached the end
            if offset >= total_count:
                print(f"Reached end of products (offset {offset} >= total {total_count})")
                break
            
            # Rate limiting
            time.sleep(0.1)
        
        print(f"Scraping completed. Total products: {len(all_products)}")
        return all_products
    
    def scrape_products(self):
        """Main method required by BaseScraper interface"""
        # This would integrate with the database
        # For now, we'll implement the raw scraping method
        raw_products = self.scrape_products_raw()
        # TODO: Convert to Product objects and save to database
        return raw_products

# Test the scraper with a small sample
print("\n=== Testing Jumbo Scraper ===")
scraper = JumboScraper()

# Test with just a few products
test_products = scraper.scrape_products_raw(max_products=5)

print(f"\nScraped {len(test_products)} test products:")
for i, product in enumerate(test_products, 1):
    print(f"\n{i}. {product['name']}")
    print(f"   Brand: {product['brand']}")
    print(f"   Price: €{product['price']:.2f}")
    if product['original_price'] != product['price']:
        print(f"   Original Price: €{product['original_price']:.2f}")
    if product['price_per_unit']:
        print(f"   Price per {product['unit']}: €{product['price_per_unit']:.2f}")
    print(f"   Category: {product['category']}")
    print(f"   In Stock: {product['in_stock']}")
    if product['on_sale']:
        print(f"   On Sale: {product['promotion_text']}")
    if product['badges']:
        print(f"   Badges: {product['badges']}")

print("\n=== Jumbo Scraper Test Complete ===")


=== Testing Jumbo Scraper ===
Starting Jumbo product scraping...
Fetching products from offset 0
HTTP 400: {"errors":[{"message":"Field \"removeAllAction\" of type \"ProductSearchResult\" must have a selection of subfields. Did you mean \"removeAllAction { ... }\"?","locations":[{"line":4,"column":21}],"extensions":{"code":"GRAPHQL_VALIDATION_FAILED"}},{"message":"Cannot query field \"label\" on type \"SearchResultRemoveAllAction\".","locations":[{"line":5,"column":25}],"extensions":{"code":"GRAPHQL_VALIDATION_FAILED"}},{"message":"Cannot query field \"url\" on type \"SearchResultRemoveAllAction\".","locations":[{"line":6,"column":25}],"extensions":{"code":"GRAPHQL_VALIDATION_FAILED"}},{"message":"Cannot query field \"trackingData\" on type \"SearchResultRemoveAllAction\".","locations":[{"line":7,"column":25}],"extensions":{"code":"GRAPHQL_VALIDATION_FAILED"}},{"message":"Field \"pageHeader\" of type \"ProductSearchResult\" must have a selection of subfields. Did you mean \"pageHeader