In [1]:
import requests
from parsel import Selector
import pandas as pd
import json

In [2]:
# URL of the page to scrape
url = "https://www.bol.com/nl/nl/l/audio-hifi/10714/"

In [3]:
# Send a GET request to the URL
response = requests.get(url)

In [7]:
# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content using Parsel
    selector = Selector(response.text)
    
    # Find the elements containing the products
    products = selector.xpath('//div[@class="product-item__content"]')
    
    # List to store the data
    data = []

    # Iterate over the products and extract the required information
    for product in products:
        tracking_event_tag = product.xpath('.//wsp-analytics-tracking-event/@data-config').get()
        
        # Check if tracking_event_tag is found
        if not tracking_event_tag:
            continue
        
        # Extract the 'data-config' attribute
        data_config = tracking_event_tag
        
        # Parse the JSON data
        config_object = json.loads(data_config)
        
        # Get the 'product_id'
        product_id = config_object['product_id']
        
        title_tag = product.xpath('.//a[@class="product-title px_list_page_product_click list_page_product_tracking_target"]/text()').get()
        title = title_tag.strip() if title_tag else 'N/A'
        
        brand_tag = product.xpath('.//ul[@class="product-creator"]/text()').get()
        brand = brand_tag.strip() if brand_tag else 'N/A'
        
        # Find the outer div tag
        outer_div = product.xpath('.//div[@class="u-mb--xs"]')
        
        # Check if outer_div is found
        if not outer_div:
            continue
        
        # Extract the rating and number of reviews
        aria_label = outer_div.xpath('./@aria-label').get()
        rating = aria_label.split('Gemiddeld')[1].split('van de 5 sterren')[0].strip() if 'Gemiddeld' in aria_label else 'N/A'
        
        number_of_reviews = aria_label.split('uit')[1].split('reviews')[0].strip() if 'uit' in aria_label else 'N/A'
        n_reviews = number_of_reviews
        
        url_tag = product.xpath('.//a[@class="product-title px_list_page_product_click list_page_product_tracking_target"]/@href').get()
        url = 'https://www.bol.com' + url_tag if url_tag else 'N/A'
        
        price_span = product.xpath('.//span[@class="promo-price"]')
        
        # Check if price_span is found
        if not price_span:
            continue
        
        # Extract the integer part of the price and strip whitespace
        integer_part = price_span.xpath('normalize-space(text())').get().replace('.-', '')

        # Extract the fractional part of the price, if it exists, and strip whitespace
        fraction_sup = price_span.xpath('.//sup[@class="promo-price__fraction"]/text()').get()
        fraction_part = fraction_sup.strip() if fraction_sup else '00'

        # Combine the parts to get the full price as a string
        price_str = f"{integer_part}.{fraction_part}"

        # Convert the price to a float
        try:
            price_float = float(price_str)
        except ValueError:
            # Handle cases where price_str might still be malformed
            if integer_part and not fraction_part.isdigit():
                price_str = f"{integer_part}.00"
                price_float = float(price_str)
        
        price = price_float
        
        currency = 'EUR'  # Assuming the currency is EUR 
        
        category_name = 'Audio & HiFi'  # As per the given category in the URL

        # Append the data to the list
        data.append([product_id, title, brand, rating, n_reviews, url, price, currency, category_name])

    # Create a DataFrame from the data
    df = pd.DataFrame(data, columns=['PID', 'Title', 'Brand', 'Rating', 'N_Reviews', 'URL', 'Price', 'Currency', 'Category_Name'])

    # Save the DataFrame to a CSV file
    df.to_csv('products_xpath.csv', index=False)
    
    print('Data has been successfully saved to product_xpath.csv')
else:
    print('Failed. Status code:', response.status_code)


Data has been successfully saved to product_xpath.csv


In [8]:
df

Unnamed: 0,PID,Title,Brand,Rating,N_Reviews,URL,Price,Currency,Category_Name
0,9300000174719290,Carplay Scherm - Touchscreen - Navigatiesystee...,,3.6,17,https://www.bol.com/nl/nl/p/carplay-scherm-tou...,84.95,EUR,Audio & HiFi
1,9300000161136530,Apple EarPods (USB-C),,4.1,24,https://www.bol.com/nl/nl/p/earpods/9300000161...,19.35,EUR,Audio & HiFi
2,9300000025109728,JBL Tune 510BT - Draadloze on-ear koptelefoon ...,,4.4,484,https://www.bol.com/nl/nl/p/jbl-tune-510bt-dra...,36.0,EUR,Audio & HiFi
3,9200000074953095,Pioneer GM-A6704 Autoversterker 4 kanaals - 10...,,5.0,1,https://www.bol.com/nl/nl/p/pioneer-gm-a6704-a...,139.0,EUR,Audio & HiFi
4,9200000066399178,Pioneer PD-10AE - CD-speler - Zwart,,4.8,12,https://www.bol.com/nl/nl/p/pioneer-pd-10ae-cd...,219.99,EUR,Audio & HiFi
5,9300000020209238,JBL Tune 660NC Zwart - Draadloze on-ear Noise ...,,4.4,128,https://www.bol.com/nl/nl/p/jbl-tune-660nc-zwa...,60.0,EUR,Audio & HiFi
6,9200000108340791,Apple AirPods 2 - met reguliere oplaadcase,,4.6,1999,https://www.bol.com/nl/nl/p/apple-airpods-2-in...,127.0,EUR,Audio & HiFi
7,9300000161136600,Apple AirPods Pro 2 - met MagSafe oplaadcase (...,,4.4,325,https://www.bol.com/nl/nl/p/mqd83am-a-airpods-...,247.58,EUR,Audio & HiFi
8,9200000108639206,JBL Flip 5 Zwart - Draagbare Bluetooth Speaker,,4.6,531,https://www.bol.com/nl/nl/p/jbl-flip-5-zwart-d...,99.0,EUR,Audio & HiFi
9,9200000065666274,Apple EarPods met lightning aansluiting,,3.3,757,https://www.bol.com/nl/nl/p/apple-earpods-met-...,20.99,EUR,Audio & HiFi
