In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

In [2]:
# Category listing page on bol.com (Audio & HiFi) that this notebook requests
url: str = (
    "https://www.bol.com/nl/nl/l/audio-hifi/10714/"
)

In [3]:
# Send a GET request to the URL.
# requests never times out unless told to, so an explicit timeout (seconds)
# keeps this cell from hanging forever when the server does not answer.
response = requests.get(url, timeout=30)

In [5]:
# Parse the listing page held in `response` into the DataFrame `df` and save it
# to products.csv.
#
# For every product card the cell extracts the product id, title, brand,
# rating, number of reviews, product URL and price. Cards without a usable
# tracking tag, rating label or price are skipped, so `df` only contains
# complete rows.

BASE_URL = 'https://www.bol.com'
CURRENCY = 'EUR'  # Assuming the currency is EUR
CATEGORY_NAME = 'Audio & HiFi'  # As per the given category in the URL
COLUMNS = ['PID', 'Title', 'Brand', 'Rating', 'N_Reviews', 'URL', 'Price', 'Currency', 'Category_Name']


def _parse_rating_label(aria_label):
    """Split a rating aria-label into ``(rating, n_reviews)`` strings.

    The label is presumably of the form
    ``'Gemiddeld <rating> van de 5 sterren uit <count> reviews'`` (inferred
    from the markers this parser looks for -- confirm against the live page).

    Args:
        aria_label: Text of the ``aria-label`` attribute (may be empty).

    Returns:
        A ``(rating, n_reviews)`` tuple of stripped strings, or ``None`` when
        one of the markers 'Gemiddeld', 'van de 5 sterren' or 'uit' is missing.
    """
    _, has_prefix, after_prefix = aria_label.partition('Gemiddeld')
    rating_text, has_scale, after_scale = after_prefix.partition('van de 5 sterren')
    _, has_count, after_count = after_scale.partition('uit')
    if not (has_prefix and has_scale and has_count):
        return None
    # Cutting at 'review' handles 'reviews' as well as a singular 'review'.
    reviews_text = after_count.partition('review')[0]
    return rating_text.strip(), reviews_text.strip()


def _parse_price(price_span):
    """Read the price out of a ``promo-price`` span.

    The integer part is taken from the span's leading text node and the
    fractional part from its ``sup.promo-price__fraction`` child. A missing or
    non-numeric fraction (for example '-') is treated as '00'.

    Args:
        price_span: The BeautifulSoup tag of the price span.

    Returns:
        The price as a float, or ``None`` when no numeric integer part can be
        read, so the caller can skip the product instead of reusing a stale
        value.
    """
    children = price_span.contents
    first_child = children[0] if children else ''
    # Only a leading text node carries the integer part; a leading tag does not.
    integer_text = str(first_child).strip() if isinstance(first_child, str) else ''
    integer_text = integer_text.replace('.-', '')

    # Assumes a '.' inside the integer part is a thousands separator
    # (e.g. '1.299') -- TODO confirm against the live markup. Anything that is
    # not grouped in threes is rejected rather than guessed at.
    groups = integer_text.split('.')
    well_grouped = (
        all(group.isdigit() for group in groups)
        and all(len(group) == 3 for group in groups[1:])
    )
    if not well_grouped:
        return None

    fraction_sup = price_span.find('sup', class_='promo-price__fraction')
    fraction_text = fraction_sup.text.strip() if fraction_sup is not None else ''
    if not fraction_text.isdigit():
        fraction_text = '00'

    try:
        return float(f"{''.join(groups)}.{fraction_text}")
    except ValueError:
        # isdigit() accepts a few characters float() rejects (e.g. superscripts).
        return None


# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the elements containing the products
    products = soup.find_all('div', class_='product-item__content')

    # List to store the data (one list per product, in COLUMNS order)
    data = []

    # Iterate over the products and extract the required information
    for product in products:
        tracking_event_tag = product.find('wsp-analytics-tracking-event')
        if tracking_event_tag is None:
            continue

        # The product id lives in the JSON stored in the 'data-config' attribute
        try:
            config_object = json.loads(tracking_event_tag.get('data-config', ''))
        except json.JSONDecodeError:
            continue
        product_id = config_object.get('product_id') if isinstance(config_object, dict) else None
        if product_id is None:
            continue

        # NOTE: a multi-class string only matches this exact class attribute
        # value, so the selector breaks if the site adds or reorders classes.
        title_tag = product.find('a', class_='product-title px_list_page_product_click list_page_product_tracking_target')
        if title_tag is None:
            title = 'N/A'
            product_url = 'N/A'
        else:
            title = title_tag.get_text(strip=True)
            href = title_tag.get('href')
            # Deliberately not named `url`: that would overwrite the listing
            # URL defined earlier in the notebook.
            product_url = BASE_URL + href if href else 'N/A'

        brand_tag = product.find('ul', class_='product-creator')
        brand = brand_tag.get_text(strip=True) if brand_tag is not None else 'N/A'

        # Extract the rating and number of reviews from the first 'u-mb--xs'
        # div whose aria-label has the expected shape.
        rating_info = None
        for candidate in product.find_all('div', class_='u-mb--xs'):
            rating_info = _parse_rating_label(candidate.get('aria-label', ''))
            if rating_info is not None:
                break
        if rating_info is None:
            continue
        rating, n_reviews = rating_info

        price_span = product.find('span', class_='promo-price')
        if price_span is None:
            continue
        price = _parse_price(price_span)
        if price is None:
            continue

        # Append the data to the list
        data.append([product_id, title, brand, rating, n_reviews, product_url, price, CURRENCY, CATEGORY_NAME])

    # Create a DataFrame from the data
    df = pd.DataFrame(data, columns=COLUMNS)

    # Save the DataFrame to a CSV file
    df.to_csv('products.csv', index=False)

    print('Data has been successfully saved to products.csv')
else:
    print('Failed. Status code:', response.status_code)

Data has been successfully saved to products.csv


In [6]:
# Display the DataFrame of scraped products
df

Unnamed: 0,PID,Title,Brand,Rating,N_Reviews,URL,Price,Currency,Category_Name
0,9300000174719290,Carplay Scherm - Touchscreen - Navigatiesystee...,The Useful,3.6,17,https://www.bol.com/nl/nl/p/carplay-scherm-tou...,84.95,EUR,Audio & HiFi
1,9300000161469652,Samsung koptelefoon EO-EG920BW Hybrid earbuds wit,Samsung,4.0,1,https://www.bol.com/nl/nl/p/samsung-stereo-hea...,10.22,EUR,Audio & HiFi
2,9200000066399178,Pioneer PD-10AE - CD-speler - Zwart,Pioneer,4.8,12,https://www.bol.com/nl/nl/p/pioneer-pd-10ae-cd...,209.99,EUR,Audio & HiFi
3,9200000074953095,Pioneer GM-A6704 Autoversterker 4 kanaals - 10...,Pioneer,5.0,1,https://www.bol.com/nl/nl/p/pioneer-gm-a6704-a...,139.0,EUR,Audio & HiFi
4,9300000020209238,JBL Tune 660NC Zwart - Draadloze on-ear Noise ...,JBL,4.3,123,https://www.bol.com/nl/nl/p/jbl-tune-660nc-zwa...,60.0,EUR,Audio & HiFi
5,9200000108340791,Apple AirPods 2 - met reguliere oplaadcase,Apple,4.6,1998,https://www.bol.com/nl/nl/p/apple-airpods-2-in...,127.0,EUR,Audio & HiFi
6,9300000161136600,Apple AirPods Pro 2 - met MagSafe oplaadcase (...,Apple,4.4,325,https://www.bol.com/nl/nl/p/mqd83am-a-airpods-...,247.0,EUR,Audio & HiFi
7,9200000108639206,JBL Flip 5 Zwart - Draagbare Bluetooth Speaker,JBL,4.6,531,https://www.bol.com/nl/nl/p/jbl-flip-5-zwart-d...,79.95,EUR,Audio & HiFi
8,9300000155640894,JBL Tune 235NC TWS - Volledig Draadloze Oordop...,JBL,3.8,439,https://www.bol.com/nl/nl/p/jbl-tune-235nc-tws...,58.0,EUR,Audio & HiFi
9,9200000065666274,Apple EarPods met lightning aansluiting,Apple,3.3,757,https://www.bol.com/nl/nl/p/apple-earpods-met-...,19.75,EUR,Audio & HiFi
