# Competitor: Bol

## Imports + Paths

In [None]:
import sqlite3
from pathlib import Path

import pandas as pd

# The working directory is expected to already be the Online-Data-Mining
# repository root; the SQLite file lives under db/ inside it.
REPO_ROOT = Path.cwd()
DB_PATH = REPO_ROOT.joinpath("db", "odm.sqlite")

# Echo both locations so a wrong working directory is visible immediately.
for _label, _location in (("CWD:", Path.cwd()), ("DB_PATH:", DB_PATH)):
    print(_label, _location)

conn = sqlite3.connect(str(DB_PATH))

CWD: /Users/iro.chrisospathi/Documents/Online Dara Mining/Online-Data-Mining
DB_PATH: /Users/iro.chrisospathi/Documents/Online Dara Mining/Online-Data-Mining/db/odm.sqlite


## DB Connection

In [4]:
# Open the working connection: rows become addressable by column name and
# foreign-key constraints are enforced for every statement that follows.
conn = sqlite3.connect(DB_PATH)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA foreign_keys = ON;")

# Register bol.com as competitor 2.
# An upsert is used instead of INSERT OR REPLACE: REPLACE deletes the existing
# row before re-inserting it, which with foreign keys enabled can fire delete
# actions on rows that reference this competitor. The upsert updates in place.
# NOTE(review): assumes competitor_id is the PRIMARY KEY (or UNIQUE) - confirm.
conn.execute("""
INSERT INTO competitor (competitor_id, name, country, base_url)
VALUES (2, 'bol.com', 'NL', 'https://www.bol.com')
ON CONFLICT(competitor_id) DO UPDATE SET
    name = excluded.name,
    country = excluded.country,
    base_url = excluded.base_url
""")
conn.commit()

In [3]:
import json
from pathlib import Path

SUPPORT_JSONL = Path("Online-Data-Mining/data/raw/bol/bol_support.jsonl")
BOL_ID = 2

# Columns copied straight from a JSONL record (via .get, so absent keys become NULL).
_CUSTOMER_SERVICE_FIELDS = (
    "scraped_at",
    "shipping_included",
    "free_shipping_threshold_amt",
    "pickup_point_available",
    "delivery_shipping_available",
    "delivery_courier_available",
    "cooling_off_days",
    "free_returns",
    "warranty_provider",
    "warranty_duration_months",
    "customer_service_url",
)
_EXPERT_SUPPORT_FIELDS = (
    "scraped_at",
    "source_url",
    "expert_chat_available",
    "phone_support_available",
    "email_support_available",
    "in_store_support",
    "expert_support_text",
)


def _insert_support_row(table, fixed_values, record_fields, record):
    """Insert one row into ``table``.

    ``fixed_values`` maps column name -> constant value and comes first in the
    column list; ``record_fields`` are then read from ``record`` with ``.get``.
    Table and column names come from the constants above, never from the file.
    """
    columns = [*fixed_values, *record_fields]
    values = [*fixed_values.values(), *(record.get(name) for name in record_fields)]
    placeholders = ", ".join("?" for _ in columns)
    conn.execute(
        f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})",
        values,
    )


if SUPPORT_JSONL.exists():
    # ``with conn`` commits when the whole file was read successfully and rolls
    # back on any exception, so a bad line cannot leave half an import pending
    # on the shared connection.
    with conn, SUPPORT_JSONL.open("r", encoding="utf-8") as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue  # tolerate blank lines
            try:
                r = json.loads(line)
            except json.JSONDecodeError as exc:
                # Same exception family as before (JSONDecodeError is a ValueError),
                # but now it says which line of the file is broken.
                raise ValueError(
                    f"{SUPPORT_JSONL}: line {line_no} is not valid JSON"
                ) from exc
            t = r.get("type")

            if t == "CUSTOMER_SERVICE":
                _insert_support_row(
                    "customer_service",
                    # listing_id optional; set if support is per-listing
                    {"competitor_id": BOL_ID, "listing_id": None},
                    _CUSTOMER_SERVICE_FIELDS,
                    r,
                )
            elif t == "EXPERT_SUPPORT":
                _insert_support_row(
                    "expert_support",
                    {"competitor_id": BOL_ID},
                    _EXPERT_SUPPORT_FIELDS,
                    r,
                )
            # any other record type is ignored, as before

    print("Imported support JSONL:", SUPPORT_JSONL)
else:
    print("Missing file:", SUPPORT_JSONL)


Imported support JSONL: Online-Data-Mining/data/raw/bol/bol_support.jsonl


## Helper Functions

In [4]:
def has_col(table, col):
    """Return True when ``table`` already has a column named exactly ``col``.

    The column list is read through SQLite's ``pragma_table_info`` table-valued
    function, so the table name is bound as a parameter instead of being
    formatted into the SQL text. The name is read by position, so the check
    works whether or not ``conn.row_factory`` is set to ``sqlite3.Row``.
    A table that does not exist yields no rows, so the result is False.
    """
    rows = conn.execute("SELECT name FROM pragma_table_info(?)", (table,))
    return col in {row[0] for row in rows}

def add_col(table, col, coltype):
    """Add column ``col`` with type ``coltype`` to ``table`` unless it is already there.

    The statement is not committed here; the caller commits.
    """
    if has_col(table, col):
        return  # already present, nothing to alter
    ddl = f"ALTER TABLE {table} ADD COLUMN {col} {coltype}"
    conn.execute(ddl)

def uniq_idx(table, col):
    """Create the unique index ``ux_<table>_<col>`` on ``table(col)`` if it is missing."""
    statement = f"CREATE UNIQUE INDEX IF NOT EXISTS ux_{table}_{col} ON {table}({col})"
    conn.execute(statement)

## Key Columns & Indexes cross-check

In [5]:
# Keys emitted in the JSONL, as (table, key column) pairs.
_JSONL_KEY_COLUMNS = (
    ("scraperun", "scrape_run_key"),
    ("product", "product_key"),
    ("productlisting", "listing_key"),
    ("category", "category_key"),
)

# First pass: make sure every key column exists.
for _table, _key_col in _JSONL_KEY_COLUMNS:
    add_col(_table, _key_col, "INTEGER")

# Second pass: put a unique index on each key column.
for _table, _key_col in _JSONL_KEY_COLUMNS:
    uniq_idx(_table, _key_col)

conn.commit()

## Import JSONL 

In [6]:
# competitor_id used for every Bol row written by the import below
BOL_ID = 2

# key -> id maps, one independent dict per entity type
(
    scrape_run_id_by_key,
    product_id_by_key,
    listing_id_by_key,
    category_id_by_key,
) = (dict() for _ in range(4))

def get_id(sql, params):
    """Run ``sql`` with ``params`` and return the first column of the first row.

    Returns None when the query produces no row.
    """
    first = conn.execute(sql, params).fetchone()
    if first is None:
        return None
    return first[0]

# NOTE(review): JSONL_PATH is not assigned in any cell shown in this notebook;
# it has to point at the Bol JSONL export before this cell is run.
# NOTE(review): pricesnapshot / review / productmatch use plain INSERTs, so
# running this cell twice on the same file inserts those rows twice.


def _pick(record, *fields):
    """Return ``record.get(field)`` for each field, in order; absent keys give None."""
    return tuple(record.get(field) for field in fields)


def _insert_keyed(insert_sql, values, lookup_sql, key):
    """Run an ``INSERT OR IGNORE`` and return the id of the row stored under ``key``.

    The lookup runs on the same connection, so it sees the new row without an
    intermediate commit. If the insert was ignored because the key already
    existed, the id of the existing row is returned.
    """
    conn.execute(insert_sql, values)
    return get_id(lookup_sql, (key,))


# record type -> number of records dropped because a referenced key was unknown
skipped_by_type = {}

# ``with conn`` turns the whole file into one transaction: it commits once at
# the end and rolls everything back if any line fails, instead of committing
# after every record and leaving a half-imported file behind.
with conn, JSONL_PATH.open("r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue  # tolerate blank lines
        rec = json.loads(line)
        rtype = rec.get("type")

        # SCRAPERUN
        if rtype == "SCRAPERUN":
            srk = rec.get("scrape_run_key")
            scrape_run_id_by_key[srk] = _insert_keyed(
                """
                INSERT OR IGNORE INTO scraperun (started_at, gpt_context_hash, crawler_version, notes, scrape_run_key)
                VALUES (?, ?, ?, ?, ?)
                """,
                _pick(rec, "started_at", "gpt_context_hash", "crawler_version", "notes") + (srk,),
                "SELECT scrape_run_id FROM scraperun WHERE scrape_run_key=?",
                srk,
            )

        # CATEGORY
        elif rtype == "CATEGORY":
            ck = rec.get("category_key")
            category_id_by_key[ck] = _insert_keyed(
                """
                INSERT OR IGNORE INTO category (competitor_id, name, url, parent_category_id, category_key)
                VALUES (?, ?, ?, ?, ?)
                """,
                (BOL_ID,) + _pick(rec, "name", "url", "parent_category_id") + (ck,),
                "SELECT category_id FROM category WHERE category_key=?",
                ck,
            )

        # PRODUCT
        elif rtype == "PRODUCT":
            pk = rec.get("product_key")
            product_id_by_key[pk] = _insert_keyed(
                """
                INSERT OR IGNORE INTO product (canonical_name, brand, model, product_key)
                VALUES (?, ?, ?, ?)
                """,
                _pick(rec, "canonical_name", "brand", "model") + (pk,),
                "SELECT product_id FROM product WHERE product_key=?",
                pk,
            )

        # PRODUCTLISTING
        elif rtype == "PRODUCTLISTING":
            lk = rec.get("listing_key")
            category_id = rec.get("category_id")
            if category_id is None:
                # NOTE(review): presumably listings reference their category by
                # "category_key" like the other records do - confirm against the
                # crawler output. Without that field this stays None, as before.
                category_id = category_id_by_key.get(rec.get("category_key"))

            listing_id_by_key[lk] = _insert_keyed(
                """
                INSERT OR IGNORE INTO productlisting (
                    competitor_id, category_id, product_url, title_on_page,
                    image_url_src, img_url_cdn, gtin_ean_upc_on_page,
                    description_clean, listing_key
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    BOL_ID,
                    category_id,
                    rec.get("product_url"),
                    rec.get("title_on_page"),
                    rec.get("image_url_on_page"),  # JSONL name -> image_url_src column
                    None,                          # img_url_cdn is not emitted in the JSONL
                    rec.get("gtin_on_page"),       # JSONL name -> gtin_ean_upc_on_page column
                    rec.get("description_clean"),
                    lk,
                ),
                "SELECT listing_id FROM productlisting WHERE listing_key=?",
                lk,
            )

        # PRICESNAPSHOT
        elif rtype == "PRICESNAPSHOT":
            listing_id = listing_id_by_key.get(rec.get("listing_key"))
            scrape_run_id = scrape_run_id_by_key.get(rec.get("scrape_run_key"))
            if listing_id is None or scrape_run_id is None:
                # the listing / scrape run was not seen earlier in this file
                skipped_by_type[rtype] = skipped_by_type.get(rtype, 0) + 1
                continue
            conn.execute(
                """
                INSERT INTO pricesnapshot (
                    listing_id, scrape_run_id, scraped_at, currency,
                    current_price, base_price, discount_amount, discount_percent,
                    price_text, in_stock, stock_status_text
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (listing_id, scrape_run_id) + _pick(
                    rec,
                    "scraped_at", "currency", "current_price", "base_price",
                    "discount_amount", "discount_percent", "price_text",
                    "in_stock", "stock_status_text",
                ),
            )

        # REVIEW
        elif rtype == "REVIEW":
            listing_id = listing_id_by_key.get(rec.get("listing_key"))
            if listing_id is None:
                skipped_by_type[rtype] = skipped_by_type.get(rtype, 0) + 1
                continue
            conn.execute(
                """
                INSERT INTO review (
                    listing_id, created_at, rating_value, rating_scale,
                    review_count, review_text, reviewer_name,
                    verified, verified_purchase, review_url
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (listing_id,) + _pick(
                    rec,
                    "created_at", "rating_value", "rating_scale", "review_count",
                    "review_text", "reviewer_name", "verified",
                    "verified_purchase", "review_url",
                ),
            )

        # PRODUCTMATCH
        elif rtype == "PRODUCTMATCH":
            product_id = product_id_by_key.get(rec.get("product_key"))
            listing_id = listing_id_by_key.get(rec.get("listing_key"))
            if product_id is None or listing_id is None:
                skipped_by_type[rtype] = skipped_by_type.get(rtype, 0) + 1
                continue
            conn.execute(
                """
                INSERT INTO productmatch (
                    product_id, listing_id, match_method, match_score, matched_at
                )
                VALUES (?, ?, ?, ?, ?)
                """,
                (product_id, listing_id) + _pick(rec, "match_method", "match_score", "matched_at"),
            )

# Make dropped records visible instead of losing them silently.
if skipped_by_type:
    print("Skipped records with unresolved keys:", skipped_by_type)

## Insert/Update Statements

In [7]:
# (competitor_id, name, country, base_url) for bol.com
BOL_COMPETITOR = (2, "bol.com", "NL", "https://www.bol.com")

# Upsert rather than INSERT OR REPLACE: REPLACE deletes the existing competitor
# row before inserting the new one, and by this point categories, listings and
# support rows reference competitor 2. Updating in place leaves them untouched.
# NOTE(review): assumes competitor_id is the PRIMARY KEY (or UNIQUE) - confirm.
conn.execute(
    """
    INSERT INTO competitor (competitor_id, name, country, base_url)
    VALUES (?, ?, ?, ?)
    ON CONFLICT(competitor_id) DO UPDATE SET
        name = excluded.name,
        country = excluded.country,
        base_url = excluded.base_url
    """,
    BOL_COMPETITOR,
)
conn.commit()

## Final tables 

In [8]:
def show(table, limit=5):
    """Display the first ``limit`` rows of ``table`` under a banner line.

    Parameters
    ----------
    table : str
        Table name. It is formatted into the SQL text, so pass trusted names only.
    limit : int, default 5
        Maximum number of rows to fetch; bound as a query parameter.

    The banner reports how many rows were actually fetched, which can be fewer
    than ``limit``.
    """
    df = pd.read_sql_query(f"SELECT * FROM {table} LIMIT ?", conn, params=(limit,))
    print(f"\n=== {table} ({len(df)} rows shown) ===")
    # NOTE(review): display() is expected to be provided by the notebook kernel;
    # it is not imported in this notebook.
    display(df)

# Tables to preview, in the order they are shown.
_PREVIEW_TABLES = (
    "competitor",
    "scraperun",
    "category",
    "product",
    "productlisting",
    "pricesnapshot",
    "review",
    "productmatch",
    "pagelink",
    "customer_service",
    "expert_support",
)

for _table_name in _PREVIEW_TABLES:
    show(_table_name)


=== competitor (4 rows shown) ===


Unnamed: 0,competitor_id,name,country,base_url
0,1,Bax-shop,NL,https://www.bax-shop.nl
1,2,bol.com,NL,https://www.bol.com
2,3,Thomann,DE,https://www.thomann.nl
3,4,MaxiAxi,NL,https://www.maxiaxi.com



=== scraperun (1 rows shown) ===


Unnamed: 0,scrape_run_id,started_at,gpt_context_hash,crawler_version,notes,scrape_run_key
0,1,2026-01-21T14:58:09.684677+00:00,,bol_products/ERD-STRICT-1.0,bol microphones only (microfoons/7119) + produ...,333239671



=== category (1 rows shown) ===


Unnamed: 0,category_id,competitor_id,name,url,parent_category_id,category_key
0,2,2,Microfoons,https://www.bol.com/nl/nl/l/microfoons/7119/,,859829754



=== product (5 rows shown) ===


Unnamed: 0,product_id,canonical_name,brand,model,product_key
0,1,Maono Maono DGM20S - USB Gaming Microfoon met ...,Maono,,1678028784
1,2,Nuvance Nuvance Microfoon - Gaming Microfoon m...,Nuvance,,215090221
2,3,Lenco - draadloze arkering,,arkering,1276485845
3,4,SynergyX SynergyX SonicFlex Pro ll Microfoon -...,SynergyX,,748634119
4,5,Fenton Microfoon - Dynamische microfoon Zwart ...,Fenton,,1343329707



=== productlisting (5 rows shown) ===


Unnamed: 0,listing_id,competitor_id,category_id,product_url,title_on_page,image_url_src,img_url_cdn,gtin_ean_upc_on_page,description_clean,listing_key
0,126,2,,https://www.bol.com/nl/nl/p/lenco-mcw-011bk-dr...,Lenco - draadloze,https://media.s-bol.com/xNEE6w9ZW7xE/yL8wGz/68...,,8711902069515.0,<p>De <strong>Lenco MCW-011</strong> is een dr...,349221545
1,127,2,,https://www.bol.com/nl/nl/p/maono-dgm20s-usb-m...,Maono DGM20S - USB Gaming Microfoon met Arm - ...,https://media.s-bol.com/rowEZ6KP9BYp/J6NvvLJ/1...,,8720165730995.0,<p>Stap in een wereld van zuiver geluid met de...,806731873
2,128,2,,https://www.bol.com/nl/nl/p/nuvance-usb-microf...,Nuvance Microfoon - Gaming Microfoon met en Vo...,https://media.s-bol.com/7RpLYw7rElRr/qjE8zmr/8...,,,<p><strong>Ben je op zoek naar een microfoon d...,1818683271
3,129,2,,https://www.bol.com/nl/nl/p/lenco-mcw-020bk-se...,Lenco - draadloze,https://media.s-bol.com/mVppOpVxWv1E/M3jZPP/97...,,8711902069522.0,<p>De <strong>Lenco MCW-020</strong> is een se...,1968379926
4,130,2,,https://www.bol.com/nl/nl/p/synergyx-xc7/93000...,SynergyX SonicFlex Pro ll Microfoon - USB Gami...,https://media.s-bol.com/RqR69x27kNZq/lOR3DP1/1...,,8720938208508.0,<h3>Deze vernieuwde 2026 variant wordt gelever...,315362301



=== pricesnapshot (5 rows shown) ===


Unnamed: 0,price_snapshot_id,listing_id,scrape_run_id,scraped_at,currency,current_price,base_price,discount_amount,discount_percent,price_text,in_stock,stock_status_text
0,126,127,1,2026-01-21T14:59:12.480433+00:00,EUR,49.49,,,,€ 49.49,1.0,
1,127,128,1,2026-01-21T14:59:24.534125+00:00,EUR,39.99,49.99,10.0,20.0,€ 39.99,,
2,128,129,1,2026-01-21T15:00:08.509518+00:00,EUR,31.94,56.99,25.05,43.96,€ 31.94,,
3,129,130,1,2026-01-21T15:00:32.962688+00:00,EUR,59.95,59.95,,,€ 59.95,,
4,130,131,1,2026-01-21T15:00:48.274830+00:00,EUR,11.9,,,,€ 11.90,1.0,



=== review (5 rows shown) ===


Unnamed: 0,review_id,listing_id,created_at,rating_value,rating_scale,review_count,review_text,reviewer_name,verified,verified_purchase,review_url
0,66,127,2026-01-21T14:59:12.480433+00:00,4.5,5,46,,,,,https://www.bol.com/nl/nl/p/maono-dgm20s-usb-m...
1,67,128,2026-01-21T14:59:24.534125+00:00,4.3,5,52,,,,,https://www.bol.com/nl/nl/p/nuvance-usb-microf...
2,68,129,2026-01-21T15:00:08.509518+00:00,4.3,5,32,,,,,https://www.bol.com/nl/nl/p/lenco-mcw-020bk-se...
3,69,130,2026-01-21T15:00:32.962688+00:00,4.7,5,26,,,,,https://www.bol.com/nl/nl/p/synergyx-xc7/93000...
4,70,131,2026-01-21T15:00:48.274830+00:00,3.9,5,31,,,,,https://www.bol.com/nl/nl/p/microfoon-dynamisc...



=== productmatch (5 rows shown) ===


Unnamed: 0,match_id,product_id,listing_id,match_method,match_score,matched_at
0,126,1,127,gtin,1.0,2026-01-21T14:59:12.480433+00:00
1,127,2,128,canonical_name,0.7,2026-01-21T14:59:24.534125+00:00
2,128,3,129,gtin,1.0,2026-01-21T15:00:08.509518+00:00
3,129,4,130,gtin,1.0,2026-01-21T15:00:32.962688+00:00
4,130,5,131,gtin,1.0,2026-01-21T15:00:48.274830+00:00



=== pagelink (0 rows shown) ===


Unnamed: 0,page_id,competitor_id,page_type,url



=== customer_service (5 rows shown) ===


Unnamed: 0,customer_service_id,competitor_id,listing_id,scraped_at,shipping_included,free_shipping_threshold_amt,pickup_point_available,delivery_shipping_available,delivery_courier_available,cooling_off_days,free_returns,warranty_provider,warranty_duration_months,customer_service_url
0,1,2,,2026-01-21T16:56:27.609764+00:00,,,,,,30,,,,https://www.bol.com/nl/nl/klantenservice/
1,2,2,,2026-01-21T16:56:27.612652+00:00,,,,,,30,,,,https://www.bol.com/nl/nl/klantenservice/
2,3,2,,2026-01-21T16:56:27.613410+00:00,,,,,,30,,,,https://www.bol.com/nl/nl/klantenservice/
3,4,2,,2026-01-21T16:56:27.613963+00:00,,,,,,30,,,,https://www.bol.com/nl/nl/klantenservice/
4,5,2,,2026-01-21T16:56:27.614384+00:00,,,,,,30,,,,https://www.bol.com/nl/nl/klantenservice/



=== expert_support (5 rows shown) ===


Unnamed: 0,expert_support_id,competitor_id,scraped_at,source_url,expert_chat_available,phone_support_available,email_support_available,in_store_support,expert_support_text
0,1,2,2026-01-21T16:56:27.609764+00:00,https://www.bol.com/nl/nl/p/nedis-microfoon-ge...,,,,0,
1,2,2,2026-01-21T16:56:27.612652+00:00,https://www.bol.com/nl/nl/p/audiomate-dynamisc...,,,,0,
2,3,2,2026-01-21T16:56:27.613410+00:00,https://www.bol.com/nl/nl/p/blue-microphones-b...,,,,0,
3,4,2,2026-01-21T16:56:27.613963+00:00,https://www.bol.com/nl/nl/p/draadloze-microfoo...,,,,0,
4,5,2,2026-01-21T16:56:27.614384+00:00,https://www.bol.com/nl/nl/p/sennheiser-cardioi...,,,,0,
