# 🔍 RealOEM BMW Parts Scraper

Scrapes part data from [realoem.com](https://www.realoem.com) given one or more BMW part barcodes.
Based on the proven logic from `app.py`.

In [14]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
from datetime import datetime
import subprocess
import requests
import csv
import time
import re
import os

In [15]:
# User configuration: edit the values below before running the notebook.
THREAD_WORKERS = 10   # worker count intended for the threaded vehicle-tag requests

# Part numbers (barcodes) to look up, one string per entry.
BARCODES = ["11367614288"]

# Echo the configuration so the cell output documents what will be scraped.
print(
    f"‚úÖ {len(BARCODES)} barcode(s) configured: {BARCODES}",
    f"   Thread workers : {THREAD_WORKERS}",
    sep="\n",
)

✅ 1 barcode(s) configured: ['11367614288']
   Thread workers : 10


## Setup

Kill stale Chrome processes, launch a fresh browser, and load helper functions.
**Run these cells once per session.**

In [16]:
# Terminate leftover chromedriver / Chrome-for-Testing processes from earlier
# sessions (stale sessions were observed to cause NoSuchWindowException).
own_pid = str(os.getpid())
for target in ('chromedriver', 'Google Chrome for Testing'):
    result = subprocess.run(['pgrep', '-f', target], capture_output=True, text=True)
    # pgrep prints whitespace-separated PIDs; never signal this Python process.
    pids = [p for p in result.stdout.split() if p != own_pid]
    if not pids:
        print(f"‚úÖ No stale '{target}' processes")
        continue
    for pid in pids:
        subprocess.run(['kill', '-9', pid], capture_output=True)
        print(f"üßπ Killed stale '{target}' PID {pid}")

time.sleep(1)   # brief pause after the kills before starting a new browser

# Launch Chrome through undetected_chromedriver with the notebook's standard flags.
options = uc.ChromeOptions()
for chrome_flag in (
    '--start-maximized',
    '--disable-blink-features=AutomationControlled',
    '--no-first-run',
    '--no-service-autorun',
    '--disable-popup-blocking',
):
    options.add_argument(chrome_flag)

driver = uc.Chrome(options=options)
wait = WebDriverWait(driver, 15)
realoem_cookie_handled = False   # a new session has not dealt with the cookie banner yet

# Probe the session after a short pause to confirm the browser responds.
time.sleep(2)
try:
    _ = driver.window_handles
    print(f"‚úÖ Browser ready  |  URL: {driver.current_url}")
except Exception as e:
    print(f"‚ùå Browser failed to start: {type(e).__name__}: {e}")

🧹 Killed stale 'chromedriver' PID 19089
✅ No stale 'Google Chrome for Testing' processes
✅ Browser ready  |  URL: chrome://new-tab-page/


In [17]:
# ══════════════════════════════════════════════════════════════
#  Helper functions  (from app.py — proven in production)
# ══════════════════════════════════════════════════════════════

def kill_overlays(drv):
    """Strip modal/popup/overlay/consent elements from the current page.

    Runs one JavaScript snippet in the browser that removes every element
    matching a fixed selector list and then sets the body's ``overflow`` style
    to ``auto``.

    Args:
        drv: driver object exposing ``execute_script``.
    """
    removal_script = """
        const selectors = [
            '.modal', '.popup', '.overlay', '.backdrop',
            '.cookie', '.cookies', '.consent',
            '[role="dialog"]',
            '[class*="modal"]', '[class*="popup"]', '[class*="overlay"]',
            '.ro-modal', '[data-ro]'
        ];
        selectors.forEach(sel => {
            document.querySelectorAll(sel).forEach(e => e.remove());
        });
        document.body.style.overflow = 'auto';
    """
    drv.execute_script(removal_script)


def handle_realoem_cookie(drv):
    """Accept the cookie banner at most once per browser session (best effort).

    Waits up to 3 seconds for a clickable button whose text contains
    'Accept' or 'Agree' and clicks it. Whether or not a banner was found, the
    module-level ``realoem_cookie_handled`` flag is set so later calls return
    immediately instead of paying the 3-second wait again.

    Args:
        drv: Selenium driver for the current session.
    """
    global realoem_cookie_handled
    if realoem_cookie_handled:
        return
    # Set the flag up front: success and failure both count as "handled".
    realoem_cookie_handled = True
    try:
        cookie_btn = WebDriverWait(drv, 3).until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[contains(., 'Accept') or contains(., 'Agree')]")
            )
        )
        cookie_btn.click()
    except Exception:
        # No banner (timeout) or the click failed: nothing more to do.
        # Catching Exception rather than using a bare except keeps
        # KeyboardInterrupt/SystemExit able to stop the notebook.
        return
    print("‚úÖ Cookie banner accepted")


def handle_subscription_popup(drv):
    """Dismiss the subscription popup using the first selector that works.

    Each known "later"/"close" button selector is tried in order; the first
    element that can be found and clicked ends the search.

    Args:
        drv: Selenium driver for the current session.

    Returns:
        True if a button was clicked, False if none of the selectors worked.
    """
    selectors = (
        'button[data-ro="later"]',
        'button.ro-btn.ro-secondary[data-ro="later"]',
        'button[data-ro="close"]',
        'button.ro-close[data-ro="close"]',
    )
    for selector in selectors:
        try:
            drv.find_element(By.CSS_SELECTOR, selector).click()
        except Exception:
            # Element missing or not clickable: try the next selector.
            # (Exception, not a bare except, so Ctrl-C / kernel interrupt
            # is not swallowed here.)
            continue
        time.sleep(0.2)   # short pause after the click before continuing
        return True
    return False


def close_extra_tabs(drv):
    """Close every window/tab except the first one and refocus the first.

    The first entry of ``drv.window_handles`` is treated as the main window.

    Args:
        drv: driver exposing ``window_handles``, ``switch_to.window`` and ``close``.
    """
    primary = drv.window_handles[0]
    extras = [handle for handle in drv.window_handles if handle != primary]
    for handle in extras:
        # A window must be focused before it can be closed.
        drv.switch_to.window(handle)
        drv.close()
    drv.switch_to.window(primary)


def aggressive_popup_killer(drv):
    """Run all popup-removal routines in sequence, ignoring failures.

    Order: remove overlay elements, dismiss the subscription popup, accept the
    cookie banner, close extra tabs. The first routine that raises ends the
    sequence; the error is deliberately swallowed because popup cleanup is
    best effort and must never abort a scrape.

    Args:
        drv: Selenium driver for the current session.
    """
    try:
        kill_overlays(drv)
        handle_subscription_popup(drv)
        handle_realoem_cookie(drv)
        close_extra_tabs(drv)
    except Exception:
        # Exception rather than a bare except: a kernel interrupt
        # (KeyboardInterrupt) or SystemExit must still propagate.
        pass


def is_driver_alive(drv):
    """Report whether the browser session still answers.

    The probe reads ``drv.window_handles``; any exception raised by that
    access is interpreted as a dead or unreachable browser.

    Args:
        drv: driver object to probe.

    Returns:
        True if the attribute access succeeded, otherwise False.
    """
    try:
        drv.window_handles
    except Exception:
        return False
    return True


def revive_driver():
    """Replace the module-level browser session with a freshly started one.

    Steps: quit the current ``driver`` (errors ignored), ``pkill`` leftover
    chromedriver / Chrome-for-Testing processes, start a new Chrome with the
    standard flags, rebuild ``wait`` and reset ``realoem_cookie_handled``.
    Finally the new session is probed and the outcome is printed.
    """
    global driver, wait, realoem_cookie_handled
    print("üîÑ Reviving browser...")

    # The old session may already be gone; a failing quit() is expected.
    try:
        driver.quit()
    except Exception:
        pass

    # Remove orphaned helper processes left behind by the dead session.
    for process_name in ('chromedriver', 'Google Chrome for Testing'):
        subprocess.run(['pkill', '-f', process_name], capture_output=True)
    time.sleep(1)

    opts = uc.ChromeOptions()
    for chrome_flag in (
        '--start-maximized',
        '--disable-blink-features=AutomationControlled',
        '--no-first-run',
        '--no-service-autorun',
        '--disable-popup-blocking',
    ):
        opts.add_argument(chrome_flag)

    driver = uc.Chrome(options=opts)
    time.sleep(2)   # give the browser time to finish starting
    wait = WebDriverWait(driver, 15)
    realoem_cookie_handled = False   # the new session has not seen the banner

    # Confirm the new session responds.
    try:
        driver.window_handles
    except Exception as e:
        print(f"‚ùå Revive failed: {e}")
    else:
        print("‚úÖ Browser revived")


def safe_navigate_realoem(drv, url):
    """Open ``url``, reviving the browser if it is dead or navigation fails.

    If ``drv`` does not respond, or ``drv.get(url)`` raises, the session is
    rebuilt with ``revive_driver()`` and the navigation is done (or retried
    once) on the new module-level ``driver``. Popups are then cleared on the
    driver that actually performed the navigation.

    Args:
        drv: driver to navigate with; replaced by the revived global driver
            when a revive is needed.
        url: address to open.

    Raises:
        Exception: whatever the retry ``get`` raises on the revived driver.
    """
    global driver
    if not is_driver_alive(drv):
        revive_driver()
        drv = driver
    try:
        drv.get(url)
    except Exception:
        # Navigation failed on a supposedly live session: start over once.
        revive_driver()
        drv = driver
        drv.get(url)
    # Clean up on the same driver that loaded the page (previously this always
    # targeted the module-level driver, even when a different drv was used).
    aggressive_popup_killer(drv)


# ── Requests-based threaded tag-fetching ─────────────────────

def _get_driver_session_kwargs(drv):
    """Extract cookies + User-Agent from the live browser for requests workers."""
    cookies = {c['name']: c['value'] for c in drv.get_cookies()}
    user_agent = drv.execute_script('return navigator.userAgent')
    return {
        'cookies': cookies,
        'headers': {
            'User-Agent':      user_agent,
            'Accept':          'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Referer':         'https://www.realoem.com/',
            'Connection':      'keep-alive',
        },
    }


def _fetch_series_tags_worker(series_info, session_kwargs):
    """Fetch one series page via requests, parse tags. Runs in ThreadPoolExecutor."""
    series_url  = series_info['url'].split('#')[0]
    series_text = series_info.get('text', series_url)
    tags = []
    try:
        sess = requests.Session()
        sess.cookies.update(session_kwargs['cookies'])
        sess.headers.update(session_kwargs['headers'])
        resp = sess.get(series_url, timeout=15)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')
        results_div = soup.find('div', class_='partSearchResults')
        if results_div:
            for li in results_div.select('ul li'):
                raw = li.get_text(separator=' ', strip=True)
                match = re.search(r'\(([^)]+)\)', raw)
                if match:
                    tag = match.group(1)
                    if tag not in tags:
                        tags.append(tag)
    except Exception:
        pass
    return series_text, tags


print("‚úÖ Helper functions loaded")

✅ Helper functions loaded


## Step-by-Step Test

### Step 1 ‚Äî Navigate to Part Page

In [23]:
# Step 1: open the cross-reference page for the first configured barcode.
test_barcode    = BARCODES[0]
numeric_barcode = re.sub(r'\D', '', str(test_barcode))   # keep digits only

print(f"Barcode      : {test_barcode}")
print(f"Numeric form : {numeric_barcode}")
print("-" * 50)

url = f"https://www.realoem.com/bmw/enUS/partxref?q={numeric_barcode}"
safe_navigate_realoem(driver, url)
time.sleep(0.5)
aggressive_popup_killer(driver)


def _part_page_ready(d):
    """Return True once the error banner or the part heading is present."""
    return bool(
        d.find_elements(By.CSS_SELECTOR, "div.error.vs2")
        or d.find_elements(By.CSS_SELECTOR, "div.content h1")
    )


# Wait for the error div OR the page content, whichever appears first.
# On failure clear popups and retry once with a shorter timeout.
# `except Exception` (not a bare except) so a kernel interrupt still works.
try:
    WebDriverWait(driver, 10).until(_part_page_ready)
except Exception:
    aggressive_popup_killer(driver)
    time.sleep(0.5)
    try:
        WebDriverWait(driver, 5).until(_part_page_ready)
    except Exception:
        pass   # Step 2 reports the problem if the content is still missing

aggressive_popup_killer(driver)
print(f"‚úÖ Page loaded  |  URL: {driver.current_url[:80]}")

Barcode      : 11367614288
Numeric form : 11367614288
--------------------------------------------------
🔄 Reviving browser...
✅ Browser revived
✅ Page loaded  |  URL: https://www.realoem.com/bmw/enUS/partxref?q=11367614288


### Step 2 ‚Äî Extract Part Data

In [25]:
# Step 2: read part number, description, detail list and vehicle links from
# the page loaded in Step 1.
part_found = True

# Defaults for every name later cells read, so a "not found" or failed load
# never leaves them undefined or holding values from a previous run.
part_details  = {}
vehicle_links = []
product_data  = {}

# Check for the "not found" error BEFORE attempting content extraction.
try:
    error_div  = driver.find_element(By.CSS_SELECTOR, "div.error.vs2")
    error_text = error_div.text.strip()
    if "not found" in error_text.lower():
        print(f"‚ö†Ô∏è  Part not found: {error_text}")
        part_found = False
        product_data = {"part_number": "NOT FOUND", "description": error_text, "vehicles": []}
except Exception:
    # No error banner on the page (or it could not be read): carry on.
    pass

if part_found:
    try:
        part_number = driver.find_element(By.CSS_SELECTOR, "div.content h1").text
        description = driver.find_element(By.CSS_SELECTOR, "div.content h2").text
    except Exception:
        print("‚ùå Content failed to load (timeout or popup blocking)")
        part_found = False

if part_found:
    print(f"Part Number : {part_number}")
    print(f"Description : {description}")
    print("-" * 50)

    # Details from <dl>: <dt> labels paired positionally with <dd> values.
    try:
        dts = driver.find_elements(By.CSS_SELECTOR, "div.content dl dt")
        dds = driver.find_elements(By.CSS_SELECTOR, "div.content dl dd")
        for dt, dd in zip(dts, dds):
            key   = dt.text.replace(":", "").strip()
            value = dd.text.strip() or "-"   # "-" marks an empty value
            part_details[key] = value
            print(f"  {key}: {value}")
    except Exception as e:
        print(f"‚ö†Ô∏è  Error extracting details: {e}")

    # Vehicle links
    try:
        for link in driver.find_elements(By.CSS_SELECTOR, "div.partSearchResults ul li a"):
            vehicle_links.append({"text": link.text, "url": link.get_attribute("href")})
    except Exception as e:
        print(f"‚ö†Ô∏è  Error extracting vehicle links: {e}")

    product_data = {
        "part_number": part_number,
        "description": description,
        **part_details,
        "vehicles": vehicle_links,
    }

    print(f"\n‚úÖ Found {len(vehicle_links)} compatible vehicle(s)")

Part Number : 11367614288
Description : Actuator, VANOS
--------------------------------------------------
  From: 12/02/2013
  To: -
  Weight: 0.233 kg
  Price: -
  11368482268: (12/09/2019 — 03/09/2022)

✅ Found 104 compatible vehicle(s)


### Step 3 ‚Äî Collect All Vehicle Tags

In [28]:
# Step 3: derive unique model names from the Step 2 links, then visit the
# first few series pages to gather engine codes.
all_vehicle_models = []
engine_codes = set()   # defined up front so Step 4 works even with no links

# The name check makes "Step 2 not run yet" print the hint below instead of
# raising NameError.
if 'vehicle_links' not in dir() or not vehicle_links:
    print("‚ö†Ô∏è  No vehicle links found ‚Äî run Step 2 first")
else:
    print(f"Extracting car models from {len(vehicle_links)} vehicle link(s)...\n")
    for link in vehicle_links:
        # The model name is the text before the first EM SPACE (U+2003);
        # written as an escape so the separator survives re-encoding.
        model = link['text'].split('\u2003')[0].strip()
        if model not in all_vehicle_models:
            all_vehicle_models.append(model)
            print(f"  Found model: {model}")

    print(f"\n‚úÖ Collected {len(all_vehicle_models)} unique car model(s)")

    # Now, collect engine codes from the first 1-3 series
    num_to_visit = min(3, len(vehicle_links))
    print(f"\nVisiting first {num_to_visit} series page(s) for engine codes...\n")

    original_url = driver.current_url
    for link in vehicle_links[:num_to_visit]:
        series_text = link['text'].split('\u2003')[0].strip()
        print(f"  Visiting {series_text}...")
        try:
            # Inside the try so a link without an href is reported, not fatal.
            series_url = link['url'].split('#')[0]
            safe_navigate_realoem(driver, series_url)
            time.sleep(0.5)
            aggressive_popup_killer(driver)

            # Extract engine codes: the 4th comma-separated field of each row.
            try:
                results_div = driver.find_element(By.CSS_SELECTOR, "div.partSearchResults")
                for li in results_div.find_elements(By.CSS_SELECTOR, "ul li"):
                    parts = li.text.split(',')
                    if len(parts) > 3:
                        engine = parts[3].strip()
                        engine_codes.add(engine)
                        print(f"    Found engine: {engine}")
            except Exception as e:
                print(f"    ‚ùå Extraction error: {str(e)[:60]}")
        except Exception as e:
            print(f"    ‚ùå Error: {str(e)[:60]}")

    # Return to original page
    safe_navigate_realoem(driver, original_url)
    time.sleep(0.5)
    aggressive_popup_killer(driver)

    print(f"\n‚úÖ Collected {len(engine_codes)} unique engine code(s): {sorted(engine_codes)}")

Extracting car models from 104 vehicle link(s)...

  Found model: 1' F20 LCI
  Found model: 1' F21 LCI
  Found model: 1' F40
  Found model: 2' F22
  Found model: 2' F22 LCI
  Found model: 2' G42
  Found model: 2' G87 M2
  Found model: 2' F23
  Found model: 2' F23 LCI
  Found model: 2' F44 Gran Coupé
  Found model: 2' F45 Active Tourer
  Found model: 2' F45 Active Tourer LCI
  Found model: 2' F46 Gran Tourer
  Found model: 2' F46 Gran Tourer LCI
  Found model: 3' F30 LCI
  Found model: 3' G20 Sedan
  Found model: 3' G20 Sedan LCI
  Found model: 3' G80 M3
  Found model: 3' G80 M3 LCI
  Found model: 3' G28 Sedan
  Found model: 3' G28 Sedan LCI
  Found model: 3' F31 LCI
  Found model: 3' G21 Touring
  Found model: 3' G21 Touring LCI
  Found model: 3' G81 M3 Touring
  Found model: 3' G81 M3 Touring LCI
  Found model: 3' F34 GT LCI
  Found model: 4' F32
  Found model: 4' F32 LCI
  Found model: 4' G22 Coupé
  Found model: 4' G22 Coupé LCI
  Found model: 4' G82 M4 Coupé
  Found model: 4' G

### Step 4 ‚Äî Show Vehicle Tags

In [29]:
# Step 4: list the models and engine codes collected in Step 3.
def _print_numbered(items):
    """Print each item on its own line with a right-aligned 1-based index."""
    for position, item in enumerate(items, start=1):
        print(f"  {position:>3}. {item}")


print(f"Total unique car models: {len(all_vehicle_models)}\n")
_print_numbered(all_vehicle_models)

print(f"\nTotal unique engine codes: {len(engine_codes)}\n")
_print_numbered(sorted(engine_codes))   # sorted for a stable listing

Total unique car models: 104

    1. 1' F20 LCI
    2. 1' F21 LCI
    3. 1' F40
    4. 2' F22
    5. 2' F22 LCI
    6. 2' G42
    7. 2' G87 M2
    8. 2' F23
    9. 2' F23 LCI
   10. 2' F44 Gran Coupé
   11. 2' F45 Active Tourer
   12. 2' F45 Active Tourer LCI
   13. 2' F46 Gran Tourer
   14. 2' F46 Gran Tourer LCI
   15. 3' F30 LCI
   16. 3' G20 Sedan
   17. 3' G20 Sedan LCI
   18. 3' G80 M3
   19. 3' G80 M3 LCI
   20. 3' G28 Sedan
   21. 3' G28 Sedan LCI
   22. 3' F31 LCI
   23. 3' G21 Touring
   24. 3' G21 Touring LCI
   25. 3' G81 M3 Touring
   26. 3' G81 M3 Touring LCI
   27. 3' F34 GT LCI
   28. 4' F32
   29. 4' F32 LCI
   30. 4' G22 Coupé
   31. 4' G22 Coupé LCI
   32. 4' G82 M4 Coupé
   33. 4' G82 M4 Coupé LCI
   34. 4' F33
   35. 4' F33 LCI
   36. 4' G23 Convertible
   37. 4' G23 Convertible LCI
   38. 4' G83 M4 Convertible
   39. 4' G83 M4 Convertible LCI
   40. 4' F36 Gran Coupé
   41. 4' F36 Gran Coupé LCI
   42. 4' G26 Gran Coupé
   43. 4' G26 Gran Coupé LCI
   44.

---
## 🚀 Batch Scraper

Run `scrape_realoem_barcode()` against every barcode in the `BARCODES` list.
Define the reusable function first, then execute the batch loop.

In [None]:
def _wait_for_part_page(drv):
    """Wait (best effort) until the error banner or the part heading appears.

    One attempt with a 10 s timeout; on failure popups are cleared, then a
    second attempt with a 5 s timeout. Wait failures are not raised: the
    caller detects missing content itself.
    """
    def _ready(d):
        return bool(
            d.find_elements(By.CSS_SELECTOR, "div.error.vs2")
            or d.find_elements(By.CSS_SELECTOR, "div.content h1")
        )

    try:
        WebDriverWait(drv, 10).until(_ready)
    except Exception:
        aggressive_popup_killer(drv)
        time.sleep(0.5)
        try:
            WebDriverWait(drv, 5).until(_ready)
        except Exception:
            pass


def _collect_engine_codes(series_links, limit=3):
    """Visit up to ``limit`` series pages and return the set of engine codes.

    The engine code is taken as the 4th comma-separated field of each result
    row. Errors on one page are printed and do not stop the remaining pages.
    """
    codes = set()
    for sl in series_links[:limit]:
        try:
            # Inside the try so a link without an href is reported, not fatal.
            series_url = sl['url'].split('#')[0]
            # Always read the module-level driver: navigation may have
            # replaced it with a revived session.
            safe_navigate_realoem(driver, series_url)
            time.sleep(0.5)
            aggressive_popup_killer(driver)
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Navigation error: {str(e)[:60]}")
            continue

        try:
            results_div = driver.find_element(By.CSS_SELECTOR, "div.partSearchResults")
            for li in results_div.find_elements(By.CSS_SELECTOR, "ul li"):
                parts = li.text.split(',')
                if len(parts) > 3:
                    codes.add(parts[3].strip())
        except Exception as e:
            print(f"  ‚ö†Ô∏è  Engine codes error: {str(e)[:60]}")
    return codes


def scrape_realoem_barcode(barcode):
    """Scrape BMW part data from realoem.com for a single barcode.

    Args:
        barcode: part number in any form; only its digits are used.

    Returns:
        On failure: ``{"success": False, "error": <message>}``.
        Otherwise a dict with ``product`` (part_number, description),
        ``pricing`` (price), ``details`` (from_date, to_date, weight) and
        ``compatibility`` (vehicle_series, engine_codes). A part unknown to
        the site has ``product["part_number"] == "NOT FOUND"`` and empty
        compatibility lists.
    """
    global driver

    numeric_barcode = re.sub(r'\D', '', str(barcode))
    if not numeric_barcode:
        return {"success": False, "error": "Invalid barcode ‚Äî no numeric digits found"}

    url = f"https://www.realoem.com/bmw/enUS/partxref?q={numeric_barcode}"
    safe_navigate_realoem(driver, url)
    time.sleep(0.5)
    aggressive_popup_killer(driver)

    _wait_for_part_page(driver)
    aggressive_popup_killer(driver)

    # Part not found?
    try:
        error_text = driver.find_element(By.CSS_SELECTOR, "div.error.vs2").text.strip()
    except Exception:
        error_text = ""   # no error banner on the page
    if "not found" in error_text.lower():
        return {
            "product": {"part_number": "NOT FOUND", "description": error_text},
            "pricing": {"price": None},
            "details": {"from_date": None, "to_date": None, "weight": None},
            "compatibility": {
                # Legacy keys kept for existing callers ...
                "vehicle_count": 0, "vehicle_tags": [],
                # ... plus the keys the success result uses.
                "vehicle_series": [], "engine_codes": [],
            },
        }

    # Extract main content
    try:
        part_number = driver.find_element(By.CSS_SELECTOR, "div.content h1").text
        description = driver.find_element(By.CSS_SELECTOR, "div.content h2").text
    except Exception:
        return {"success": False, "error": "Content failed to load (timeout or popup blocking)"}

    # Details from <dl>: <dt> labels paired positionally with <dd> values.
    part_details = {}
    try:
        dts = driver.find_elements(By.CSS_SELECTOR, "div.content dl dt")
        dds = driver.find_elements(By.CSS_SELECTOR, "div.content dl dd")
        for dt, dd in zip(dts, dds):
            key = dt.text.replace(":", "").strip()
            part_details[key] = dd.text.strip() or "-"   # "-" marks an empty value
    except Exception as e:
        print(f"  ‚ö†Ô∏è  Details error: {str(e)[:60]}")

    # Series links
    series_links = []
    try:
        for link in driver.find_elements(By.CSS_SELECTOR, "div.partSearchResults ul li a"):
            series_links.append({"text": link.text, "url": link.get_attribute("href")})
    except Exception as e:
        print(f"  ‚ö†Ô∏è  Series links error: {str(e)[:60]}")

    # Unique series names in first-seen order. The name is the text before
    # the first EM SPACE (U+2003), written as an escape so the separator
    # survives re-encoding of this file.
    vehicle_series = list(dict.fromkeys(
        sl['text'].split('\u2003')[0].strip() for sl in series_links
    ))

    # Collect engine codes from the first 1-3 series pages, then go back.
    original_url = driver.current_url
    engine_codes = _collect_engine_codes(series_links, limit=3)
    if series_links:
        # Only needed when another page was actually visited.
        safe_navigate_realoem(driver, original_url)
        time.sleep(0.5)
        aggressive_popup_killer(driver)

    return {
        "product": {"part_number": part_number, "description": description},
        "pricing": {"price": part_details.get("Price") or None},
        "details": {
            "from_date": part_details.get("From") or None,
            "to_date":   part_details.get("To")   or None,
            "weight":    part_details.get("Weight") or None,
        },
        "compatibility": {
            "vehicle_series": vehicle_series,
            # Sorted: a bare list(set) changes order between runs.
            "engine_codes":   sorted(engine_codes),
        },
    }


print("‚úÖ scrape_realoem_barcode() ready")

In [None]:
def _placeholder_row(barcode, status, message, elapsed):
    """Build the output row for a barcode that yielded no part data."""
    return {
        "barcode": barcode, "part_number": status,
        "description": message,
        "from_date": "", "to_date": "", "weight": "", "price": "",
        "vehicle_series": "", "engine_codes": "",
        "scrape_time_seconds": round(elapsed, 2),
    }


# Batch run: scrape every configured barcode and collect one row per barcode.
all_scraped_data = []
start_time     = time.time()
start_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
total_barcodes = len(BARCODES)

print(f"üöÄ Scraping {total_barcodes} barcode(s) ‚Äî started at {start_datetime}")
print("=" * 60)

for idx, barcode in enumerate(BARCODES, 1):
    item_start = time.time()
    timestamp  = datetime.now().strftime("%H:%M:%S")
    print(f"[{idx}/{total_barcodes}] [{timestamp}] {barcode}", end="  ")

    try:
        result = scrape_realoem_barcode(barcode)

        # The scraper signals failure through "success"/"error" keys.
        if result.get("success") is False or result.get("error"):
            item_time = time.time() - item_start
            error_msg = result.get("error", "Unknown error")
            print(f"‚ùå {error_msg[:60]} ({item_time:.1f}s)")
            all_scraped_data.append(_placeholder_row(barcode, "ERROR", error_msg, item_time))
            continue

        product       = result.get("product", {})
        pricing       = result.get("pricing", {})
        details       = result.get("details", {})
        compatibility = result.get("compatibility", {})
        item_time     = time.time() - item_start

        # The site does not know this part number.
        if product.get("part_number") == "NOT FOUND":
            print(f"‚ö†Ô∏è  NOT FOUND ({item_time:.1f}s)")
            all_scraped_data.append(
                _placeholder_row(barcode, "NOT FOUND", product.get("description", ""), item_time)
            )
            continue

        # Regular result: flatten the nested dict into one row.
        series_list  = compatibility.get("vehicle_series", [])
        engines_list = compatibility.get("engine_codes", [])
        row = {
            "barcode":             barcode,
            "part_number":         product.get("part_number", ""),
            "description":         product.get("description", ""),
            "from_date":           details.get("from_date") or "",
            "to_date":             details.get("to_date")   or "",
            "weight":              details.get("weight")    or "",
            "price":               pricing.get("price")     or "",
            "vehicle_series":      " | ".join(series_list),
            "engine_codes":        " | ".join(engines_list),
            "scrape_time_seconds": round(item_time, 2),
        }
        all_scraped_data.append(row)
        print(f"‚úÖ {product.get('part_number', '')[:30]} | {len(series_list)} series | {len(engines_list)} engines ({item_time:.1f}s)")

    except Exception as e:
        # Anything unexpected is recorded as an ERROR row; the batch goes on.
        item_time  = time.time() - item_start
        error_type = type(e).__name__
        error_msg  = str(e).split('\n')[0][:200]
        print(f"‚ùå {error_type}: {error_msg[:50]} ({item_time:.1f}s)")
        all_scraped_data.append(
            _placeholder_row(barcode, "ERROR", f"{error_type}: {error_msg}", item_time)
        )

    time.sleep(0.3)   # short pause between barcodes

# Summary
total_time   = time.time() - start_time
end_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
avg_time     = total_time / len(BARCODES) if BARCODES else 0

separator = "=" * 60
print("\n" + separator)
print(f"‚úÖ  Done ‚Äî {len(all_scraped_data)} result(s) collected")
print(f"    Started : {start_datetime}")
print(f"    Finished: {end_datetime}")
print(f"    Total   : {total_time/60:.1f} min  ({total_time:.1f}s)")
print(f"    Avg/part: {avg_time:.1f}s")
print(separator)

## Export Results

Preview the scraped data, then save to **CSV** and **Excel**.

In [None]:
# Use batch results if available, otherwise fall back to the single step-by-step result
# Preview: prefer the batch results; otherwise summarise the step-by-step run.
def _joined_count(joined):
    """Count the entries of a ' | '-joined string (0 for an empty string)."""
    return len(joined.split(' | ')) if joined else 0


data_to_preview = []
_batch_rows    = globals().get('all_scraped_data')
_single_result = globals().get('product_data')

if _batch_rows:
    data_to_preview = _batch_rows
    entry_count = len(data_to_preview)
    suffix = 'y' if entry_count == 1 else 'ies'
    print(f"Showing batch results ‚Äî {entry_count} entr{suffix}\n")
elif _single_result:
    pn       = _single_result.get('part_number', '‚Äî')
    desc     = _single_result.get('description', '‚Äî')
    from_d   = _single_result.get('From', '‚Äî')
    to_d     = _single_result.get('To', '‚Äî')
    weight   = _single_result.get('Weight', '‚Äî')
    vehicles = _single_result.get('vehicles', [])
    # Step 3 results are optional: fall back to empty when it has not run.
    series   = globals().get('all_vehicle_models', [])
    engines  = list(globals().get('engine_codes', []))
    print("Showing step-by-step test result\n")
    print(f"  Part Number  : {pn}")
    print(f"  Description  : {desc}")
    print(f"  From         : {from_d}  |  To: {to_d}  |  Weight: {weight}")
    print(f"  Series count : {len(vehicles)}")
    print(f"  Unique series: {len(series)}")
    print(f"  Engine codes : {len(engines)}")
else:
    print("‚ö†Ô∏è  No data yet ‚Äî run the batch scraper or complete the step-by-step test.")

# Show at most the first five batch rows.
for position, row in enumerate(data_to_preview[:5], start=1):
    print(f"{position}. {row['barcode']}  ‚Üí  {row['part_number']} ‚Äî {row['description']}")
    print(f"   From: {row['from_date'] or '‚Äî'}  |  To: {row['to_date'] or '‚Äî'}  |  Weight: {row['weight'] or '‚Äî'}")
    print(f"   Series: {_joined_count(row['vehicle_series'])}  |  Engines: {_joined_count(row['engine_codes'])}")
    print()

In [None]:
# Write the batch rows to a CSV file; columns follow the first row's keys.
output_csv = "scraped_bmw_parts.csv"

if not all_scraped_data:
    print("‚ö†Ô∏è  No data to save")
else:
    fieldnames = list(all_scraped_data[0])
    # newline='' lets the csv module control line endings itself.
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for row in all_scraped_data:
            writer.writerow(row)
    print(f"‚úÖ CSV saved ‚Üí {output_csv}  ({len(all_scraped_data)} rows)")

In [None]:
from openpyxl import Workbook

# Write the batch rows to an Excel workbook with one "BMW Parts" sheet.
output_xlsx = "scraped_bmw_parts.xlsx"

if not all_scraped_data:
    print("‚ö†Ô∏è  No data to save")
else:
    wb_out = Workbook()
    ws     = wb_out.active
    ws.title = "BMW Parts"

    # Header row first, then one sheet row per scraped item, in header order.
    headers = list(all_scraped_data[0])
    ws.append(headers)
    for item in all_scraped_data:
        row_values = []
        for h in headers:
            row_values.append(item.get(h, ""))
        ws.append(row_values)

    # Column width = longest cell text + 2, capped at 50.
    for col in ws.columns:
        max_len = 0
        for cell in col:
            max_len = max(max_len, len(str(cell.value or "")))
        ws.column_dimensions[col[0].column_letter].width = min(max_len + 2, 50)

    wb_out.save(output_xlsx)
    print(f"‚úÖ Excel saved ‚Üí {output_xlsx}  ({len(all_scraped_data)} rows)")

## Cleanup

Close the browser when you're done.

In [None]:
# Shut the browser down; a failure is reported instead of raised.
try:
    driver.quit()
except Exception as e:
    print(f"‚ö†Ô∏è  {e}")
else:
    print("‚úÖ Browser closed")