In [3]:
import asyncio
from playwright.async_api import async_playwright
import pandas as pd

_INDEX_URL = "https://mypayerdirectory.com/bcbs-prefix-list/"
_OUTPUT_CSV = "BCBS_Prefix_Data.csv"
_RANGE_LINK_SELECTOR = 'a[href*="/bcbs-prefix-list/"][href*="-to-"]'
_ROW_SELECTOR = "table tbody tr"
_NEXT_BUTTON_SELECTOR = "a.paginate_button.next"

# Keeps only the hrefs that point at an "xaa-to-xzz" range page.
_RANGE_LINKS_JS = """
elements =>
  elements
    .map(el => el.href)
    .filter(href => href.includes('-to-'))
"""

# Returns [firstCellText, secondCellText] for every row that has both cells.
# Text is returned untrimmed so that stripping happens in Python.
_ROW_TEXTS_JS = """
rows =>
  rows
    .map(row => [
      row.querySelector('td:nth-child(1)'),
      row.querySelector('td:nth-child(2)')
    ])
    .filter(cells => cells[0] && cells[1])
    .map(cells => [cells[0].innerText, cells[1].innerText])
"""

# Raw text of the first cell of the first body row (null when there is none).
_FIRST_CELL_TEXT_JS = """
() => {
  const cell = document.querySelector('table tbody tr td:nth-child(1)');
  return cell === null ? null : cell.innerText;
}
"""

# True once the first cell of the first body row differs from `previous`.
_FIRST_CELL_CHANGED_JS = """
previous => {
  const cell = document.querySelector('table tbody tr td:nth-child(1)');
  return (cell === null ? null : cell.innerText) !== previous;
}
"""


async def _collect_subpage_links(page):
    """Return the sorted, de-duplicated range-page URLs found on the index page."""
    # The range links may be injected by JavaScript; give them a moment to
    # appear, but carry on with whatever is present if they never do.
    try:
        await page.wait_for_selector(_RANGE_LINK_SELECTOR, timeout=10000)
    except Exception as wait_error:
        print(f"Range links did not appear in time; continuing anyway: {wait_error}")

    print("Extracting subpage links for A to Z ranges...")
    try:
        raw_links = await page.eval_on_selector_all(
            'a[href*="/bcbs-prefix-list/"]',
            _RANGE_LINKS_JS,
        )
    except Exception as link_extraction_error:
        print(f"Error extracting subpage links: {link_extraction_error}")
        raw_links = []

    return sorted(set(raw_links))


async def _read_table_rows(page):
    """Return the [prefix, name] pairs of the table page currently displayed."""
    # One evaluate call per page instead of several round trips per row; this
    # also avoids holding element handles that can go stale when the table
    # is redrawn.
    raw_rows = await page.eval_on_selector_all(_ROW_SELECTOR, _ROW_TEXTS_JS)

    page_rows = []
    for raw_prefix, raw_name in raw_rows:
        prefix_text = raw_prefix.strip()
        name_text = raw_name.strip()
        if prefix_text and name_text:
            page_rows.append([prefix_text, name_text])
    return page_rows


async def _advance_to_next_page(page):
    """Click "Next" and wait for the table to change; return False when done."""
    try:
        next_button = await page.query_selector(_NEXT_BUTTON_SELECTOR)
    except Exception as next_button_error:
        print(f"Error locating Next button: {next_button_error}")
        return False

    if not next_button:
        # No Next button means this subpage has a single page of rows.
        return False

    try:
        classes = await next_button.get_attribute("class") or ""
    except Exception as class_attribute_error:
        print(f"Error reading Next button class: {class_attribute_error}")
        return False

    if "disabled" in classes:
        # Next button is disabled: the last page has been reached.
        return False

    try:
        first_cell_before = await page.evaluate(_FIRST_CELL_TEXT_JS)
        await next_button.click()
    except Exception as click_error:
        print(f"Error clicking Next button: {click_error}")
        return False

    # Wait until the table actually shows different rows rather than sleeping
    # for a fixed time. If it never changes, stop paginating instead of
    # re-reading the same rows (or looping forever).
    try:
        await page.wait_for_function(
            _FIRST_CELL_CHANGED_JS,
            arg=first_cell_before,
            timeout=10000,
        )
    except Exception as no_change_error:
        print(f"Table did not change after clicking Next: {no_change_error}")
        return False

    return True


async def _scrape_subpage(page, link):
    """Return every [prefix, name] pair from one range page, across all its pages."""
    collected = []

    try:
        await page.goto(link, timeout=60000, wait_until="domcontentloaded")
    except Exception as subpage_navigation_error:
        print(f"Error navigating to {link}: {subpage_navigation_error}")
        return collected

    # Brief pause to ensure the table has rendered.
    await page.wait_for_timeout(2000)

    while True:
        try:
            collected.extend(await _read_table_rows(page))
        except Exception as row_query_error:
            print(f"Error querying rows on {link}: {row_query_error}")
            break

        if not await _advance_to_next_page(page):
            break

    return collected


async def scrape_all_prefixes():
    """
    Scrape every BCBS prefix range page and export the prefix/name pairs to CSV.

    Opens the BCBS prefix list index in headless Chromium, collects the links
    to the individual range pages (those whose URL contains "-to-"), walks each
    page's table including its pagination, and writes the result to
    'BCBS_Prefix_Data.csv' in the current working directory with the columns
    "Prefix" and "Name".

    Progress and errors are reported with print(). Errors are caught and
    printed rather than raised, and the function always returns None. The
    browser is closed on every exit path.
    """
    try:
        print("Starting Playwright")
        async with async_playwright() as playwright:
            browser = await playwright.chromium.launch(headless=True)
            try:
                page = await browser.new_page()

                # Step 1: Navigate to the main index of BCBS prefix ranges.
                print("Navigating to BCBS prefix list index")
                try:
                    await page.goto(
                        _INDEX_URL,
                        timeout=60000,
                        wait_until="domcontentloaded",
                    )
                except Exception as navigation_error:
                    print(f"Error navigating to index page: {navigation_error}")
                    return

                # Step 2: Collect the range-page links.
                subpage_links = await _collect_subpage_links(page)
                if not subpage_links:
                    print("No subpage links found; exiting.")
                    return

                print("Subpage links found:")
                for url in subpage_links:
                    print(f"  • {url}")

                # Step 3: Visit each range page and gather its rows.
                all_rows = []
                for link in subpage_links:
                    print(f"\nScraping subpage: {link}")
                    all_rows.extend(await _scrape_subpage(page, link))

                print("\nClosing browser...")
            finally:
                # Runs on success, early return and unexpected errors alike,
                # so the browser process is never left behind.
                await browser.close()

            # Step 4: Export the results.
            try:
                df = pd.DataFrame(all_rows, columns=["Prefix", "Name"])
                df.to_csv(_OUTPUT_CSV, index=False, encoding="utf-8")
                print(f"Export complete: {len(df)} rows saved to '{_OUTPUT_CSV}'.")
            except Exception as export_error:
                print(f"Error exporting to CSV: {export_error}")

    except Exception as unexpected_error:
        print(f"Unexpected error: {unexpected_error}")

# Top-level ``await`` is only valid where an event loop is already running,
# such as a Jupyter/IPython cell. In a plain script, use
# ``asyncio.run(scrape_all_prefixes())`` instead.
await scrape_all_prefixes()

Starting Playwright
Navigating to BCBS prefix list index
Extracting subpage links for A to Z ranges...
Subpage links found:
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-aaa-to-azz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-baa-to-bzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-caa-to-czz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-daa-to-dzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-eaa-to-ezz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-haa-to-hzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-iaa-to-izz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-jaa-to-jzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-kaa-to-kzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-laa-to-lzz/
  • https://mypayerdirectory.com/bcbs-prefix-list/bcbs-a

  return _current_tasks.get(loop)



Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-caa-to-czz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-daa-to-dzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-eaa-to-ezz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-haa-to-hzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-iaa-to-izz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-jaa-to-jzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-kaa-to-kzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-laa-to-lzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-maa-to-mzz/

Scraping subpage: https://mypayerdirectory.com/bcbs-prefix-list/bcbs-alpha-prefixes-naa-to-nzz/

Scraping subpage: https://myp