# 01 - BaT Scraping

Scrape completed Porsche 911 auctions from Bring a Trailer.

**Outputs:**
- `data/raw/bat_listings.parquet` - Raw scraped data

In [1]:
import logging
import pandas as pd
from pathlib import Path

from price_analysis.scraping import fetch_auctions
from price_analysis.scraping.bat import listings_to_dataframe

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [2]:
# Paths
DATA_DIR = Path("../data")
RAW_DIR = DATA_DIR / "raw"
RAW_DIR.mkdir(parents=True, exist_ok=True)

OUTPUT_PATH = RAW_DIR / "bat_listings.parquet"

## Configure Scraping

Adjust search queries and pagination as needed.

In [3]:
# Search queries - can be refined for specific generations
QUERIES = [
    "Porsche 911",  # Broad search to get all 911s
]

# Scraping parameters
# TODO bump to 50+ once fully debugged and working
MAX_CLICKS = 3  # Number of "Show More" button clicks (each loads ~24 listings)
DELAY = 2.5  # Seconds between requests (be polite!)
HEADLESS = True  # Set False to see browser for debugging

## Debug: Inspect BaT Page Structure (Optional)

Skip this section - fixture files already saved to `tests/fixtures/`.
Only re-run if BaT changes their DOM structure.

In [4]:
# Debug: Save sample pages for selector inspection
import time
from price_analysis.scraping import create_driver, save_debug_page

# Create driver (set HEADLESS=False above to watch)
driver = create_driver(headless=HEADLESS)

try:
    # 1. Fetch search results page
    search_url = "https://bringatrailer.com/auctions/results/?s=Porsche+911"
    logger.info(f"Fetching search page: {search_url}")
    driver.get(search_url)
    time.sleep(3)  # Wait for JS to load
    save_debug_page(driver, "bat_search_page", output_dir="../tests/fixtures")

    # 2. Fetch a specific listing page (992.1)
    listing_url = "https://bringatrailer.com/listing/2020-porsche-911-carrera-4s-coupe-26/"
    logger.info(f"Fetching listing: {listing_url}")
    driver.get(listing_url)
    time.sleep(3)
    save_debug_page(driver, "bat_listing_992", output_dir="../tests/fixtures")

    # 3. Fetch another listing (997.2) for variety
    listing_url_997 = "https://bringatrailer.com/listing/2009-porsche-911-carrera-4s-coupe-28/"
    logger.info(f"Fetching listing: {listing_url_997}")
    driver.get(listing_url_997)
    time.sleep(3)
    save_debug_page(driver, "bat_listing_997", output_dir="../tests/fixtures")

    logger.info("Debug pages saved to tests/fixtures/")

finally:
    driver.quit()

INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/flatljan/.wdm/drivers/chromedriver/mac64/143.0.7499.169/chromedriver-mac-arm64/chromedriver] found in cache
INFO:__main__:Fetching search page: https://bringatrailer.com/auctions/results/?s=Porsche+911
INFO:price_analysis.scraping.bat:Saved debug page to ../tests/fixtures/bat_search_page.html
INFO:__main__:Fetching listing: https://bringatrailer.com/listing/2020-porsche-911-carrera-4s-coupe-26/
INFO:price_analysis.scraping.bat:Saved debug page to ../tests/fixtures/bat_listing_992.html
INFO:__main__:Fetching listing: https://bringatrailer.com/listing/2009-porsche-911-carrera-4s-coupe-28/
INFO:price_analysis.scraping.bat:Saved debug page to ../tests/fixtures/bat_listing_997.html
INFO:__main__:Debug pages saved to tests/fixtures/


## Run Scraper

This will take a while depending on MAX_PAGES. Each page + listing takes ~3-5 seconds.

In [None]:
all_listings = []

for query in QUERIES:
    logger.info(f"Scraping: {query}")
    listings = fetch_auctions(
        query=query,
        max_clicks=MAX_CLICKS,
        delay=DELAY,
        headless=HEADLESS,
    )
    all_listings.extend(listings)
    logger.info(f"Found {len(listings)} listings for '{query}'")

logger.info(f"Total listings scraped: {len(all_listings)}")

INFO:__main__:Scraping: Porsche 911
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Get LATEST chromedriver version for google-chrome
INFO:WDM:Driver [/Users/flatljan/.wdm/drivers/chromedriver/mac64/143.0.7499.169/chromedriver-mac-arm64/chromedriver] found in cache
INFO:price_analysis.scraping.bat:Fetching search results: https://bringatrailer.com/auctions/results/?s=Porsche+911
INFO:price_analysis.scraping.bat:Clicked 'Show More' (1/3)
INFO:price_analysis.scraping.bat:Clicked 'Show More' (2/3)
INFO:price_analysis.scraping.bat:Clicked 'Show More' (3/3)
INFO:price_analysis.scraping.bat:Found 82 unique listings after 3 'Show More' clicks
INFO:price_analysis.scraping.bat:Found 82 unique listings to fetch
INFO:price_analysis.scraping.bat:Processing listing 1/82
INFO:price_analysis.scraping.bat:Fetching listing: https://bringatrailer.com/listing/2004-porsche-911-40th-anniversary-121/
INFO:price_analysis.scraping.bat:Processing listing 2/82
INFO:price_analysis.scraping.ba

## Convert to DataFrame and Save

In [None]:
df = listings_to_dataframe(all_listings)
display(df.head(10))
print(f"\nShape: {df.shape}")

Unnamed: 0,listing_url,title_raw,sale_price,sale_date,model_year,generation,trim,transmission,mileage,color,location
0,https://bringatrailer.com/listing/1995-porsche...,1995 Porsche 911 Carrera Coupe 6-Speed,137000122225,,1995.0,,Carrera,Manual,,,
1,https://bringatrailer.com/listing/1985-porsche...,1985 Porsche 911 Carrera Targa,42250122425,,1985.0,,Targa,Manual,,,
2,https://bringatrailer.com/listing/2019-porsche...,100-Mile Meissen Blue 2019 Porsche 911 Speedster,531000122325,,2019.0,991.2,,Manual,100.0,,
3,https://bringatrailer.com/listing/2006-porsche...,40k-Mile 2006 Porsche 911 Carrera S Coupe 6-Speed,67500122325,,2006.0,997.1,Carrera S,Manual,,,
4,https://bringatrailer.com/listing/1999-porsche...,LS3-Powered 1999 Porsche 911 Carrera 4 Coupe 6...,35911122325,,1999.0,996.1,Carrera 4,Manual,,,
5,https://bringatrailer.com/listing/1989-porsche...,1989 Porsche 911 Carrera Coupe G50,68500122225,,1989.0,,Carrera,Manual,,,
6,https://bringatrailer.com/listing/transaxle-13/,Porsche 930 4-Speed Transaxle,6100122325,,,,,Manual,,,Listing DetailsFour-Speed Manual TransaxleLimi...
7,https://bringatrailer.com/listing/2020-porsche...,6k-Mile 2020 Porsche 911 Carrera 4S Coupe,137000122425,,2020.0,992.1,Carrera 4S,PDK,,,
8,https://bringatrailer.com/listing/1984-porsche...,Modified 1984 Porsche 911 Carrera Coupe,58000122325,,1984.0,,Carrera,Manual,,,
9,https://bringatrailer.com/listing/2009-porsche...,15k-Mile 2009 Porsche 911 Carrera 4S Coupe 6-S...,105000122225,,2009.0,997.2,Carrera 4S,Manual,,,



Shape: (24, 11)


In [None]:
# Append to existing if present, otherwise create new
if OUTPUT_PATH.exists():
    existing = pd.read_parquet(OUTPUT_PATH)
    df = pd.concat([existing, df], ignore_index=True)
    df = df.drop_duplicates(subset=["listing_url"], keep="last")
    logger.info(f"Merged with existing data: {len(df)} total listings")

df.to_parquet(OUTPUT_PATH, index=False)
logger.info(f"Saved to {OUTPUT_PATH}")

INFO:__main__:Saved to ../data/raw/bat_listings.parquet


## Quick Inspection

In [None]:
print("Counts by generation:")
display(df["generation"].value_counts(dropna=False))

Counts by generation:


generation
None     11
997.1     3
991.2     2
996.1     2
992.1     2
996.2     2
997.2     1
992.2     1
Name: count, dtype: int64

In [None]:
print("Counts by trim:")
display(df["trim"].value_counts(dropna=False))

Counts by trim:


trim
Carrera        7
None           4
Turbo          4
Carrera 4      2
Carrera 4S     2
Targa          1
Carrera S      1
GT2 RS         1
GT3            1
GT3 Touring    1
Name: count, dtype: int64

In [None]:
print("Counts by transmission:")
display(df["transmission"].value_counts(dropna=False))

Counts by transmission:


transmission
Manual       19
PDK           2
None          2
Automatic     1
Name: count, dtype: int64

In [None]:
# Check parsing quality - how many have all required fields?
required = ["sale_price", "model_year", "generation", "trim", "transmission", "mileage"]
complete = df[required].notna().all(axis=1).sum()
print(
    f"\nListings with all required fields: {complete} / {len(df)} ({complete / len(df) * 100:.1f}%)"
)


Listings with all required fields: 0 / 24 (0.0%)


In [None]:
# Sample some listings to verify parsing
print("Sample listings for manual verification:")
sample = df.sample(min(5, len(df)))
for _, row in sample.iterrows():
    print(f"\n{row['title_raw']}")
    print(
        f"  Parsed: {row['model_year']} {row['generation']} {row['trim']} ({row['transmission']})"
    )
    print(f"  Price: ${row['sale_price']:,}" if pd.notna(row["sale_price"]) else "  Price: N/A")
    print(f"  Mileage: {row['mileage']:,}" if pd.notna(row["mileage"]) else "  Mileage: N/A")

Sample listings for manual verification:

LS3-Powered 1999 Porsche 911 Carrera 4 Coupe 6-Speed RWD
  Parsed: 1999.0 996.1 Carrera 4 (Manual)
  Price: $35,911,122,325
  Mileage: N/A

Signal Green 2018 Porsche 911 GT2 RS Weissach
  Parsed: 2018.0 991.2 GT2 RS (PDK)
  Price: $531,111,122,425
  Mileage: N/A

6k-Mile 1994 Porsche 911 Turbo 3.6
  Parsed: 1994.0 None Turbo (Manual)
  Price: $876,000,122,425
  Mileage: N/A

1976 Porsche 930 Turbo Carrera Project
  Parsed: nan None Turbo (Manual)
  Price: $102,000,122,225
  Mileage: N/A

20×9″ and 20×12″ Center-Lock Wheels for Porsche 991 GT3
  Parsed: nan None GT3 (None)
  Price: $2,900,122,425
  Mileage: N/A
