In [4]:
"""
limited_cars.com_scraper.ipynb

This notebook contains a limited version of a web scraper for Cars.com.
This limited version extracts each listing's title, price, and link, plus the vehicle specifications and feature lists shown on its detail page (dealer, location, and rating are not extracted yet).

Purpose:
- This code scrapes only the first 10 listings on the first results page to test functionality.
- It ensures that the scraping logic works before scaling to the full dataset.
- It helps identify potential issues (e.g., page structure changes, missing elements) before running a full scrape.

Once validated, this script can be expanded into a full version that scrapes all available listings.

Author: Elijah Lewis

"""
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

#Scrape settings
BASE_URL = "https://www.cars.com"
MAX_CARS = 10  #Number of listings visited in this limited test run

#Start Selenium WebDriver
driver = webdriver.Chrome()

#try/finally guarantees the browser is closed even if scraping fails partway,
#so a crash does not leave orphaned Chrome/chromedriver processes behind.
try:
    #Cars.com URL - requests page 1 with a page size of 100 listings.
    url = "https://www.cars.com/shopping/results/?page=1&page_size=100&stock_type=all&zip="

    #Load page
    driver.get(url)

    #Wait for content to load
    time.sleep(5)

    #Get page source after javascript execution
    page_source = driver.page_source

    #Parse with BeautifulSoup
    soup = BeautifulSoup(page_source, "html.parser")

    #Find all car listings
    cars = soup.find_all("div", class_="vehicle-details")
    if not cars:
        #An empty result most likely means the page structure changed or the
        #content did not finish loading; say so instead of printing nothing.
        print("No car listings found - check the page structure or the load wait.")

    #Extract details for the first MAX_CARS cars
    for car in cars[:MAX_CARS]:
        #Look each element up once, then fall back to "N/A" when it is missing
        title_tag = car.find("h2", class_="title")
        title = title_tag.text.strip() if title_tag else "N/A"

        price_tag = car.find("div", class_="price-section")
        price = price_tag.text.strip() if price_tag else "N/A"

        #href=True skips anchors without an href instead of raising KeyError
        link_tag = car.find("a", href=True)
        #urljoin handles root-relative, relative and already-absolute hrefs
        link = urljoin(BASE_URL, link_tag["href"]) if link_tag else "N/A"

        #Extract key features from the details page
        features = {}
        if link_tag:
            #Visit car details page (skipped when there is no usable link,
            #because driver.get("N/A") would raise on the invalid URL)
            driver.get(link)
            time.sleep(3)  #Allow page to load
            detail_soup = BeautifulSoup(driver.page_source, "html.parser")

            sections = detail_soup.find_all("dl", class_="fancy-description-list")
            for section in sections:
                items = section.find_all("dt")
                values = section.find_all("dd")
                #Pair each <dt> label with the <dd> value at the same position
                for dt, dd in zip(items, values):
                    feature_key = dt.text.strip()
                    if dd.find("ul", class_="vehicle-features-list"):
                        #Feature lists are flattened into one comma-separated string
                        feature_value = ", ".join(li.text.strip() for li in dd.find_all("li"))
                    else:
                        feature_value = dd.text.strip()
                    features[feature_key] = feature_value

        #Print details for the current car
        print(f"Title: {title}\nPrice: {price}\nLink: {link}")
        for key, value in features.items():
            print(f"{key}: {value}")
        print("-" * 40)
finally:
    #Close Selenium
    driver.quit()

Title: 2024 Chrysler Pacifica Hybrid Select
Price: $47,455
MSRP $54,955
Link: https://www.cars.com/vehicledetail/d9222569-983c-4284-a2cf-c0e467015f72/?attribution_type=isa
Exterior color: Bright White Clearcoat
Interior color: Black
Drivetrain: Front-wheel Drive
MPG: –
Fuel type: Hybrid
Transmission: Automatic CVT
Engine: 3.6L V6 24V MPFI DOHC Hybrid
VIN: 2C4RC1S79RR134670
Stock #: C219
Mileage: 15 mi.
Convenience: Adaptive Cruise Control, Heated Seats, Heated Steering Wheel, Navigation System, Power Liftgate, Remote Start
Entertainment: Apple CarPlay®/Android Auto®, Bluetooth®, USB Port
Exterior: Alloy Wheels
Seating: Leather Seats, Memory Seat, Third Row Seating
----------------------------------------
Title: 2024 Land Rover Range Rover 3.0L V6 Supercharged HSE
Price: $54,587
Link: https://www.cars.com/vehicledetail/ba096392-0f70-4265-908c-f325a874252c/?attribution_type=isa
Exterior color: Ligurian Black in Gloss Finish
Interior color: Deep Garnet/Ebony
Drivetrain: Four-wheel Drive
M