In [1]:
# Import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
import time

In [2]:
# Initialize Splinter browser
browser = Browser('chrome')

# URLs for Honda CR-V and CR-V Hybrid
base_urls = [
    'https://www.cars.com/shopping/results/?makes[]=honda&maximum_distance=50&models[]=honda-cr_v&page=1&page_size=100&stock_type=all&zip=61606',
    'https://www.cars.com/shopping/results/?stock_type=all&makes%5B%5D=honda&models%5B%5D=honda-cr_v_hybrid&maximum_distance=50&zip=61606'
]

# Create empty lists to store data
models = []
mileages=[]
statuses = []
prices = []
dealers = []
cities = []

# Function to reset lists
def clear_lists():
    global models, statuses, prices, dealers, cities, mileages
    models = []
    statuses = []
    prices = []
    dealers = []
    cities = []
    mileages = []

# Function to scrape data from a given URL
def scrape_url(url):
    browser.visit(url)
    html = browser.html
    car_soup = soup(html, 'html.parser')
    car_list = car_soup.find_all('div', class_='vehicle-card')
    
    for car in car_list:
        # Extract car model
        car_model = car.find('h2', class_='title')
        model = str(car_model.get_text(strip=True)) if car_model else 'N/A'

        # Extract car mileage
        car_mileage = car.find('div', class_='mileage')
        mileage = str(car_mileage.get_text(strip=True)) if car_mileage else 'N/A'

        # Extract car status
        car_status = car.find('p', class_='stock-type')
        status = str(car_status.get_text(strip=True)) if car_status else 'N/A'

        # Extract car price
        car_price = car.find('span', class_='primary-price')
        price = str(car_price.get_text(strip=True)) if car_price else 'N/A'

        # Extract dealer name
        car_dealer = car.find('div', class_='dealer-name').find('strong')
        dealer = str(car_dealer.get_text(strip=True)) if car_dealer else 'N/A'

        # Extract dealer city
        dealer_city = car.find('div', class_='miles-from')
        city = str(dealer_city.get_text(strip=True)) if dealer_city else 'N/A'

        # Append details to lists
        models.append(model)
        mileages.append(mileage)
        statuses.append(status)
        prices.append(price)
        dealers.append(dealer)
        cities.append(city)

# Scrape data from each URL in the base_urls list
for url in base_urls:
    scrape_url(url)

In [3]:
# Create df
data={
    'Model': models,
    'Status': statuses,
    'Price (USD)': prices,
    'Dealer': dealers,
    'City': cities,
    'Mileage (mi)': mileages,
    }
car_df= pd.DataFrame(data)

In [4]:
# Close the browser
browser.quit()

In [5]:
# Clear lists after scraping
# clear_lists()

In [6]:
# Display DF
print("Original DataFrame:")
print(car_df)
print()

Original DataFrame:
                             Model           Status Price (USD)  \
0             2017 Honda CR-V EX-L             Used     $16,066   
1             2013 Honda CR-V EX-L             Used     $13,995   
2             2025 Honda CR-V EX-L              New     $37,850   
3             2023 Honda CR-V EX-L             Used     $31,998   
4             2024 Honda CR-V EX-L             Used     $34,998   
..                             ...              ...         ...   
77  2021 Honda CR-V Hybrid Touring  Honda Certified     $32,982   
78  2024 Honda CR-V Hybrid Sport-L             Used     $37,999   
79    2024 Honda CR-V Hybrid Sport             Used     $34,500   
80       2020 Honda CR-V Hybrid EX             Used     $22,993   
81    2023 Honda CR-V Hybrid Sport             Used     $32,600   

                                               Dealer  \
0                            Yemm Chevrolet Buick GMC   
1                           Auto House of Bloomington   
2   

In [7]:
# Check for missing values:
missing_values= car_df.isnull().sum()
print("Missing values:")
print(missing_values)
print()

Missing values:
Model           0
Status          0
Price (USD)     0
Dealer          0
City            0
Mileage (mi)    0
dtype: int64



In [8]:

# Split 'Model' column into 'Year', 'Manufacturer', 'Model', and 'Trim'
# Handling 'CR-V' and 'CR-V Hybrid' correctly
split_df = car_df['Model'].str.extract(r'(?P<Year>\d{4})\s+(?P<Manufacturer>[\w-]+)\s+(?P<Model>CR-V(?:\s+Hybrid)?)\s+(?P<Trim>.*)')
#  Captures group called 'Year' (?P<Year> ; \d{4} matches to 4 digits
# Matches 1+ whitespace characters to account for space between Year and Manufacturer \s+
# Captures another group called Manufacturer, matches by 1+ words (?P<Manufacturer>[\w-]+)
# Captures group called Model (?P<Model> ; CR-V(?:\s+Hybrid)?) Matches to string "CR-V" 
# or by a white space (?:\s) plus the string "Hybrid" +Hybrid)?)
# Captures group called Trim (?P<Trim>) ; Matches any character 0+ times 
# (i.e. captures everything after Model as the Trim) using .*

# Replace 'Not Priced' with NaN and convert 'Price' column to numeric
car_df['Price (USD)'] = pd.to_numeric(car_df['Price (USD)'].replace('Not Priced', pd.NA).str.replace('[\$,]', '', regex=True), errors='coerce')

# Clean the 'Mileage' column
car_df['Mileage (mi)'] = pd.to_numeric(car_df['Mileage (mi)'].str.replace('mi.', '').str.replace(',', '').str.strip(), errors='coerce')

# Concatenate split_df with the remaining columns from car_df
car_df = pd.concat([split_df, car_df[['Status', 'Price (USD)', 'Dealer', 'City', 'Mileage (mi)']]], axis=1)

# Reorder columns as required
car_df = car_df[['Year', 'Manufacturer', 'Model', 'Trim', 'Mileage (mi)', 'Status', 'Price (USD)', 'Dealer', 'City']]

# Display the updated DataFrame
print("Final DataFrame:")
print(car_df)

Final DataFrame:
    Year Manufacturer        Model     Trim  Mileage (mi)           Status  \
0   2017        Honda         CR-V     EX-L      173049.0             Used   
1   2013        Honda         CR-V     EX-L       93804.0             Used   
2   2025        Honda         CR-V     EX-L           NaN              New   
3   2023        Honda         CR-V     EX-L       21648.0             Used   
4   2024        Honda         CR-V     EX-L        6753.0             Used   
..   ...          ...          ...      ...           ...              ...   
77  2021        Honda  CR-V Hybrid  Touring       29576.0  Honda Certified   
78  2024        Honda  CR-V Hybrid  Sport-L        4302.0             Used   
79  2024        Honda  CR-V Hybrid    Sport         329.0             Used   
80  2020        Honda  CR-V Hybrid       EX       64669.0             Used   
81  2023        Honda  CR-V Hybrid    Sport        9471.0             Used   

    Price (USD)                               

  car_df['Mileage (mi)'] = pd.to_numeric(car_df['Mileage (mi)'].str.replace('mi.', '').str.replace(',', '').str.strip(), errors='coerce')


In [9]:
car_df

Unnamed: 0,Year,Manufacturer,Model,Trim,Mileage (mi),Status,Price (USD),Dealer,City
0,2017,Honda,CR-V,EX-L,173049.0,Used,16066,Yemm Chevrolet Buick GMC,"Galesburg, IL (45 mi.)"
1,2013,Honda,CR-V,EX-L,93804.0,Used,13995,Auto House of Bloomington,"Bloomington, IL (38 mi.)"
2,2025,Honda,CR-V,EX-L,,New,37850,O'Brien Honda of Bloomington,"Bloomington, IL (38 mi.)"
3,2023,Honda,CR-V,EX-L,21648.0,Used,31998,CarMax Austin North - Offering Express Pickup ...,
4,2024,Honda,CR-V,EX-L,6753.0,Used,34998,CarMax Austin North - Offering Express Pickup ...,
...,...,...,...,...,...,...,...,...,...
77,2021,Honda,CR-V Hybrid,Touring,29576.0,Honda Certified,32982,Bosak Honda,"Highland, IN (126 mi.)"
78,2024,Honda,CR-V Hybrid,Sport-L,4302.0,Used,37999,Honda of Lisle,"Lisle, IL (111 mi.)"
79,2024,Honda,CR-V Hybrid,Sport,329.0,Used,34500,Gerald Honda of Matteson,"Matteson, IL (112 mi.)"
80,2020,Honda,CR-V Hybrid,EX,64669.0,Used,22993,McGrath Honda of St. Charles,"St. Charles, IL (110 mi.)"


In [11]:
print(car_df.to_string())

    Year Manufacturer        Model           Trim  Mileage (mi)           Status  Price (USD)                                                           Dealer                           City
0   2017        Honda         CR-V           EX-L      173049.0             Used        16066                                         Yemm Chevrolet Buick GMC         Galesburg, IL (45 mi.)
1   2013        Honda         CR-V           EX-L       93804.0             Used        13995                                        Auto House of Bloomington       Bloomington, IL (38 mi.)
2   2025        Honda         CR-V           EX-L           NaN              New        37850                                     O'Brien Honda of Bloomington       Bloomington, IL (38 mi.)
3   2023        Honda         CR-V           EX-L       21648.0             Used        31998  CarMax Austin North - Offering Express Pickup and Home Delivery                            N/A
4   2024        Honda         CR-V           EX-L 

In [10]:
car_df.to_excel("CR-V.xlsx")