# EXTRACTING REQUIRED INFO ONLY (NEW + USED) (JSON FORMAT)
- Year
- Make
- Model
- Trim
- BaseMSRP
- DriveType
- Image Link
- Link to internal page
- Body Style
- Engine

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

def get_car_data(url, new_or_used):
    """Scrape every vehicle listing from a dealership inventory page.

    Loads ``url`` in the module-level ``driver``, scrolls to the bottom until
    the page height stops growing (so lazily loaded listings are rendered),
    then builds one record per ``vehicle-list-cell`` element.

    Args:
        url: Inventory page to load.
        new_or_used: Label stored unchanged under the ``new_or_used`` key of
            every returned record.

    Returns:
        A list of dicts, one per listing, with the keys ``new_or_used``,
        ``Year``, ``Make``, ``Model``, ``Trim``, ``Base_MSRP``,
        ``Image_Link``, ``internal_link``, ``Drivetrain``, ``Body_Style`` and
        ``Engine`` (in that order). A field that cannot be read from the page
        is set to ``"N/A"``.
    """
    # Imported here so the narrowed exception handling below is
    # self-contained. WebDriverException is the base class of Selenium's
    # lookup errors (e.g. NoSuchElementException).
    from selenium.common.exceptions import WebDriverException

    def read_text(root, by, selector):
        """Return the stripped text of the first match, or "N/A" on failure."""
        try:
            return root.find_element(by, selector).text.strip()
        except WebDriverException:
            return "N/A"

    driver.get(url)
    time.sleep(5)  # give the initial page load time to settle

    # Keep scrolling to the bottom until the document stops getting taller.
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    car_listings = driver.find_elements(By.CLASS_NAME, 'vehicle-list-cell')

    cars = []
    for item in car_listings:
        car = {}
        car['new_or_used'] = new_or_used

        # Reset per listing: otherwise a failed lookup would leave the
        # previous listing's element bound and the link step below would
        # silently attach another vehicle's URL to this record.
        name_tag = None
        try:
            name_tag = item.find_element(By.CLASS_NAME, 'vehicle-year-make-model')
            name_parts = name_tag.text.strip().split(' ')
        except WebDriverException:
            name_parts = []

        # The title is split as "<Year> <Make> <Model> <Trim...>"; anything
        # with fewer than three tokens is treated as unparseable.
        if len(name_parts) >= 3:
            car['Year'] = name_parts[0]
            car['Make'] = name_parts[1]
            car['Model'] = name_parts[2]
            car['Trim'] = ' '.join(name_parts[3:])
        else:
            car['Year'] = "N/A"
            car['Make'] = "N/A"
            car['Model'] = "N/A"
            car['Trim'] = "N/A"

        car['Base_MSRP'] = read_text(item, By.CSS_SELECTOR, 'span[itemprop="price"]')

        try:
            # NOTE(review): 'respnsive' presumably mirrors the site's own
            # class spelling - verify against the page markup before "fixing".
            image_tag = item.find_element(By.CSS_SELECTOR, 'img.img-list-respnsive')
            image_url = image_tag.get_attribute('src')
        except WebDriverException:
            car['Image_Link'] = "N/A"
        else:
            # This exact truncated src is swapped for the "images coming"
            # picture; any other value is kept unchanged.
            if image_url == "https://static.edealer.ca/V3_1/assets/images/n":
                car['Image_Link'] = "https://static.edealer.ca/V3_1/assets/images/new_vehicles_images_coming.png"
            else:
                car['Image_Link'] = image_url

        # The detail-page link lives inside the title element, so it is only
        # available when that element was found for *this* listing.
        if name_tag is None:
            car['internal_link'] = "N/A"
        else:
            try:
                internal_link_tag = name_tag.find_element(By.CSS_SELECTOR, 'a.stat-text-link')
                car['internal_link'] = internal_link_tag.get_attribute('href')
            except WebDriverException:
                car['internal_link'] = "N/A"

        # Spec values: the <td> immediately following the labelled <td>.
        car['Drivetrain'] = read_text(
            item, By.XPATH,
            ".//td[contains(text(), 'Drivetrain')]/following-sibling::td[1]")
        car['Body_Style'] = read_text(
            item, By.XPATH,
            ".//td[contains(text(), 'Body Style:')]/following-sibling::td[1]")
        car['Engine'] = read_text(
            item, By.XPATH,
            ".//td[contains(text(), 'Engine:')]/following-sibling::td[1]")

        cars.append(car)

    return cars

# Run Chrome without a visible window. The ``options.headless`` attribute was
# deprecated in Selenium 4.8 and removed in 4.13 (where assigning it no longer
# has any effect), so the command-line flag is used instead.
options = Options()
options.add_argument("--headless=new")
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)

# (inventory page, label stored on each record)
urls = [("https://www.camclarkfordrichmond.com/used/", "Used"),
        ("https://www.camclarkfordrichmond.com/new/", "New")]

all_cars = []

# Always shut the browser down, even if a page fails to load or parse, so no
# orphaned Chrome/chromedriver processes are left behind.
try:
    for url, new_or_used in urls:
        all_cars.extend(get_car_data(url, new_or_used))
finally:
    driver.quit()

df = pd.DataFrame(all_cars)

print(df)

# One dict per vehicle; as the cell's final expression the list is displayed
# by the notebook.
car_list_json = df.to_dict(orient='records')
car_list_json

    new_or_used  Year      Make    Model  \
0          Used  2024      Ford  Mustang   
1          Used  2013  Infiniti     JX35   
2          Used  2017   Hyundai   Tucson   
3          Used  2016     Mazda     CX-5   
4          Used  2019    Nissan   Sentra   
..          ...   ...       ...      ...   
180         New  2024      Ford    F-150   
181         New  2024      Ford    F-150   
182         New  2024      Ford    F-150   
183         New  2023      Ford    F-150   
184         New  2024      Ford    F-250   

                                            Trim  Base_MSRP  \
0                        GT Premium 2dr Fastback    $63,995   
1                  All-Wheel Drive Sport Utility    $16,995   
2                Limited 1.6 4dr All-Wheel Drive    $17,857   
3    GS (A6) 4dr Front-Wheel Drive Sport Utility    $18,105   
4                                                   $21,995   
..                                           ...        ...   
180         Raptor 4x4 SuperCr

[{'new_or_used': 'Used',
  'Year': '2024',
  'Make': 'Ford',
  'Model': 'Mustang',
  'Trim': 'GT Premium 2dr Fastback',
  'Base_MSRP': '$63,995',
  'Image_Link': 'https://static.edealer.ca/V3_1/assets/images/new_vehicles_images_coming.png',
  'internal_link': 'https://www.camclarkfordrichmond.com/used/vehicle/2024-ford-mustang-gt-premium-id13236162.htm',
  'Drivetrain': 'Rear Wheel Drive',
  'Body_Style': 'Coupe',
  'Engine': '5.0L 8cyl'},
 {'new_or_used': 'Used',
  'Year': '2013',
  'Make': 'Infiniti',
  'Model': 'JX35',
  'Trim': 'All-Wheel Drive Sport Utility',
  'Base_MSRP': '$16,995',
  'Image_Link': 'https://static.edealer.ca/V3_1/assets/images/new_vehicles_images_coming.png',
  'internal_link': 'https://www.camclarkfordrichmond.com/used/vehicle/2013-infiniti-jx35-base-id13231320.htm',
  'Drivetrain': 'All Wheel Drive',
  'Body_Style': 'SUV',
  'Engine': '3.5L 6cyl'},
 {'new_or_used': 'Used',
  'Year': '2017',
  'Make': 'Hyundai',
  'Model': 'Tucson',
  'Trim': 'Limited 1.6 4dr A