# Predicting used car prices - Autotrade

### 1. Data Collection

#### Data source: https://www.autotrader.com.au/

In [91]:
# Import libraries
from splinter import Browser
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
import os
import json
import time
import pandas as pd

In [92]:
# Visit Autotrade site
driver_path = os.path.join(os.path.expanduser("~"),"Documents", "DATA ANALYTICS BOOTCAMP", "Apps", "chromedriver_win32", "chromedriver.exe")
browser = Browser('chrome',service=Service(executable_path=driver_path))

In [93]:
# Visit the Mars news site
url = 'https://www.autotrader.com.au/for-sale/wa/perth?distance=25'
browser.visit(url)
html = browser.html

In [94]:
# Create a Beautiful Soup object
soup = BeautifulSoup(html, 'html.parser')

In [95]:
# Find all divs
question_sections = soup.find_all('a', class_='carListing')
question_sections

[<a class="carListing" data-cy="carListing-0" data-v-02134ae8="" href="/car/13138010/audi/tt/wa/wangara/sportscar"><div class="cardLayout" data-v-02134ae8="" data-v-343adefa=""><div class="cardLayout--thumbnail" data-v-343adefa=""><img class="carListing--image" data-v-02134ae8="" data-v-343adefa="" fetchpriority="high" src="https://autotraderau-res.cloudinary.com/t_listing_grid_c/inventory/2023-05-15/67524907114861/13138010/2007_audi_tt_Used_1.jpg"/> <div class="cardLayout--imageOverlay" data-v-343adefa=""></div> <div class="shortlist carListing--shortlistHeart" data-v-02134ae8="" data-v-4a1da896="" id="shortlist-13138010"><button class="" data-v-4a1da896=""><svg class="shortlist--heart" data-v-4a1da896="" height="24px" version="1.1" viewbox="0 0 24 24" width="24px" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><path d="M16.2675833,4 C14.7601704,4 13.5504852,4.67694387 12.8156725,5.73345611 L11.9882817,6.92307687 L11.1696024,5.72744437 C10.452741,4.68051

In [96]:
question_sections[0]

<a class="carListing" data-cy="carListing-0" data-v-02134ae8="" href="/car/13138010/audi/tt/wa/wangara/sportscar"><div class="cardLayout" data-v-02134ae8="" data-v-343adefa=""><div class="cardLayout--thumbnail" data-v-343adefa=""><img class="carListing--image" data-v-02134ae8="" data-v-343adefa="" fetchpriority="high" src="https://autotraderau-res.cloudinary.com/t_listing_grid_c/inventory/2023-05-15/67524907114861/13138010/2007_audi_tt_Used_1.jpg"/> <div class="cardLayout--imageOverlay" data-v-343adefa=""></div> <div class="shortlist carListing--shortlistHeart" data-v-02134ae8="" data-v-4a1da896="" id="shortlist-13138010"><button class="" data-v-4a1da896=""><svg class="shortlist--heart" data-v-4a1da896="" height="24px" version="1.1" viewbox="0 0 24 24" width="24px" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"><path d="M16.2675833,4 C14.7601704,4 13.5504852,4.67694387 12.8156725,5.73345611 L11.9882817,6.92307687 L11.1696024,5.72744437 C10.452741,4.680510

In [97]:
# Create an empty dictionary for the car data
car_data = {}

# Extract the year, make, and variant
h3_tag = question_sections[1].find('h3', class_='carListing--title')
year_model = h3_tag.contents[0].strip()
car = h3_tag.find('strong', class_='mmv').text.strip()
variant = h3_tag.find('span', class_='variant').text.strip()

# Print the extracted information
print("year_model:", year_model)
print("car:", car)
print("variant:", variant)

# Create a dictionary for the car data
car_data['year_model'] = year_model
car_data['car'] = car
car_data['variant'] = variant

print(car_data)

year_model: 2019
car: Hyundai I30 Active
variant: Active
{'year_model': '2019', 'car': 'Hyundai I30 Active', 'variant': 'Active'}


In [98]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [99]:
# Find the button element
# Click on the car listing
wait = WebDriverWait(browser.driver, 5)
car_listing = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'a.carListing')))
car_listing[2].click()

# Execute a click on the button
# try:
#     button.click()
# except Exception as e:
#     print(f"Exception: {e}")

In [100]:
# Wait for the details page to load
time.sleep(2)

In [101]:
# Get the HTML of the current page after clicking the button
html = browser.html

# Create a Beautiful Soup object
soup = BeautifulSoup(html, 'html.parser')

# Find the <h1> element with class "title"
h1_element = soup.find('h1', class_='title')

# Extract the text from the <h1> element
if h1_element:
    text = h1_element.get_text(strip=True)
    print(text)
else:
    print("No <h1> element found.")

car_data['specs'] = text

print(car_data)

2019 Hyundai I30 Active PD2 MY19
{'year_model': '2019', 'car': 'Hyundai I30 Active', 'variant': 'Active', 'specs': '2019 Hyundai I30 Active PD2 MY19'}


In [102]:
# Find the button element for the next click
wait = WebDriverWait(browser.driver, 5)
next_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.vehicleDetails--sectionHeader > div.vehicleDetails--title')))

# Execute a click on the next button
try:
    next_button.click()
except Exception as e:
    print(f"Exception: {e}")

In [103]:
# Find all divs
table_rows = soup.find_all('tr', class_='table--row')
table_rows

[<tr class="table--row" data-v-4c87c799=""><td class="table--label" data-v-4c87c799="">
         Kilometres
       </td> <td class="table--value table--bold" data-v-4c87c799="">
         66,975km
       </td></tr>,
 <tr class="table--row" data-v-4c87c799=""><td class="table--label" data-v-4c87c799="">
         Seller type
       </td> <td class="table--value table--bold" data-v-4c87c799="">
         Dealer: Used
       </td></tr>,
 <tr class="table--row" data-v-4c87c799=""><td class="table--label" data-v-4c87c799="">
         Price
       </td> <td class="table--value table--bold" data-v-4c87c799="">
         $21,998
       </td></tr>,
 <tr class="table--row" data-v-4c87c799=""><td class="table--label" data-v-4c87c799="">
         Transmission
       </td> <td class="table--value table--bold" data-v-4c87c799="">
         Automatic
       </td></tr>,
 <tr class="table--row" data-v-4c87c799=""><td class="table--label" data-v-4c87c799="">
         Body type
       </td> <td class="table--

In [104]:
# Extract only text
for row in table_rows:
    label = row.find('td', class_='table--label')
    value = row.find('td', class_='table--value')

    label_text = label.get_text(strip=True)
    value_text = value.get_text(strip=True)

    car_data[label_text] = value_text
    
    print(f"{label_text}: {value_text}")

Kilometres: 66,975km
Seller type: Dealer: Used
Price: $21,998
Transmission: Automatic
Body type: HATCH, 4 Doors, 5 Seats
Drive type: Front Wheel Drive
Engine: 4 cyl, 2 L
Fuel type: Unleaded Petrol
Fuel consumption: 7.40 L / 100 km
Colour ext / int: Silver / -
Registration: 1GTL860
Rego expiry: -
VIN: KMHH351EMKU099596
Stock No: P6368
ANCAP Safety rating: 
Green overall rating: LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.
Dealer: Gardner Passenger
Address: 1324 Albany Highway, Cannington, WA
Seating capacity: 5
Doors: 4
Front tyre size: 205/55 R16
Front rim size: 6.5x16
Rear tyre size: 205/55 R16
Rear rim size: 6.5x16
Injection / Carburation: 
CC: 1999
Number of cylinders: 4
Front suspension: MacPherson Strut
Rear suspension: Torsion Beam Axle
Front brakes: Disc - Ventilated
Rear brakes: Disc
Fuel type: Unleaded Petrol
Fuel tank capacity: 50.00
Fuel consumption: 7.40 L / 100 km
Valve gear type: 16
Maximum torque

In [105]:
for key, value in car_data.items():
        print(f"{key}: {value}")

year_model: 2019
car: Hyundai I30 Active
variant: Active
specs: 2019 Hyundai I30 Active PD2 MY19
Kilometres: 66,975km
Seller type: Dealer: Used
Price: $21,998
Transmission: Automatic
Body type: HATCH, 4 Doors, 5 Seats
Drive type: Front Wheel Drive
Engine: 4 cyl, 2 L
Fuel type: Unleaded Petrol
Fuel consumption: 7.40 L / 100 km
Colour ext / int: Silver / -
Registration: 1GTL860
Rego expiry: -
VIN: KMHH351EMKU099596
Stock No: P6368
ANCAP Safety rating: 
Green overall rating: LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.LeafCreated with Sketch.
Dealer: Gardner Passenger
Address: 1324 Albany Highway, Cannington, WA
Seating capacity: 5
Doors: 4
Front tyre size: 205/55 R16
Front rim size: 6.5x16
Rear tyre size: 205/55 R16
Rear rim size: 6.5x16
Injection / Carburation: 
CC: 1999
Number of cylinders: 4
Front suspension: MacPherson Strut
Rear suspension: Torsion Beam Axle
Front brakes: Disc - Ventilated
Rear brakes: Disc
Fuel tank capacity: 50.0

In [106]:
# Append the car data dictionary to the list
car_info = []
car_info.append(car_data)

In [107]:
df = pd.DataFrame(car_info)
df

Unnamed: 0,year_model,car,variant,specs,Kilometres,Seller type,Price,Transmission,Body type,Drive type,...,Make,Model,Variant,Series,Warranty when new (months),Warranty when new (kms),Service interval (months),Service interval (kms),Country of origin,Vehicle segment
0,2019,Hyundai I30 Active,Active,2019 Hyundai I30 Active PD2 MY19,"66,975km",Dealer: Used,"$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,...,Hyundai,I30,Active,PD2 MY19,60,999000,12,15000,,


In [111]:
# Find the button element for the next page click
wait = WebDriverWait(browser.driver, 5)
next_page_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'p[data-cy="pagination--arrow-right"]')))

# Execute a click on the next button
try:
    next_page_button.click()
except Exception as e:
    print(f"Exception: {e}")

In [109]:
time.sleep(3)

# Get the HTML of the current page after clicking the button
html = browser.html

# Create a Beautiful Soup object
soup = BeautifulSoup(html, 'html.parser')

In [110]:
# Find the button element for the next click
wait = WebDriverWait(browser.driver, 5)
next_page_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'p.data-cy.pagination--arrow-right')))

# Execute a click on the next button
try:
    next_page_button.click()
except Exception as e:
    print(f"Exception: {e}")

TimeoutException: Message: 
Stacktrace:
Backtrace:
	GetHandleVerifier [0x00C88893+48451]
	(No symbol) [0x00C1B8A1]
	(No symbol) [0x00B25058]
	(No symbol) [0x00B50467]
	(No symbol) [0x00B5069B]
	(No symbol) [0x00B7DD92]
	(No symbol) [0x00B6A304]
	(No symbol) [0x00B7C482]
	(No symbol) [0x00B6A0B6]
	(No symbol) [0x00B47E08]
	(No symbol) [0x00B48F2D]
	GetHandleVerifier [0x00EE8E3A+2540266]
	GetHandleVerifier [0x00F28959+2801161]
	GetHandleVerifier [0x00F2295C+2776588]
	GetHandleVerifier [0x00D12280+612144]
	(No symbol) [0x00C24F6C]
	(No symbol) [0x00C211D8]
	(No symbol) [0x00C212BB]
	(No symbol) [0x00C14857]
	BaseThreadInitThunk [0x76D100C9+25]
	RtlGetAppContainerNamedObjectPath [0x779D7B4E+286]
	RtlGetAppContainerNamedObjectPath [0x779D7B1E+238]


In [None]:
# browser.quit()