In [128]:
# Name: Abel
# Date: 2021.07.20
# Purpose: A webscraper for Honda Civic from Waterloo and Toronto Dealerships
# Reference: Webscraper from Edureka

from bs4 import BeautifulSoup
import pandas as pd
import requests
from datetime import date

from selenium import webdriver
import time

In [129]:
# Name: Abel
# Date: 2021.07.21
# Purpose: Helper function to append to excel if it exists, and to create one if it doesn't
# Reference: https://stackoverflow.com/questions/38074678/append-existing-excel-sheet-with-new-dataframe-using-python-pandas

import os
from openpyxl import load_workbook


def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) the data is written below
                     the last used row of an existing sheet, or at row 0
                     when the sheet is new or has just been truncated.
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]; an [engine] entry is
                            ignored when the file already exists
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2', 
                           index=False, startrow=25)

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    # Excel file doesn't exist - saving and exiting
    if not os.path.isfile(filename):
        df.to_excel(
            filename,
            sheet_name=sheet_name, 
            startrow=startrow if startrow is not None else 0, 
            **to_excel_kwargs)
        return

    # ignore [engine] parameter if it was passed: the file is always
    # reopened with openpyxl below
    to_excel_kwargs.pop('engine', None)

    # Append mode makes pandas load the existing workbook into writer.book,
    # so the workbook is not loaded (or assigned) a second time here.
    try:
        # 'overlay' lets newer pandas write into a sheet that already exists
        # instead of raising
        writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a',
                                if_sheet_exists='overlay')
    except (TypeError, ValueError):
        # older pandas: the option is unknown or 'overlay' is not accepted
        writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')

    book = writer.book
    sheet_exists = sheet_name in book.sheetnames

    if truncate_sheet and sheet_exists:
        # remove [sheet_name] and create an empty sheet at the same index
        idx = book.sheetnames.index(sheet_name)
        book.remove(book.worksheets[idx])
        book.create_sheet(sheet_name, idx)
        # the recreated sheet is empty, so the old last row must NOT be used
        # as the default start row (startrow stays None here -> 0 below)
    elif startrow is None and sheet_exists:
        # continue below the last used row of the existing sheet
        startrow = book[sheet_name].max_row

    if startrow is None:
        startrow = 0

    # Make the writer aware of the sheets already in the workbook. Where
    # `sheets` is a plain dict this registers them (including a freshly
    # recreated sheet); where pandas derives `sheets` from the workbook on
    # every access this only touches a temporary copy and is harmless.
    writer.sheets.update({ws.title: ws for ws in book.worksheets})

    # write out the new data
    df.to_excel(writer, sheet_name=sheet_name, startrow=startrow, **to_excel_kwargs)

    # save the workbook and release the underlying file handle
    writer.close()
    
def selHelp (URL):
    """
    Load [URL] in a Selenium-driven Chrome window, scroll down the page in
    fixed steps so that items on the page get a chance to load, and return
    the page source.

    The page height is measured once after the initial load; scrolling
    advances 1000 pixels at a time and stops before the last 1000 pixels of
    that height.

    @param URL: address of the page to load
    @return: the driver's page source after scrolling
    """
    SCROLL_STEP = 1000       # pixels advanced per scroll
    SCROLL_PAUSE_TIME = 0.5  # seconds to wait after each scroll

    driver = webdriver.Chrome()  # No driver path given, so it is searched for on PATH.
    try:
        driver.get(URL)

        # Get scroll height
        last_height = driver.execute_script("return document.body.scrollHeight")
        for x in range(0, last_height - SCROLL_STEP, SCROLL_STEP): #this is necessary to allow items on page to load
            # window.scrollTo takes (x-coord, y-coord): keep the horizontal
            # position at 0 and only move down the page
            driver.execute_script("window.scrollTo(0, " + str(x + SCROLL_STEP) + ");")
            # Wait to load page
            time.sleep(SCROLL_PAUSE_TIME)

        time.sleep(2) # Let the user actually see something!
        return driver.page_source
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window), and runs even when loading or scrolling fails
        driver.quit()

In [130]:
# Purpose: A webscraper for Honda Civic from Waterloo Honda

URL = "https://www.waterloohonda.com/en/used-inventory/honda/civic_sedan"
soup = BeautifulSoup(selHelp(URL)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
vehicleName=[] # Model name text of each listing
year=[] # First four characters of the year/make label
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text minus its last character, or 'N/A' when the tile has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the tile has none

for a in soup.find_all('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'}):
    year.append(a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--year-make'}).text[:4])
    vehicleName.append(a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--model-name'}).text)
    vehicleDetails.append("https://www.waterloohonda.com/"+ a.find('a', attrs={'role':'button'}).attrs['href'])
    mileage.append(a.find('div', attrs={'itemprop':'mileageFromOdometer'}).text.replace('\n', '').strip()[:-3])
    try:
        price.append(a.find('span', attrs={'itemprop':'price'}).text.replace('\n', '').strip()[:-1])
    except AttributeError: # find() returned None: no price element on this tile
        price.append('N/A')
    try:
        carfaxLink.append(a.find('a', attrs={'style':'display: inline-block;'}).attrs['href'])
    except (AttributeError, KeyError): # no matching link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Waterloo', index=False)

(5, 6)


In [131]:
# Purpose: A webscraper for Honda Civic from Kitchener Honda

#-----------Initialization------------------
vehicleName=[] # 'Civic ' followed by the trim text of each listing
year=[] # First four characters of the model-year label
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text minus its last character, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Word that precedes 'Int:' in the description text

url = "https://www.kitchenerhonda.com/inventory.html?filterid=a6b1d1BeADq0-10x0-0-0"
soup = BeautifulSoup(selHelp(url)) #Selenium needed for Carfax reports to be printed

for a in soup.find_all('li', attrs={'class':'carBoxWrapper'}):
    year.append(a.find('div', attrs={'class':'divModelYear elIsGreyable'}).text[:4])
    vehicleName.append('Civic ' + a.find('div', attrs={'class':'divTrim elIsGreyable'}).text.replace('\xa0', ''))
    # the first two characters of the href are dropped (presumably a leading '//' - TODO confirm)
    vehicleDetails.append(a.find('a').attrs['href'][2:])
    mileage.append(a.find('span', attrs={'class':'s-km'}).text[:-3])
    colorTemp = a.find('span', attrs={'class':'s-desc'}).text.split()
    color.append(colorTemp[colorTemp.index('Int:')-1])
    try:
        price.append(a.find('span', attrs={'class':'p-base'}).text[:-1])
    except AttributeError: # find() returned None: no price element on this listing
        price.append("N/A")
    try:
        carfaxLink.append(a.find('a', attrs={'title':'Carfax'}).attrs['href'])
    except (AttributeError, KeyError): # no Carfax link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Kitchener', index=False)

(8, 7)


In [132]:
# Purpose: A webscraper for Honda Civic from Cambridge Honda

#-----------Initialization------------------
vehicleName=[] # Model text of each listing
year=[] # Release-date text of each listing
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Colour text of each listing

# Result pages 1 through 9 are requested unconditionally
for x in range(1, 10):
    URL = "https://www.cambridgecentrehonda.com/used/body/Sedan/model/Civic?pg=" + str(x)
    result = requests.get(URL, timeout=30) # without a timeout a stalled server would hang the cell forever
    src = result.content # raw bytes of the response body
    soup = BeautifulSoup(src)
    for a in soup.find_all('div', attrs={'itemtype':'https://schema.org/Car'}):
        year.append(a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append("https://www.cambridgecentrehonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text[:-3])
        color.append(a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except AttributeError: # find() returned None: no price element on this listing
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except (AttributeError, KeyError): # no Carfax link, or the link has no href
            carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Cambridge', index=False)

(8, 7)


In [133]:
# Purpose: A webscraper for Honda Civic from Toronto Honda

#-----------Initialization------------------
vehicleName=[] # Model text of each listing
year=[] # Release-date text of each listing
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Colour text of each listing

# Result pages 1 through 9 are requested unconditionally
for x in range(1, 10):
    URL = "https://www.torontohonda.com/used/Civic.html?pg=" + str(x)
    result = requests.get(URL, timeout=30) # without a timeout a stalled server would hang the cell forever
    src = result.content # raw bytes of the response body
    soup = BeautifulSoup(src)
    for a in soup.find_all('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
        year.append(a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append("https://www.torontohonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text[:-3])
        color.append(a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except AttributeError: # find() returned None: no price element on this listing
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except (AttributeError, KeyError): # no Carfax link, or the link has no href
            carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Toronto', index=False)

(41, 7)


In [134]:
# Purpose: A webscraper for Honda Civic from MidTown Honda

#-----------Initialization------------------
vehicleName=[] # Model text of each listing
year=[] # Release-date text of each listing
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Colour text of each listing

# Result pages 1 through 9 are requested unconditionally
for x in range(1, 10):
    URL = "https://www.midtownhonda.com/used/model/Civic?pg=" + str(x)
    result = requests.get(URL, timeout=30) # without a timeout a stalled server would hang the cell forever
    src = result.content # raw bytes of the response body
    soup = BeautifulSoup(src)
    for a in soup.find_all('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
        year.append(a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append("https://www.midtownhonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text[:-3])
        color.append(a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except AttributeError: # find() returned None: no price element on this listing
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except (AttributeError, KeyError): # no Carfax link, or the link has no href
            carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Midtown', index=False)

(33, 7)


In [135]:
# Purpose: A webscraper for Honda Civic from Sisley Honda

URL = "https://www.sisleyhonda.com/inventory/used?q=&model=Civic+Sedan"
result = requests.get(URL, timeout=30) # without a timeout a stalled server would hang the cell forever
src = result.content # raw bytes of the response body
soup = BeautifulSoup(src)

#-----------Initialization------------------
vehicleName=[] # Model text plus configuration text of each listing
year=[] # Model-date text of each listing
mileage=[] # Odometer text of each listing
price=[] # Price text of each listing
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the first link that opens in a new tab, or 'N/A' when there is none

for a in soup.find_all('div', attrs={'itemprop':'itemOffered'}):
    # The price is read first on purpose: there's a repeat in the HTML code,
    # and the first element without a price block ends the scan.
    try:
        price.append(a.find('div', attrs={'class':'vehicle-price right vehicle-price-color'}).text)
    except AttributeError: # find() returned None: no price block on this element
        break
    year.append(a.find('span', attrs={'itemprop':'vehicleModelDate'}).text)
    vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text + " " + a.find('span', attrs={'itemprop':'vehicleConfiguration'}).text)
    vehicleDetails.append(a.find('a', attrs={'itemprop':'url'}).attrs['href'])
    mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
    try:
        carfaxLink.append(a.find('a', attrs={'target':'_blank'}).attrs['href'])
    except (AttributeError, KeyError): # no such link, or the link has no href
        carfaxLink.append("N/A")

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Sisley', index=False)

(20, 6)


In [136]:
# Purpose: A webscraper for Honda Civic from Richmond Hill Honda

#-----------Initialization------------------
vehicleName=[] # Model text of each listing with ' 4dr Sedan' removed
year=[] # Release-date text of each listing
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Colour text of each listing

# Result pages 1 through 9 are requested unconditionally
for x in range(1, 10):
    URL = "https://www.richmondhillhonda.com/used/model/Civic?pg=" + str(x)
    result = requests.get(URL, timeout=30) # without a timeout a stalled server would hang the cell forever
    src = result.content # raw bytes of the response body
    soup = BeautifulSoup(src)
    for a in soup.find_all('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
        year.append(a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text.replace(' 4dr Sedan', ''))
        vehicleDetails.append("https://www.richmondhillhonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text[:-3])
        color.append(a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except AttributeError: # find() returned None: no price element on this listing
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except (AttributeError, KeyError): # no Carfax link, or the link has no href
            carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='RichmondHill', index=False)

(24, 7)


In [137]:
# Purpose: A webscraper for Honda Civic from Markham Honda

#-----------Initialization------------------
vehicleName=[] # Model name text of each listing
year=[] # First four characters of the year/make label
mileage=[] # Options text with newlines and 'Front Wheel DriveAutomatic' removed, minus its last three characters
price=[] # Total price text, or 'N/A' when the tile has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the tile has none

driver = webdriver.Chrome()  # No driver path given, so it is searched for on PATH.
try:
    # Result pages 1 through 9 are loaded unconditionally in the same browser
    for x in range(1, 10):
        url = "https://www.markhamhonda.com/en/used-inventory?modelId=2311&frameStyleId=1&namedSorting=default&limit=24&page=" + str(x)
        driver.get(url)
        time.sleep(2) # Let the user actually see something!
        src = driver.page_source
        soup = BeautifulSoup(src)
        for a in soup.find_all('div', attrs={'data-dealer-makes':'honda'}):
            try: #to remove the blank entry (1st one)
                year.append(a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--year-make'}).text[0:4]) #remove the Honda
            except AttributeError: # find() returned None: this element has no year/make label
                continue
            vehicleName.append(a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--model-name'}).text)
            vehicleDetails.append("https://www.markhamhonda.com"+ a.find('a', attrs={'data-theme-style':'vehiclePreviewName_color'}).attrs['href'])
            mileage.append(a.find('div', attrs={'class':'inventory-tile-section-options'}).text.replace('\n', '').replace('Front Wheel DriveAutomatic', '')[:-3])
            try:
                price.append(a.find('span', attrs={'class':'inventory-tile-section-price-tabs-panel-item__total-value'}).text.replace('\n', ''))
            except AttributeError: # find() returned None: no price element on this tile
                price.append("N/A")
            try:
                carfaxLink.append(a.find('a', attrs={'style':'display: inline-block;'}).attrs['href'])
            except (AttributeError, KeyError): # no matching link, or the link has no href
                carfaxLink.append('N/A')
finally:
    # quit() ends the whole browser session (close() only closes the current
    # window), and runs even when a page fails to load or parse
    driver.quit()

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Markham', index=False)

(30, 6)


In [138]:
# Purpose: A webscraper for Honda Civic from RoadSport Honda

url = "https://www.roadsport.com/used-inventory/index.htm?model=Civic"
soup = BeautifulSoup(selHelp(url)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
vehicleName=[] # Card heading minus its first 12 characters (presumably the year/make prefix - TODO confirm)
year=[] # First four characters of the small-font label
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the card has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the first rel="nofollow" link, or 'N/A' when the card has none
color=[] # Exterior colour text of each listing

for a in soup.find_all('li', attrs={'class':'vehicle-card vehicle-card-detailed vehicle-card-horizontal'}):
    year.append(a.find('span', attrs={'class':'ddc-font-size-small'}).text[0:4])
    vehicleName.append(a.find('h2').text[12:])
    vehicleDetails.append("https://www.roadsport.com" + a.find('a').attrs['href'])
    mileage.append(a.find('li', attrs={'class':'odometer'}).text[:-3])
    color.append(a.find('li', attrs={'class':'normalized-swatch-container exteriorColor'}).text)
    try: 
        price.append(a.find('span', attrs={'class':'price-value'}).text)
    except AttributeError: # find() returned None: no price element on this card
        price.append("N/A")
    try:
        carfaxLink.append(a.find('a', attrs={'rel':'nofollow'}).attrs['href'])
    except (AttributeError, KeyError): # no such link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='RoadSport', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

(13, 7)


In [139]:
# Purpose: A webscraper for Honda Civic from Maple Honda

url = "https://www.maplehonda.com/used-inventory/index.htm?model=Civic"
soup = BeautifulSoup(selHelp(url)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
vehicleName=[] # Card heading minus its first 12 characters (presumably the year/make prefix - TODO confirm)
year=[] # First four characters of the small-font label
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the card has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the first rel="nofollow" link, or 'N/A' when the card has none
color=[] # Exterior colour text of each listing

for a in soup.find_all('li', attrs={'class':'vehicle-card vehicle-card-detailed vehicle-card-horizontal'}):
    year.append(a.find('span', attrs={'class':'ddc-font-size-small'}).text[0:4])
    vehicleName.append(a.find('h2').text[12:])
    vehicleDetails.append("https://www.maplehonda.com" + a.find('a').attrs['href'])
    mileage.append(a.find('li', attrs={'class':'odometer'}).text[:-3])
    color.append(a.find('li', attrs={'class':'normalized-swatch-container exteriorColor'}).text)
    try: 
        price.append(a.find('span', attrs={'class':'price-value'}).text)
    except AttributeError: # find() returned None: no price element on this card
        price.append("N/A")
    try:
        carfaxLink.append(a.find('a', attrs={'rel':'nofollow'}).attrs['href'])
    except (AttributeError, KeyError): # no such link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Maple', index=False)

(5, 7)


In [140]:
# Purpose: A webscraper for Honda Civic from Number 7 Honda

url = "https://www.number7honda.com/used-inventory/index.htm?bodyStyle=Sedan&make=Honda&model=Civic"
soup = BeautifulSoup(selHelp(url)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
vehicleName=[] # Card heading minus its first 12 characters (presumably the year/make prefix - TODO confirm)
year=[] # First four characters of the small-font label
mileage=[] # Placeholder text: the mileage is not scraped for this dealership
price=[] # Price text, or 'N/A' when the card has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the first rel="nofollow" link, or 'N/A' when the card has none
color=[] # Exterior colour text of each listing

for a in soup.find_all('li', attrs={'class':'vehicle-card vehicle-card-detailed vehicle-card-horizontal'}):
    year.append(a.find('span', attrs={'class':'ddc-font-size-small'}).text[0:4])
    vehicleName.append(a.find('h2').text[12:])
    vehicleDetails.append("https://www.number7honda.com" + a.find('a').attrs['href'])
    mileage.append("Not on main page")
    color.append(a.find('li', attrs={'class':'normalized-swatch-container exteriorColor'}).text)
    try: 
        price.append(a.find('span', attrs={'class':'price-value'}).text)
    except AttributeError: # find() returned None: no price element on this card
        price.append("N/A")
    try:
        carfaxLink.append(a.find('a', attrs={'rel':'nofollow'}).attrs['href'])
    except (AttributeError, KeyError): # no such link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Num7', index=False)

(14, 7)


In [141]:
# Purpose: A webscraper for Honda Civic from Formula Honda

url = "https://www.formulahonda.com/used-vehicles/?_dFR%5Bmodel%5D%5B0%5D=Civic%2520Sedan&_dFR%5Btype%5D%5B0%5D=Used&_dFR%5Btype%5D%5B1%5D=Certified%2520Used"
soup = BeautifulSoup(selHelp(url)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
vehicleName=[] # Bottom title line minus its first six characters (presumably the make - TODO confirm)
year=[] # Last four characters of the top title line
mileage=[] # Mileage text with 'Odometer: ' removed
price=[] # Price text, or 'N/A' when the hit has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # Placeholder text: no Carfax link is scraped for this dealership
color=[] # Exterior colour text with 'Exterior: ' removed

for a in soup.find_all('div', attrs={'class':'hit'}):
    year.append(a.find('span', attrs={'class':'title-top'}).text[-4:])
    vehicleName.append(a.find('span', attrs={'class':'title-bottom'}).text[6:])
    vehicleDetails.append(a.find('a', attrs={'class':'hit-link'}).attrs['href'])
    mileage.append(a.find('li', attrs={'class':'vehicle-details--item mileage'}).text.replace('Odometer: ', ''))
    color.append(a.find('li', attrs={'class':'vehicle-details--item vehicle-details--item-color exterior'}).text.replace('Exterior: ', ''))
    try: 
        price.append(a.find('span', attrs={'class':'price'}).text)
    except AttributeError: # find() returned None: no price element on this hit
        price.append("N/A")
    carfaxLink.append("can't find link")

df = pd.DataFrame({
    'Vehicle':vehicleName,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
print(df.head(30))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Formula', index=False)

(7, 7)
             Vehicle  Year             Color  Mileage    Price  \
0  Civic Sedan Sport  2019              Blue  147,675  $16,600   
1     Civic Sedan SE  2018     TAFFETA WHITE   34,959  $17,500   
2     Civic Sedan LX  2019  MODERN STEEL MET   87,491  $17,500   
3     Civic Sedan LX  2019            Silver   56,249  $17,600   
4     Civic Sedan LX  2019  MODERN STEEL MET   68,504  $17,700   
5  Civic Sedan Sport  2019   AEGEAN BLUE MET   66,473  $19,600   
6     Civic Sedan LX  2020             White    9,942  $20,300   

                                                Link      Carfax Link  
0  https://www.formulahonda.com/inventory/used-20...  can't find link  
1  https://www.formulahonda.com/inventory/certifi...  can't find link  
2  https://www.formulahonda.com/inventory/used-20...  can't find link  
3  https://www.formulahonda.com/inventory/certifi...  can't find link  
4  https://www.formulahonda.com/inventory/certifi...  can't find link  
5  https://www.formulahonda.com/

In [142]:
# Purpose: A webscraper for Honda Civic from Parkway Honda

url = "https://www.parkwayhonda.com/used/make/Honda/body/Sedan/model/Civic/s/year/o/desc"
soup = BeautifulSoup(selHelp(url)) # page source is fetched through selHelp (Selenium) before parsing

#-----------Initialization------------------
model=[] # Model text of each listing with ' 4dr Sedan' removed
year=[] # Release-date text of each listing
mileage=[] # Odometer text minus its last three characters (presumably the unit - TODO confirm)
price=[] # Price text, or 'N/A' when the listing has no price element
vehicleDetails=[] # Link to the listing's detail page
carfaxLink=[] # href of the Carfax link, or 'N/A' when the listing has none
color=[] # Colour text of each listing

for a in soup.find_all('div', attrs={'itemtype':'https://schema.org/Car'}):
    year.append(a.find('span', attrs={'itemprop':'releaseDate'}).text)
    model.append(a.find('span', attrs={'itemprop':'model'}).text.replace(' 4dr Sedan',''))
    mileage.append(a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text[:-3])
    color.append(a.find('td', attrs={'itemprop':'color'}).text)
    vehicleDetails.append("https://www.parkwayhonda.com" + a.find('a', attrs={'class':'stat-text-link'}).attrs['href'])
    try: 
        price.append(a.find('span', attrs={'itemprop':'price'}).text)
    except AttributeError: # find() returned None: no price element on this listing
        price.append("N/A")
    try:
        carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
    except (AttributeError, KeyError): # no Carfax link, or the link has no href
        carfaxLink.append('N/A')

df = pd.DataFrame({
    'Vehicle':model,
    'Year':year,
    'Color':color,
    'Mileage (km)':mileage,
    'Price':price,
    'Link':vehicleDetails,
    'Carfax Link':carfaxLink,
})
print (df.shape)
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='Parkway', index=False)

(16, 7)
