In [1]:
# Name: Abel
# Date: 2021.07.21
# Purpose: Helper function to append to excel if it exists, and to create one if it doesn't
# Reference: https://stackoverflow.com/questions/38074678/append-existing-excel-sheet-with-new-dataframe-using-python-pandas

import os
from openpyxl import load_workbook


def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    @param filename: File path or existing ExcelWriter
                     (Example: '/path/to/file.xlsx')
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: upper left cell row to dump data frame.
                     Per default (startrow=None) calculate the last row
                     in the existing DF and write to the next row...
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]
    @return: None

    Usage examples:

    >>> append_df_to_excel('d:/temp/test.xlsx', df)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('d:/temp/test.xlsx', df, sheet_name='Sheet2', 
                           index=False, startrow=25)

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    # Excel file doesn't exist - saving and exiting
    if not os.path.isfile(filename):
        df.to_excel(
            filename,
            sheet_name=sheet_name, 
            startrow=startrow if startrow is not None else 0, 
            **to_excel_kwargs)
        return
    
    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    writer = pd.ExcelWriter(filename, engine='openpyxl', mode='a')

    # try to open an existing workbook
    writer.book = load_workbook(filename)
    
    # get the last row in the existing Excel sheet
    # if it was not specified explicitly
    if startrow is None and sheet_name in writer.book.sheetnames:
        startrow = writer.book[sheet_name].max_row

    # truncate sheet
    if truncate_sheet and sheet_name in writer.book.sheetnames:
        # index of [sheet_name] sheet
        idx = writer.book.sheetnames.index(sheet_name)
        # remove [sheet_name]
        writer.book.remove(writer.book.worksheets[idx])
        # create an empty sheet [sheet_name] using old index
        writer.book.create_sheet(sheet_name, idx)
    
    # copy existing sheets
    writer.sheets = {ws.title:ws for ws in writer.book.worksheets}

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

    # save the workbook
    writer.save()

In [2]:
# Name: Abel
# Date: 2021.07.20
# Purpose: A webscraper for Honda Civic from Waterloo and Toronto Dealerships
# Reference: Webscraper from Edureka

from bs4 import BeautifulSoup
import pandas as pd
import requests
from datetime import date


In [None]:
#Purpose: A webscraper for Honda Civic from Waterloo Honda

URL = "https://www.waterloohonda.com/en/used-inventory/honda/civic_sedan"
result = requests.get(URL)
#print (result.status_code) #sanity check to ensure that the site exists, should return 200
src = result.content #saves the entire source code...
soup = BeautifulSoup(src)

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
yearMake=[] #List to year & make of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
#carfaxLink=[] #list of carfax
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})
#print (len(numCars))
for a in soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'}):
    yearMake.append (a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--year-make'}).text)
    vehicleName.append(a.find('span', attrs={'class':'inventory-tile-section-vehicle-name--model-name'}).text)
    vehicleDetails.append ("https://www.waterloohonda.com/"+ a.find('a', attrs={'role':'button'}).attrs['href'])
    tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
    mileage.append (a.find('div', attrs={'itemprop':'mileageFromOdometer'}).text.replace('\n', '').strip())
    price.append(a.find('span', attrs={'itemprop':'price'}).text.replace('\n', '').strip())
df = pd.DataFrame({'Vehicle':vehicleName, 'Year & Make':yearMake, 'Tagline': tagline, 'Mileage':mileage, 'Price':price,'Link':vehicleDetails}) 
print (df.shape)
#print( df.head(10))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='WaterlooHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

In [45]:
# Purpose: A webscraper for Honda Civic from Toronto Honda

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
year=[] #List to year  of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
carfaxLink=[] #list of carfax
color=[]
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})

for x in range(1, 10):
    #print ("round:", x)
    URL = "https://www.torontohonda.com/used/Civic.html?pg=" + str(x)
    result = requests.get(URL)
    #print (result.status_code) #sanity check to ensure that the site exists, should return 200
    src = result.content #saves the entire source code...
    soup = BeautifulSoup(src)
    #print (len(numCars))
    for a in soup.findAll('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
    #    print(len(a))    
        year.append (a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append ("https://www.torontohonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        #tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
        mileage.append (a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
        color.append (a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except:
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except:
            carfaxLink.append('N/A')
df = pd.DataFrame({'Vehicle':vehicleName, 'Year':year, 'Color':color, 'Mileage':mileage, 'Price':price, 'Link':vehicleDetails, 'Carfax Link':carfaxLink}) 
print (df.shape)
#print( df.head(10))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='TorontoHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

(40, 7)


In [48]:
# Purpose: A webscraper for Honda Civic from MidTown Honda

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
year=[] #List to year  of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
carfaxLink=[] #list of carfax
color=[]
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})

for x in range(1, 10):
    #print ("round:", x)
    URL = "https://www.midtownhonda.com/used/model/Civic?pg=" + str(x)
    result = requests.get(URL)
    #print (result.status_code) #sanity check to ensure that the site exists, should return 200
    src = result.content #saves the entire source code...
    soup = BeautifulSoup(src)
    #print (len(numCars))
    for a in soup.findAll('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
    #    print(len(a))    
        year.append (a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append ("https://www.midtownhonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        #tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
        mileage.append (a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
        color.append (a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except:
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except:
            carfaxLink.append('N/A')
df = pd.DataFrame({'Vehicle':vehicleName, 'Year':year, 'Color':color, 'Mileage':mileage, 'Price':price, 'Link':vehicleDetails, 'Carfax Link':carfaxLink}) 
print (df.shape)
#print( df.head(10))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='MidtownHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

In [77]:
# Purpose: A webscraper for Honda Civic from Sisley Honda

#print ("round:", x)
URL = "https://www.sisleyhonda.com/inventory/used?q=&model=Civic+Sedan"
result = requests.get(URL)
#print (result.status_code) #sanity check to ensure that the site exists, should return 200
src = result.content #saves the entire source code...
soup = BeautifulSoup(src)
#print (len(numCars))

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
year=[] #List to year  of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
carfaxLink=[] #list of carfax
color=[]
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})
#print(len(soup.findAll('div', attrs={'itemtype':'itemOffered'})))    
for a in soup.findAll('div', attrs={'itemprop':'itemOffered'}):
    try: #***this should be placed on top because there's a repeat in the HTML code...***
        price.append(a.find('div', attrs={'class':'vehicle-price right vehicle-price-color'}).text)
    except:
        break
    year.append (a.find('span', attrs={'itemprop':'vehicleModelDate'}).text)
    vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text + " " + a.find('span', attrs={'itemprop':'vehicleConfiguration'}).text)
    vehicleDetails.append (a.find('a', attrs={'itemprop':'url'}).attrs['href'])
    #tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
    mileage.append (a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
#    color.append (a.find('td', attrs={'itemprop':'color'}).text)
    carfaxLink.append(a.find('a', attrs={'target':'_blank'}).attrs['href'])
#print (year)
#print (vehicleName)
df = pd.DataFrame({'Vehicle':vehicleName, 'Year':year, 'Mileage':mileage, 'Price':price, 'Link':vehicleDetails, 'Carfax Link':carfaxLink}) 
print (df.shape)
#print( df.head(34))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='SisleyHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

(17, 6)


In [81]:
# Purpose: A webscraper for Honda Civic from Richmond Hill Honda

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
year=[] #List to year  of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
carfaxLink=[] #list of carfax
color=[]
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})

for x in range(1, 10):
    #print ("round:", x)
    URL = "https://www.richmondhillhonda.com/used/model/Civic?pg=" + str(x)
    result = requests.get(URL)
    #print (result.status_code) #sanity check to ensure that the site exists, should return 200
    src = result.content #saves the entire source code...
    soup = BeautifulSoup(src)
    #print (len(numCars))
    for a in soup.findAll('div', attrs={'class':'vehicle-list-cell listing-page-row-padding-0'}):
    #    print(len(a))    
        year.append (a.find('span', attrs={'itemprop':'releaseDate'}).text)
        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
        vehicleDetails.append ("https://www.richmondhillhonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        #tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
        mileage.append (a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
        color.append (a.find('td', attrs={'itemprop':'color'}).text)
        try:
            price.append(a.find('span', attrs={'itemprop':'price'}).text)
        except:
            price.append("N/A")
        try:
            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
        except:
            carfaxLink.append('N/A')
df = pd.DataFrame({'Vehicle':vehicleName, 'Year':year, 'Color':color, 'Mileage':mileage, 'Price':price, 'Link':vehicleDetails, 'Carfax Link':carfaxLink}) 
print (df.shape)
#print( df.head(30))
append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='RichmondHillHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

(27, 7)


In [135]:
# Purpose: A webscraper for Honda Civic from Richmond Hill Honda

#-----------Initialization------------------
vehicleName=[] #List to vehicle model and trim of the product
year=[] #List to year  of the product
tagline=[] #List of taglines
mileage=[] # List of mileage
price=[] #List to store price of the product
vehicleDetails=[] #List to store rating of the product
carfaxLink=[] #list of carfax
color=[]
#numCars = soup.findAll('article', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item small-12 medium-6 large-4 cell'})

for x in range(1, 10):
    #print ("round:", x)
    URL = "https://www.markhamhonda.com/en/used-inventory?modelId=2311&namedSorting=default&limit=24&frameStyleId=1&page=" + str(x)
    result = requests.get(URL)
    #print (result.status_code) #sanity check to ensure that the site exists, should return 200
    src = result.content #saves the entire source code...
    soup = BeautifulSoup(src)
    print (soup)
    #print (len(numCars))
    for a in soup.findAll('div', attrs={'class':'inventory-tile inventory-listing-charlie__vehicles-item'}):
        print(a.find('a'))    
#        year.append (a.find('span'))
#        vehicleName.append(a.find('span', attrs={'itemprop':'model'}).text)
#        vehicleDetails.append ("https://www.richmondhillhonda.com/"+ a.find('a', attrs={'data-loc':'vehicle details'}).attrs['href'])
        #tagline.append (a.find('div', attrs={'class':'inventory-tile-section-tagline'}).text.replace('\n', ''))
#        mileage.append (a.find('span', attrs={'itemprop':'mileageFromOdometer'}).text)
#        color.append (a.find('td', attrs={'itemprop':'color'}).text)
#        try:
#            price.append(a.find('span', attrs={'itemprop':'price'}).text)
#        except:
#            price.append("N/A")
#        try:
#            carfaxLink.append(a.find('a', attrs={'class':'carfax_logo_a'}).attrs['href'])
#        except:
#            carfaxLink.append('N/A')
#df = pd.DataFrame({'Vehicle':vehicleName, 'Year':year, 'Color':color, 'Mileage':mileage, 'Price':price, 'Link':vehicleDetails, 'Carfax Link':carfaxLink}) 
#print (df.shape)
#print( df.head(30))
#append_df_to_excel (date.today().strftime("%Y.%m.%d") + ' Honda Civics.xlsx', df, sheet_name='RichmondHillHonda', index=False)
#df.to_csv('Waterloo Honda - Civic.csv', index=False, encoding='utf-8') #to make a .csv file containing this table

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<meta content="SKYPE_TOOLBAR_PARSER_COMPATIBLE" name="SKYPE_TOOLBAR"/>
<link href="/en/used-inventory" rel="canonical"/>
<title>Markham Honda | Used vehicles for Sale</title>
<meta content="We carry quality cars and SUVs in our Used vehicles inventory. Book a test drive today or contact us for prices and more information in Markham." name="description"/>
<meta content="website" property="og:type"/>
<meta content="Markham Honda | Used vehicles for Sale" property="og:title"/>
<meta content="We carry quality cars and SUVs in our Used vehicles inventory. Book a test drive today or contact us for prices and more information in Markham." property="og:description"/>
<meta content="Markham Honda" property="og:site_name"/>
<meta content="/en/used-inventory" property="og:url"/>
<meta content="695724751313999" property="fb:app_id"/>
<meta content="summary" name="twitter: