In [1]:
# Importing Libraries
import time
from urllib.parse import urljoin

import chromedriver_binary
import numpy as np
import openpyxl
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
# Setting up webdriver
# Reach Chrome through the package path: the name `webdriver` is rebound
# below to the driver instance (later cells use it that way), so calling
# `webdriver.Chrome()` would raise AttributeError if this cell were run a
# second time in the same kernel.
import selenium.webdriver

webdriver = selenium.webdriver.Chrome()
# Element lookups retry for up to 10 seconds before failing
webdriver.implicitly_wait(10)

In [3]:
# Open the Amazon.ca landing page in the controlled browser session
AMAZON_HOME_URL = "https://amazon.ca"
webdriver.get(AMAZON_HOME_URL)

In [4]:
# Entering Location Info in Order To Find Better Results
# Uses find_element(By.ID, ...): the find_element_by_id shortcut was removed
# in Selenium 4.3, while this form works on both old and new releases.
webdriver.find_element(By.ID, "nav-global-location-popover-link").click()
postalcode_first3 = webdriver.find_element(By.ID, "GLUXZipUpdateInput_0")
postalcode_last3 = webdriver.find_element(By.ID, "GLUXZipUpdateInput_1")
# The postal code is typed into two separate inputs, three characters each
postalcode_first3.send_keys("L6Z")
postalcode_last3.send_keys("0H9")


In [5]:
# Submitting Location Info
# find_element(By.ID, ...) replaces find_element_by_id, removed in Selenium 4.3
webdriver.find_element(By.ID, "GLUXZipUpdate").click()

In [6]:

def SearchResults(querylink, page_delay=2):
    """Scrape every page of an Amazon search and return the priced listings.

    Loads ``querylink`` in the module-level ``webdriver`` browser session,
    parses each results page with BeautifulSoup, and follows the pagination
    "next" button until it is missing or disabled.

    Parameters
    ----------
    querylink : str
        URL of the first search-results page.
    page_delay : int or float, optional
        Seconds to pause after clicking "next" so the following page can
        load before it is parsed (default 2).

    Returns
    -------
    pandas.DataFrame
        One row per listing that shows a price, with columns ``Name``,
        ``Price`` (float, thousands separators removed) and ``Link`` (the
        ``href`` exactly as it appears in the page). Rows carry a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If a scraped price cannot be converted to a float.
    """
    webdriver.get(querylink)

    columns = ["Name", "Price", "Link"]
    items = []

    # Loop through pages while parsing data until the next page button is
    # missing or disabled
    while True:
        # Rebuild the soup on every pass: page_source changes after each
        # click, so a soup built once would keep re-reading the first page.
        soup = BeautifulSoup(webdriver.page_source, "html.parser")

        for result in soup.find_all("div", {"data-component-type": "s-search-result"}):
            name = result.find("span", {"class": "a-text-normal"})
            price = result.find("span", {"class": "a-price-whole"})
            link = result.find("a", {"class": "a-link-normal"})

            # only add items with a listed price; cards without a title or
            # link are skipped too, since they cannot be reported
            if price is None or name is None or link is None:
                continue
            if not link.get("href"):
                continue
            items.append([name.text, price.text, link["href"]])

        # find_elements returns an empty list instead of raising when the
        # results fit on one page and no pagination bar is rendered.
        next_buttons = webdriver.find_elements(By.CLASS_NAME, "s-pagination-next")
        if not next_buttons:
            break

        next_page_button = next_buttons[0]
        attributes = next_page_button.get_attribute("class").split()
        # The current (last) page has already been parsed above, so it is
        # safe to stop as soon as the button reports itself disabled.
        if "s-pagination-disabled" in attributes:
            break

        next_page_button.click()

        # cooldown period, also gives the next page time to render
        time.sleep(page_delay)

    # Build the frame once from all collected rows; this avoids repeated
    # concatenation and the duplicate index labels it produces.
    Results_Dataframe = pd.DataFrame.from_records(items, columns=columns)

    # Convert Price Column to a Float
    Results_Dataframe["Price"] = (
        Results_Dataframe["Price"].str.replace(",", "", regex=False).astype(float)
    )

    return Results_Dataframe
        

In [7]:
# Amazon query link
ResultsDataFrame = SearchResults("https://www.amazon.ca/s?k=3080+ti&i=electronics&rh=n%3A677243011%2Cp_n_feature_browse-bin%3A23883856011%2Cp_n_feature_fourteen_browse-bin%3A58373581011&dc&ds=v1%3AkJ3wrzdxa8ZRlN2EEN59lVrcQ%2F0OIEkAVMZB4ClFyOc&crid=20SQBE23HQYQG&qid=1673604787&rnid=58373578011&sprefix=3080+ti%2Caps%2C69&ref=sr_nr_p_n_feature_fourteen_browse-bin_4")

# Filter items by name / price
itemFilter = "3080 Ti"
priceFilter = 3000
# regex=False matches the filter text literally, so characters such as
# "+" or "(" in a future filter are not interpreted as a pattern.
NewDataFrame = ResultsDataFrame[
    ResultsDataFrame.Name.str.contains(itemFilter, regex=False)
    & (ResultsDataFrame.Price <= priceFilter)
]

# Drop any potential duplicate listings from the dataframe.
# Duplicates are detected by row content (Name, Price, Link); drop_duplicates
# returns a new frame, so the result must be assigned back to take effect.
NewDataFrame = NewDataFrame.drop_duplicates().reset_index(drop=True)

# Fix any incomplete links to items.
# urljoin turns a relative href into an absolute amazon.ca URL and leaves an
# already-absolute URL untouched. Working on the frame itself (rather than a
# NumPy copy) keeps the Name/Price/Link headers and dtypes for the export.
AMAZON_BASE_URL = "https://www.amazon.ca"
df = NewDataFrame.assign(
    Link=NewDataFrame["Link"].map(lambda href: urljoin(AMAZON_BASE_URL, href))
)

# Export dataframe to excel file, create it if it doesn't exist.
df.to_excel("output.xlsx")

