In [6]:
"""
MAIN PROGRAM
"""

# import libraries
import csv
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium import webdriver
import chromedriver_binary
import pandas as pd

# takes in a search term, returns amazon search url
def get_url(search_term):
    """Build the Amazon search URL template for a search term.

    The term is percent-encoded with ``quote_plus`` (spaces become ``+``),
    so characters such as ``&``, ``#`` or ``+`` cannot corrupt the query
    string, and literal ``{`` / ``}`` in the term cannot break the later
    ``str.format`` call that fills in the page number.

    Args:
        search_term: Free-text search query (string).

    Returns:
        A URL string ending in ``&page={}``. Call ``.format(page_number)``
        on it to obtain the URL of a specific result page.
    """
    # construct template for search link
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'

    # encode the term first: braces become %7B/%7D, so the only remaining
    # format placeholder in the returned string is the page number
    encoded_term = quote_plus(search_term)

    # add term query to url, then a placeholder for page number
    return template.format(encoded_term) + '&page={}'

# given an item on the site, extract the description, url, price, rating, and review count
def extract_record(item):
    """Extract one product record from a search-result element.

    Args:
        item: A parsed result element. It must expose ``.h2``, ``.i`` and
            ``.find(name, css_class)`` the way a BeautifulSoup tag does.

    Returns:
        A tuple ``(description, price, rating, review_count, url)``, or
        ``None`` when the item is unusable: it has no title link, the link
        has no ``href``, or no price is present. ``rating`` and
        ``review_count`` are empty strings when they cannot both be read.
    """
    # description and url
    # Guard each step: a result tile without <h2><a href=...> would otherwise
    # raise and abort the whole scrape instead of just being skipped.
    heading = item.h2
    atag = heading.a if heading is not None else None
    if atag is None:
        return None

    href = atag.get('href')
    if href is None:
        return None

    description = atag.text.strip()
    url = 'https://www.amazon.com' + href

    # price -- items without a price are skipped entirely
    try:
        price_parent = item.find('span', 'a-price')
        price = price_parent.find('span', 'a-offscreen').text
    except AttributeError:
        return None

    # rating and review count -- optional; both blank if either is missing
    try:
        rating = item.i.text
        review_count = item.find('span', 'a-size-base').text
    except AttributeError:
        rating = ''
        review_count = ''

    return (description, price, rating, review_count, url)

# main function, inputs a search term (string) and writes results out to a csv
def main(search_term, max_pages=20, output_path='results.csv'):
    """Scrape Amazon search results for a term and write them to a CSV file.

    Opens a Chrome WebDriver, loads result pages 1 through ``max_pages`` for
    ``search_term``, extracts a record from every product on each page with
    ``extract_record`` (items for which it returns a falsy value are
    skipped), and writes all records to ``output_path``.

    Args:
        search_term: Search query string passed to ``get_url``.
        max_pages: Number of result pages to visit, starting at page 1.
            Defaults to 20.
        output_path: Path of the CSV file to (over)write. Defaults to
            ``'results.csv'``.

    Returns:
        None. The result is the CSV file, with the header row
        ``Description, Price, Rating, ReviewCount, Url``.
    """
    # construct the url template before opening a browser, so a bad term
    # fails without leaving a browser behind
    url = get_url(search_term)

    # initialize records list
    records = []

    # open a webdriver
    driver = webdriver.Chrome()
    try:
        # iterate through the requested result pages
        for page in range(1, max_pages + 1):

            # retrieve the page and parse the html
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # get all divs that correspond to products
            results = soup.find_all('div', {'data-component-type': 's-search-result'})

            # for each product on the page, extract the information and append it to records
            for item in results:
                record = extract_record(item)
                if record:
                    records.append(record)
    finally:
        # always shut the driver down, even if a page load or parse fails;
        # quit() ends the whole session rather than only closing the window
        driver.quit()

    # write the results to the output csv
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)

In [3]:
"""
SAMPLE RUN USING 'noise cancelling headphones'
""" 

# Runs the full scrape for the sample term: opens a Chrome WebDriver and
# writes the scraped records to results.csv in the working directory.
main('noise cancelling headphones')

In [8]:
"""
SAMPLE RESULT
"""

# Load the CSV written by main() and preview the first 10 rows.
# Requires that the sample-run cell has been executed first so results.csv exists.
results = pd.read_csv("results.csv")
results.head(10)

Unnamed: 0,Description,Price,Rating,ReviewCount,Url
0,Qisebin E7 Active Noise Cancelling Headphones ...,$50.15,4.4 out of 5 stars,40,https://www.amazon.com/gp/slredirect/picassoRe...
1,"ZIHNIC Active Noise Cancelling Headphones, 40H...",$59.99,4.4 out of 5 stars,290,https://www.amazon.com/gp/slredirect/picassoRe...
2,Beats Studio3 Wireless Noise Cancelling Over-E...,$226.99,4.7 out of 5 stars,7859,https://www.amazon.com/Beats-Studio3-Wireless-...
3,New Bose QuietComfort 45 Bluetooth Wireless No...,$329.00,4.6 out of 5 stars,881,https://www.amazon.com/Bose-QuietComfort-45-Bl...
4,Sony WH-1000XM4 Wireless Industry Leading Nois...,$348.00,4.7 out of 5 stars,26292,https://www.amazon.com/Sony-WH-1000XM4-Canceli...
5,"Qisebin Active Noise Cancelling Headphones, E7...",$57.39,4.0 out of 5 stars,23,https://www.amazon.com/gp/slredirect/picassoRe...
6,Anker Soundcore Life Q20 Hybrid Active Noise C...,$53.99,4.5 out of 5 stars,32458,https://www.amazon.com/Soundcore-Cancelling-He...
7,Soundcore by Anker Life Q30 Hybrid Active Nois...,$79.99,4.6 out of 5 stars,18158,https://www.amazon.com/Soundcore-Cancelling-He...
8,MOVSSOU E7 Active Noise Cancelling Headphones ...,$34.99,4.4 out of 5 stars,1148,https://www.amazon.com/gp/slredirect/picassoRe...
9,Srhythm NC25 Active Noise Cancelling Stereo He...,$59.99,4.4 out of 5 stars,5431,https://www.amazon.com/Cancelling-Headphones-L...
