# Amazon price scraper

In [1]:
import csv
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from msedge.selenium_tools import Edge, EdgeOptions
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By


In [2]:
def get_url(search_text):
    """Build an Amazon search URL template for ``search_text``.

    The returned string still contains a single ``{}`` placeholder for the
    page number; callers fill it in with ``url.format(page)``.

    Args:
        search_text: Free-text search query, e.g. ``'ultrawide monitor'``.

    Returns:
        str: The search URL, ending in ``&page={}``.
    """
    template = 'https://www.amazon.com/s?k={}&ref=nb_sb_noss_1'

    # quote_plus turns spaces into '+' and percent-encodes everything else
    # ('&', '#', braces, ...), so the query can neither inject extra URL
    # parameters nor break the later ``str.format`` call.
    search_term = quote_plus(search_text)

    # add term query to url
    url = template.format(search_term)

    # add page query placeholder; the '=' is required for the value to be
    # sent as a real ``page`` parameter
    url += '&page={}'

    return url

In [3]:
def extract_record(item):
    """Extract the fields of one search-result tile.

    Args:
        item: A parsed search-result element. It is accessed through
            ``item.h2.a``, ``item.i`` and ``item.find(...)``.

    Returns:
        tuple | None: ``(description, price, rating, review_count, url)``.
        ``None`` is returned when the tile has no title link or no price,
        so the caller can skip it. ``rating`` and ``review_count`` are each
        ``''`` when that individual element is missing.
    """
    # description and url -- a tile without a title link is skipped instead
    # of raising and aborting the whole scrape
    try:
        atag = item.h2.a
        description = atag.text.strip()
        href = atag.get('href')
    except AttributeError:
        return None
    if not href:
        return None
    url = 'https://www.amazon.com' + href

    # product price -- tiles without a price are skipped
    price_parent = item.find('span', 'a-price')
    if price_parent is None:
        return None
    price_tag = price_parent.find('span', 'a-offscreen')
    if price_tag is None:
        return None
    price = price_tag.text

    # rating and review count are looked up independently so that a missing
    # review count does not throw away a rating that was found
    rating_tag = item.i
    rating = rating_tag.text if rating_tag is not None else ''

    review_tag = item.find('span', {'class': 'a-size-base', 'dir': 'auto'})
    review_count = review_tag.text if review_tag is not None else ''

    return (description, price, rating, review_count, url)

In [4]:
def main(search_term, max_pages=20, output_path='results.csv', driver_path=None):
    """Scrape Amazon search results for ``search_term`` and save them as CSV.

    Args:
        search_term: Free-text search query passed to ``get_url``.
        max_pages: Number of result pages to request, starting at page 1.
        output_path: CSV file to write; an existing file is overwritten.
        driver_path: Optional path to a chromedriver binary. When omitted,
            Selenium's own driver discovery is used (PATH lookup or
            Selenium Manager, depending on the installed version).
    """
    # startup the webdriver
    options = webdriver.ChromeOptions()
    if driver_path:
        driver = webdriver.Chrome(service=Service(driver_path), options=options)
    else:
        driver = webdriver.Chrome(options=options)

    records = []
    url = get_url(search_term)

    try:
        for page in range(1, max_pages + 1):
            driver.get(url.format(page))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            results = soup.find_all('div', {'data-component-type': 's-search-result'})
            for item in results:
                record = extract_record(item)
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole browser session and the driver process;
        # running it in ``finally`` avoids leaking them when a page fails
        driver.quit()

    # save data to csv file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Description', 'Price', 'Rating', 'ReviewCount', 'Url'])
        writer.writerows(records)


In [5]:
# Run the scraper for a sample query; main() writes the rows to a CSV file.
# Shell setup note left by the author:
# export PATH=$PATH:/Users/ahmedmostafa/Downloads/web_scaping_projects/edgedriver_arm64/msedgedriver.exe
# NOTE(review): PATH entries are directories, so the export above should
# point at the folder containing the driver, not at the executable itself.
main('ultrawide monitor')

In [8]:
# options = EdgeOptions()
# options.use_chromium = True
# driver = Edge(options=options)

In [5]:
# Open a sample product page for the buy-box experiment.
# NOTE(review): this needs a live global `driver`; none of the cells above
# leaves one in scope (main() keeps its driver local), so this fails on a
# fresh kernel -- confirm which cell is meant to create it.
driver.get('https://www.amazon.com/GIGABYTE-Radeon-Graphics-128-Bit-GV-R55XTOC-8GD/dp/B082BXG6Z5/ref=sr_1_4?dchild=1&keywords=amd+video+card&qid=1603932104&sr=8-4')

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Smoke test: start Chrome, run one Bing search, then shut the browser down.
driver = webdriver.Chrome()
try:
    driver.get('https://bing.com')

    element = driver.find_element(By.ID, 'sb_form_q')
    element.send_keys('WebDriver')
    element.submit()

    # leave the results page visible for a few seconds
    time.sleep(5)
finally:
    # always end the session so a failed lookup does not leave a browser
    # and driver process running
    driver.quit()

WebDriverException: Message: Service chromedriver unexpectedly exited. Status code was: -9


In [7]:
# Read the "ships from" / "sold by" values out of the buy box table.
# NOTE(review): expects `driver` to be a live session already showing a
# product page -- confirm which cell provides it.

# get the buy box table and rows
table = driver.find_element(By.CLASS_NAME, 'buybox-tabular-container')
table_rows = table.find_elements(By.TAG_NAME, 'tr')

# ships from: second cell of the first row
ships_from = table_rows[0].find_elements(By.TAG_NAME, 'td')[1].text

# sold by: second cell of the second row
sold_by = table_rows[1].find_elements(By.TAG_NAME, 'td')[1].text

NameError: name 'driver' is not defined

'Amazon.com'

Unnamed: 0,description,price,rating,review_count,url
0,"14.1'' Laptop Screen Extender, 1080P FHD Porta...",179,4.5 out of 5 stars,,https://www.amazon.com/sspa/click?ie=UTF8&spc=...
1,"13-14 Inch Laptop Case Hard Shell, Protective ...",37,4.5 out of 5 stars,,https://www.amazon.com/sspa/click?ie=UTF8&spc=...
2,Dell Latitude 5320 2in1 Gen2 Touchscreen Lapto...,429,4.7 out of 5 stars,,https://www.amazon.com/Dell-Latitude-Touchscre...
3,"Inspiron 15 15.6"" Laptop (FHD Touchscreen, AMD...",495,4.0 out of 5 stars,,https://www.amazon.com/Dell-Inspiron-Touchscre...
4,"Latitude 7430 7000 Business Laptop (14"" FHD, I...",844,,,https://www.amazon.com/Dell-Latitude-7420-Busi...


Unnamed: 0,description,price,rating,review_count,url
0,"14.1'' Laptop Screen Extender, 1080P FHD Porta...",179.0,4.5,0.0,https://www.amazon.com/sspa/click?ie=UTF8&spc=...
1,"13-14 Inch Laptop Case Hard Shell, Protective ...",37.0,4.5,0.0,https://www.amazon.com/sspa/click?ie=UTF8&spc=...
2,Dell Latitude 5320 2in1 Gen2 Touchscreen Lapto...,429.0,4.7,0.0,https://www.amazon.com/Dell-Latitude-Touchscre...
3,"Inspiron 15 15.6"" Laptop (FHD Touchscreen, AMD...",495.0,4.0,0.0,https://www.amazon.com/Dell-Inspiron-Touchscre...
4,"Latitude 7430 7000 Business Laptop (14"" FHD, I...",844.0,4.29596,0.0,https://www.amazon.com/Dell-Latitude-7420-Busi...


Unnamed: 0,price,rating,review_count
count,117.0,117.0,117.0
mean,122.760684,4.295891,0.0
std,171.042576,0.458242,0.0
min,9.0,2.3,0.0
25%,29.0,4.2,0.0
50%,45.0,4.3,0.0
75%,139.0,4.6,0.0
max,844.0,5.0,0.0
