## Amazon Web Scraping

In [14]:
# Libraries

from typing import Optional
from urllib.parse import quote_plus, urljoin

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver     # Webdriver : Remote control interface

In [15]:
# Constants

# Base address of the site, used to build absolute product URLs.
URL = 'https://www.amazon.sg'
# Search endpoint of the site; the '{}' placeholder receives the search keyword.
URL_TEMPLATE = URL + '/s?k={}'

In [16]:
# Functions

def get_url(keyword: str) -> str:
    """
    Build the search-results URL template for a keyword.

    The keyword is URL-encoded and inserted into URL_TEMPLATE. A '&page={}'
    query parameter is then appended with its placeholder left unfilled, so the
    caller selects a page with ``url.format(page_number)``.

    Parameters:
    --------------------------
        keyword (str): Input word to place in the search key.
    --------------------------
    Returns:
        (str): The search URL, still containing one '{}' placeholder for the page number.
    """
    # quote_plus turns spaces into '+' ('tennis ball' --> 'tennis+ball') and
    # percent-escapes characters such as '&', '#', '{' and '}' that would
    # otherwise corrupt the query string or break the later str.format(page).
    encoded_keyword = quote_plus(keyword)

    # The '=' matters: '&page3' would be a parameter *named* 'page3' with no
    # value, whereas '&page=3' passes 3 as the value of 'page'.
    return URL_TEMPLATE.format(encoded_keyword) + '&page={}'

def extract_record(item: object) -> Optional[tuple]:
    """
    Extract the information of one product from a search-result element.

    Parameters:
    --------------------------
        item (object): Parsed HTML element (BeautifulSoup tag) containing the product information.
    --------------------------
    Returns:
        (tuple): Product information as (description, price, rating, url).
                 The rating is an empty string when the element has no <i> tag.
        None:    When the title, the link or the price cannot be found, so that
                 the caller can skip the element instead of crashing.
    """

    # Description and link. Either lookup yields None when the tag is absent;
    # check explicitly so one malformed element does not abort the whole scrape.
    title_tag = item.find('h2', class_='a-size-base-plus')
    link_tag = item.a
    href = link_tag.get('href') if link_tag is not None else None
    if title_tag is None or not href:
        return None

    # Price: the visible amount sits in an 'a-offscreen' span nested in 'a-price'.
    price_parent = item.find('span', 'a-price')
    price_tag = price_parent.find('span', 'a-offscreen') if price_parent is not None else None
    if price_tag is None:
        return None

    # Rating: text of the first <i> tag; optional, so fall back to ''.
    rating_tag = item.i
    rating = rating_tag.text if rating_tag is not None else ''

    # urljoin resolves relative hrefs against the site root and leaves already
    # absolute hrefs untouched (plain concatenation would mangle the latter).
    url = urljoin(URL, href)

    return (title_tag.get_text(), price_tag.text, rating, url)

def main(keyword: str, pages: int = 8) -> list:
    """
    Scrape the search results for a keyword with the following steps:
    1 - Launching the webdriver (browser used to load the website)
    2 - Loading each of the first `pages` search-result pages for the keyword
    3 - Parsing the HTML of the page and collecting every search-result block
    4 - Extracting the information (description, price, rating, URL) of each product
    5 - Collecting the information in a list
    6 - Shutting down the webdriver

    Parameters:
    --------------------------
        keyword (str): Input word to place in the search key.
        pages (int): Number of result pages to visit, starting at page 1 (default: 8).
    --------------------------
    Returns:
        (list): One (description, price, rating, URL) tuple per product that could be parsed.
    """

    # Build the URL template first so a failure here cannot leave a browser open
    url = get_url(keyword)

    # Save the items information in a list
    records = []

    # Startup the webdriver
    driver = webdriver.Chrome()
    try:
        # range() excludes its upper bound, hence pages + 1 to include the last page
        for page in range(1, pages + 1):
            driver.get(url.format(page))

            # Parse the HTML of the page currently loaded in the browser
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # Every product sits in its own <div> tagged as a search result
            results = soup.find_all('div', {'data-component-type': 's-search-result'})

            for item in results:
                record = extract_record(item)
                # extract_record yields nothing for products it cannot parse
                if record:
                    records.append(record)
    finally:
        # quit() ends the whole WebDriver session, whereas close() only closes
        # the current window; 'finally' guarantees it also runs after an error.
        driver.quit()

    return records

In [17]:
# Example keyword to search for on the website
keyword = 'sunglasses'
# Run the scraper: main() starts a Chrome webdriver and returns the list of extracted product records
records = main(keyword)

## Visualisation of the Records

In [18]:
# Column labels as a flat list: a nested list ([[...]]) makes pandas build
# MultiIndex columns, so selecting df['Price'] would no longer give a plain Series.
headers = ['Description', 'Price', 'Rating', 'URL']
df = pd.DataFrame(records, columns=headers)

In [19]:
# Display the scraped records as a table
df

Unnamed: 0,Description,Price,Rating,URL
0,Classic Rectangle Polarized Matte Finished Sun...,S$19.99,4.0 out of 5 stars,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
1,Natural Wood Temple Rectangle Plastic Frame Po...,S$25.99,4.1 out of 5 stars,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
2,Classic Retro Round Gradient Polarized Mirrore...,S$23.99,4.2 out of 5 stars,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
3,Stainless Steel Polarized Aviator Sunglasses f...,S$27.99,4.2 out of 5 stars,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
4,"KALIYADI Polarized Sunglasses for Men, Lightwe...",S$34.26,4.6 out of 5 stars,https://www.amazon.sg/KALIYADI-Polarized-Sungl...
...,...,...,...,...
355,Coleman,S$25.74,4.2 out of 5 stars,https://www.amazon.sg/Coleman-CO3079-3-Polariz...
356,Wetnenss Replacement Lenses for Oakley Holbroo...,S$27.00,4.0 out of 5 stars,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
357,2 Pairs Strawberry Sunglasses Funny Sunglasses...,S$8.19,,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
358,Cat Eye Shades Sun Glasses Outdoor Accessories...,S$9.19,,https://www.amazon.sg/sspa/click?ie=UTF8&spc=M...
