# Team 6 - Web Scraping Amazon Reviews on TOZO W1 Wireless Charger

Team 6: Lai Leng Chan, Minsu Kim, Christopher Garcia

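Objective: Scrape customer review text and star ratings for the TOZO W1 Wireless Charger from Amazon's product review pages, and save them to a CSV file for preprocessing and analysis.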

## API Scraping & Data Ingestion

In [1]:
# Importing necessary libraries
import os
import pandas as pd
import datetime
import re
import requests
import time
from bs4 import BeautifulSoup
from collections import defaultdict, Counter
import random
from urllib.parse import urlencode

In [2]:
# Every review page of this product is reachable through the same URL template.
# Only the page index varies, and it appears twice in the template: once in the
# `ref=..._next_{}` path segment and once in the `pageNumber={}` query parameter.

# URL template with two placeholders, both filled with the page index
base_url = 'https://www.amazon.com/TOZO-Wireless-Upgraded-Sleep-Friendly-FastCharging/product-reviews/B07FM8R7J1/ref=cm_cr_getr_d_paging_btm_next_{}?ie=UTF8&reviewerType=all_reviews&pageNumber={}'

# Highest page index that will be requested
max_pages = 500

# One URL per page, for pages 1 through max_pages inclusive
list_of_urls = list(
    map(lambda page: base_url.format(page, page), range(1, max_pages + 1))
)

In [3]:
# Fetch every review page through ScraperAPI, parse the returned HTML with
# BeautifulSoup, and collect the text and the star rating of each review.
#
# Outputs of this cell:
#   reviews      - list of review body strings
#   star_ratings - list of rating strings, aligned index-by-index with `reviews`
#   failed_urls  - pages that could not be fetched or parsed consistently
#
# The API key is read from the SCRAPERAPI_KEY environment variable so that the
# credential is never stored in (or committed with) the notebook.

SCRAPERAPI_ENDPOINT = 'https://api.scraperapi.com/'  # HTTPS: the key travels in the query string
REQUEST_TIMEOUT_SECONDS = 70  # proxied page loads are slow; without a timeout a stalled request hangs forever
MAX_ATTEMPTS = 3              # attempts per page before the page is recorded as failed

api_key = os.environ.get('SCRAPERAPI_KEY')
if not api_key:
    raise RuntimeError('Set the SCRAPERAPI_KEY environment variable before running this cell.')

reviews = []
star_ratings = []
failed_urls = []
data_string = ""  # no longer used by this cell; kept so the name stays defined as before

for url in list_of_urls:
    # requests URL-encodes the dictionary itself, so no manual urlencode is needed
    params = {'api_key': api_key, 'url': url}

    response = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(SCRAPERAPI_ENDPOINT, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
            # Treat HTTP error statuses as failures instead of parsing an error page as "no reviews"
            response.raise_for_status()
            break
        except requests.RequestException as error:
            response = None
            print(f'Attempt {attempt}/{MAX_ATTEMPTS} failed for {url}: {error}')
            if attempt < MAX_ATTEMPTS:
                time.sleep(attempt)  # short, growing pause before retrying

    if response is None:
        failed_urls.append(url)
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    page_reviews = [item.get_text() for item in soup.find_all("span", {"data-hook": "review-body"})]
    page_ratings = [item.get_text() for item in soup.find_all("i", {"data-hook": "review-star-rating"})]

    # The two searches are independent. If their counts differ on a page, appending
    # both would shift every later rating against the wrong review, so the page is
    # skipped and recorded instead.
    if len(page_reviews) != len(page_ratings):
        print(f'Skipping {url}: found {len(page_reviews)} review bodies but {len(page_ratings)} star ratings')
        failed_urls.append(url)
        continue

    reviews.extend(page_reviews)
    star_ratings.extend(page_ratings)

In [5]:
# Gather the two scraped lists under the labels used later for the DataFrame
reviews_dict = {}
reviews_dict['Reviews'] = reviews
reviews_dict['Star Ratings'] = star_ratings

# Show how many star ratings and how many reviews were collected
n_star_ratings = len(star_ratings)
n_reviews = len(reviews)
print(n_star_ratings, n_reviews)

4800 4800


In [6]:
# Build the DataFrame with one row per dictionary key (orient='index'):
# a 'Reviews' row and a 'Star Ratings' row, with one column per scraped review.
reviews_df = pd.DataFrame.from_dict(data=reviews_dict, orient='index')

# Save the frame as CSV for easier access in the future.
# index=False means the row labels ('Reviews', 'Star Ratings') are not written;
# header=True writes the column labels as the first line.
reviews_df.to_csv(path_or_buf='reviews.csv', header=True, index=False)

In [7]:
# Display the DataFrame built above (the rendered output elides the middle columns with "...")
reviews_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799
Reviews,"\nFirst, this charger looks fantastic. I got t...",\nUPDATE: Well...I was wrong about this wirel...,\nThis works perfectly on the side table while...,\nI purchased this charger for use in our camp...,"\nGot this for my Samsung Galaxy S23 phone, an...",\nI ordered this charger after using one at my...,\nCame here to see if there was any tips on ho...,\nDevice must be placed carefully or it will f...,\nCharges like advertised. I did have to take ...,\nI wanted something to use instead of the typ...,...,"\nEven with Magsafe case on, charging is super...",\nGreat charger! Able to charge my phone easil...,\nBought two of these and one was giving me pr...,\nI saw this on one of my colleagues desk at w...,\nAn excellent product. It’s in our kitchen as...,\nIt’s PERFECT for traveling. No more differen...,\n\n\n\n\n The media could ...,\nImpressed taking it out of the box. All met...,\nI love the design of this charger. Color on ...,\nkinda finnicky cuz ur phone has to be in the...
Star Ratings,4.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,4.0 out of 5 stars,5.0 out of 5 stars,4.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,...,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,3.0 out of 5 stars,4.0 out of 5 stars


## Data Preprocessing

In [9]:
# Read the raw scraped data back into a DataFrame.
# The path is relative to the working directory, the same location the
# ingestion step writes 'reviews.csv' to, so the notebook does not depend on
# one user's machine-specific folder layout.
reviews_df = pd.read_csv('reviews.csv')

# Display the reloaded frame
reviews_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4790,4791,4792,4793,4794,4795,4796,4797,4798,4799
0,"\nFirst, this charger looks fantastic. I got t...",\nUPDATE: Well...I was wrong about this wirel...,\nThis works perfectly on the side table while...,\nI purchased this charger for use in our camp...,"\nGot this for my Samsung Galaxy S23 phone, an...",\nI ordered this charger after using one at my...,\nCame here to see if there was any tips on ho...,\nDevice must be placed carefully or it will f...,\nCharges like advertised. I did have to take ...,\nI wanted something to use instead of the typ...,...,"\nEven with Magsafe case on, charging is super...",\nGreat charger! Able to charge my phone easil...,\nBought two of these and one was giving me pr...,\nI saw this on one of my colleagues desk at w...,\nAn excellent product. It’s in our kitchen as...,\nIt’s PERFECT for traveling. No more differen...,\n\n\n\n\n The media could ...,\nImpressed taking it out of the box. All met...,\nI love the design of this charger. Color on ...,\nkinda finnicky cuz ur phone has to be in the...
1,4.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,4.0 out of 5 stars,5.0 out of 5 stars,4.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,...,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,5.0 out of 5 stars,3.0 out of 5 stars,4.0 out of 5 stars
