In [1]:
from datetime import datetime
from urllib.parse import parse_qsl, urlsplit, urlunsplit

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Browser-like request headers sent with every review-page request.
# Long values are assembled from adjacent string literals purely for readability.
headers = {
    'authority': 'www.amazon.com',
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'accept-language': 'en-US,en;q=0.9,bn;q=0.8',
    'sec-ch-ua': (
        '" Not A;Brand";v="99", '
        '"Chromium";v="102", '
        '"Google Chrome";v="102"'
    ),
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/102.0.0.0 Safari/537.36'
    ),
}

In [3]:
# Product-reviews listing for ASIN B0CGJDKLB8 on amazon.in. Note that this address
# already carries its own query string (ie=UTF8, reviewerType=all_reviews,
# filterByStar=one_star, pageNumber=4).
reviews_url = 'https://www.amazon.in/i9-14900K-Desktop-Processor-Integrated-Graphics/product-reviews/B0CGJDKLB8/ref=cm_cr_unknown?ie=UTF8&reviewerType=all_reviews&filterByStar=one_star&pageNumber=4'

In [4]:
# Number of review pages to download; passed to reviewsHtml, which requests
# pages 1 through len_page inclusive.
len_page = 4

In [5]:
def reviewsHtml(url, len_page, timeout=30):
    """Download review pages 1..len_page and parse each one with BeautifulSoup.

    Any query string already present on ``url`` is split off and merged into
    the request parameters rather than being sent next to them. Without this,
    ``requests`` appends ``params`` to the existing query and each key
    (notably ``pageNumber`` and ``filterByStar``) ends up in the request twice
    with conflicting values.

    Parameter precedence, lowest to highest:
      1. built-in defaults (``ie``, ``reviewerType``, ``filterByStar``),
      2. values found in the query string of ``url``,
      3. ``pageNumber``, which is always set from the page loop.

    The request uses the module-level ``headers`` dict. The HTTP status code is
    not inspected; whatever body comes back is parsed as-is.

    Args:
        url: Address of the product-reviews page, with or without a query string.
        len_page: Number of pages to fetch, starting at page 1.
        timeout: Value forwarded to ``requests.get(timeout=...)`` so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        A list containing one ``BeautifulSoup`` object per page, in page order.
    """
    # Separate the bare address from whatever query the caller pasted in.
    parts = urlsplit(url)
    base_url = urlunsplit(parts._replace(query=''))

    # Defaults first, then let the caller's own query values override them.
    base_params = {
        'ie': 'UTF8',
        'reviewerType': 'all_reviews',
        'filterByStar': 'critical',
    }
    base_params.update(parse_qsl(parts.query))

    soups = []
    for page_no in range(1, len_page + 1):
        # pageNumber is the only value that changes between requests.
        params = dict(base_params, pageNumber=page_no)

        response = requests.get(
            base_url,
            headers=headers,
            params=params,
            timeout=timeout,
        )

        # Parse the returned HTML with the lxml backend and keep it.
        soups.append(BeautifulSoup(response.text, 'lxml'))

    return soups

In [6]:
# Placeholder stored when a field cannot be extracted from a review box.
_MISSING = 'N/A'

# Date layouts tried in order: month-first ("January 5, 2024") and
# day-first ("5 January 2024").
_REVIEW_DATE_FORMATS = ('%B %d, %Y', '%d %B %Y')

# Selectors tried in order for the star rating.
# NOTE(review): the "cmps-" variant is presumed to be the hook used for reviews
# shown from other marketplaces - confirm against the live page markup.
_STAR_SELECTORS = (
    '[data-hook="review-star-rating"]',
    '[data-hook="cmps-review-star-rating"]',
)


def _first_text(box, *selectors):
    """Return the stripped text of the first selector that matches, else None."""
    for selector in selectors:
        node = box.select_one(selector)
        if node is not None:
            return node.text.strip()
    return None


def _format_review_date(raw_text):
    """Convert a '... on <date>' line to dd/mm/YYYY, or 'N/A' if unparseable."""
    # Keep only what follows the last ' on ' (the whole text if it is absent).
    date_part = raw_text.split(' on ')[-1].strip()
    for fmt in _REVIEW_DATE_FORMATS:
        try:
            return datetime.strptime(date_part, fmt).strftime('%d/%m/%Y')
        except ValueError:
            # Layout did not match; try the next known one.
            continue
    return _MISSING


def getReviews(html_data):
    """Extract one record per review box from a parsed review page.

    Args:
        html_data: Parsed page exposing ``select(css)``; every element it
            returns must expose ``select_one(css)`` whose result has ``.text``
            (a ``BeautifulSoup`` document satisfies this).

    Returns:
        A list of dicts with the keys ``Name``, ``Stars``, ``Title``, ``Date``
        and ``Description``. A field whose element is not found is set to
        ``'N/A'``. ``Stars`` keeps only the text before ``' out'``. ``Date`` is
        formatted as dd/mm/YYYY and is ``'N/A'`` when the text matches none of
        the known date layouts.
    """
    data_dicts = []

    # Each review lives in its own div tagged with data-hook="review".
    for box in html_data.select('div[data-hook="review"]'):
        name = _first_text(box, '[class="a-profile-name"]')
        stars = _first_text(box, *_STAR_SELECTORS)
        title = _first_text(box, '[data-hook="review-title"]')
        date_text = _first_text(box, '[data-hook="review-date"]')
        description = _first_text(box, '[data-hook="review-body"]')

        data_dicts.append({
            'Name': _MISSING if name is None else name,
            # "1.0 out of 5 stars" -> "1.0"
            'Stars': _MISSING if stars is None else stars.split(' out')[0],
            'Title': _MISSING if title is None else title,
            'Date': _MISSING if date_text is None else _format_review_date(date_text),
            'Description': _MISSING if description is None else description,
        })

    return data_dicts

In [7]:
# Download and parse the requested review pages (this cell performs the
# network requests); the result is one parsed document per page.
html_datas = reviewsHtml(reviews_url, len_page)

In [8]:
# Accumulator for the review dicts gathered from every page.
reviews = []

In [9]:
# Flatten the per-page results: every page contributes its review dicts
# to the shared `reviews` list, in page order.
for html_data in html_datas:
    reviews.extend(getReviews(html_data))

In [10]:
# Build the table with an explicit column list (the keys produced by getReviews)
# so the frame - and any CSV written from it - has the same header even when
# no reviews were scraped and `reviews` is empty.
df_reviews = pd.DataFrame(
    reviews,
    columns=['Name', 'Stars', 'Title', 'Date', 'Description'],
)

In [11]:
# Show the assembled review table as this cell's output.
df_reviews

Unnamed: 0,Name,Stars,Title,Date,Description
0,Antoine Sarkis,,Defective item,,I received a defective i7 14700kThe system wil...
1,Amazon Kunde,,Defekt,,"Verpackung beschädigt, CPU war gebraucht und d..."
2,Andre Lup,,SO MANY PROBLEMS,,This CPU is a piece of garbage. Can’t run most...
3,Kindle Customer,,Unstable Out Of The Box.,,"Installed on an Asus Z790 Hero motherboard, th..."
4,Erebus,,Windows install wants a network but no network...,,"While you are installing windows, when you get..."
5,Zamo’s,,New Intel I7 was delivered with the open and t...,,Intel I7 processor was delivered with the open...
6,Steve R. Schamberger,,Died after 4 months,,"It's fast, but it's not great for quality"
7,Alex,,"Re-packaged, broken CPU",,CPU1 fails CPU Float test from Intel's diagnos...
8,Ada,,Did not work,,Failed CPU after only a week of building the p...
9,Justin,,Was a loyal intel consumer until this.,,Brand new chip doesn’t even work. How’d are th...


In [263]:
# Write the review table to a CSV file in the current working directory,
# omitting the DataFrame index column.
df_reviews.to_csv('reviews93_star1.csv', index=False)