In [2]:
import pandas as pd

In [3]:
import requests
from bs4 import BeautifulSoup

def scrape_amazon_reviews(url, max_pages=5):
    """Scrapes reviews from an Amazon product page up to a specified limit.

    Args:
        url (str): The URL of the Amazon product page.
        max_pages (int, optional): The maximum number of review pages to scrape. Defaults to 2.

    Returns:
        list: A list of dictionaries, where each dictionary contains information about a review.
    """

    reviews = []
    headers = {'User-Agent': 'Mozilla/5.0'}  # Simulate a browser to avoid detection

    for page in range(1, max_pages + 1):
        page_url = f"{url}&pageNumber={page}"
        # print(page_url)
        response = requests.get(page_url, headers=headers)

        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            review_elements = soup.find_all('div', {'data-hook': 'review'})

            for review in review_elements:
                review_data = {}
                # Extract review content based on Amazon's HTML structure (replace placeholders with actual selectors)
                review_data['author'] = review.find('span', {'class': "a-profile-name"}).text.strip()  # Replace with appropriate selector
                review_data['rating & title'] = review.find('a', {'data-hook': 'review-title'}).text.strip()  # Replace with appropriate selector
                # print(review_data['author'])
                # print(review_data['rating & title'])
                review_data['text'] = review.find('span', {'data-hook': 'review-body'}).text.strip()  # Replace with appropriate selector
                # print(review_data['text'])
                # ... Extract other relevant data as needed

                reviews.append(review_data)
            else:
                print(f"No reviews found on page {page}")
        else:
            print(f"Error: Request failed with status code {response.status_code}")

    return reviews

# Example usage (replace with a valid product URL)
product_url = "https://www.amazon.in/Apple-iPhone-Plus-256GB-Blue/product-reviews/B0BDJQHJZZ/ref=cm_cr_getr_d_paging_btm_next_2?ie=UTF8&reviewerType=all_reviews&pageNumber="
scraped_reviews = scrape_amazon_reviews(product_url)

# Process or store the scraped reviews responsibly (e.g., avoid overwhelming Amazon's servers)
print(scraped_reviews)

No reviews found on page 1
No reviews found on page 2
No reviews found on page 3
No reviews found on page 4
No reviews found on page 5
[{'author': 'Mallajeet Sonowal', 'rating & title': '5.0 out of 5 stars\nFirst experience with IOS - its different', 'text': 'I am writing this review after using it for1 month. This is my first apple device. I upgraded from OP7T to this device. My prime requirement was superior battery life and looking for an overall decent good phone. So I was initially inclined to buying OP 11 but ever since OP was acquired by VIVO, I felt that OP has lost its originality and as a result my interest for OP11 was somehow withdrawn.  After one month of usage of IP14+,  I find that in some aspect IOS is superior to android and in some other aspects android is superior to IOS. As a very normal mobile phone user, I’ll try to highlight the difficulties and good aspects of the device I noticed while using it for the last one month:Pros:1. Battery life is outstanding. When I 

In [4]:
df = pd.DataFrame(scraped_reviews)

In [5]:
df.to_csv("review_data.csv",index=False)

In [6]:
df = pd.read_csv('review_data.csv')

In [7]:
df.head()

Unnamed: 0,author,rating & title,text
0,Mallajeet Sonowal,5.0 out of 5 stars\nFirst experience with IOS ...,I am writing this review after using it for1 m...
1,Pritam Shetty,5.0 out of 5 stars\nSmooth Transition to iPhon...,I recently made the switch from Android to the...
2,Dwarka Chary,"4.0 out of 5 stars\nPlus sized iPhone , not fo...",The phone is a standard iPhone blown up to the...
3,Gokul,5.0 out of 5 stars\nIt’s my first iPhone exper...,It’s my first iphone and i see that it has a v...
4,Kushal Achchha,5.0 out of 5 stars\nIphone review,Very good quality n safely delivered


In [8]:
rating = []
title = []
for i in df['rating & title'].to_list():
    a, b = i.split('\n')
    rating.append(a[:3])
    title.append(b)
    


In [9]:
df['title'] = title
df['rating'] = rating

In [10]:
df.drop(columns=['rating & title'],inplace=True)

In [11]:
text = df.pop('text')
df['review'] = text

In [12]:
df

Unnamed: 0,author,title,rating,review
0,Mallajeet Sonowal,First experience with IOS - its different,5.0,I am writing this review after using it for1 m...
1,Pritam Shetty,Smooth Transition to iPhone 14 Plus,5.0,I recently made the switch from Android to the...
2,Dwarka Chary,"Plus sized iPhone , not for all",4.0,The phone is a standard iPhone blown up to the...
3,Gokul,It’s my first iPhone experience,5.0,It’s my first iphone and i see that it has a v...
4,Kushal Achchha,Iphone review,5.0,Very good quality n safely delivered
5,Tushar Soni,Value for money,5.0,Product is good as it described in its details...
6,Kshitija sontakke,best,5.0,My first iphone . I loved it. Impressed with t...
7,Bhupendra,Camera and flash light black out,1.0,My mobile back camera is balck out it is under...
8,Amazon Customer,Overall Good,4.0,The phone is good the size of the phone for th...
9,Adil khan,Sahi hai,4.0,Sahi hai


In [13]:
df.to_csv('UpdatedReviewsData.csv', index=False)