In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import pandas as pd
import time

def get_reviews(product_url):
    """Scrape customer reviews for a product page using headless Chrome.

    Opens ``product_url``, follows the "see all reviews" link, then walks the
    paginated review list, collecting every review block found on each page
    until no "next page" link can be located or clicked.

    Args:
        product_url: URL of the product page to start from.

    Returns:
        A list of dicts, one per successfully parsed review, each with the
        keys ``'title'``, ``'rating'``, ``'body'`` and ``'date'`` holding the
        stripped visible text of the matching element. Reviews that fail to
        parse are reported on stdout and skipped.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the
            "see all reviews" link is not present on the product page.
        selenium.common.exceptions.WebDriverException: if the browser cannot
            be started or the initial navigation fails.
    """
    # Local import keeps this function self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import NoSuchElementException

    # Set up Chrome options: no visible window; the two extra flags are the
    # usual ones for containerised / CI environments (NOTE(review): confirm
    # they are still required where this runs).
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Initialize the WebDriver
    driver = webdriver.Chrome(options=chrome_options)

    reviews = []

    try:
        # Navigation happens inside the try so the browser process is always
        # shut down by the finally clause, even if the page fails to load.
        driver.get(product_url)
        time.sleep(3)  # Let the page load

        # Click on the "See all reviews" link
        see_all_reviews_link = driver.find_element(
            By.XPATH, '//a[contains(@data-hook, "see-all-reviews-link-foot")]'
        )
        see_all_reviews_link.click()
        time.sleep(3)  # Let the page load

        # Scrape multiple pages of reviews
        while True:
            # Get review elements
            review_blocks = driver.find_elements(By.XPATH, '//div[@data-hook="review"]')

            for review in review_blocks:
                try:
                    title = review.find_element(By.XPATH, './/a[@data-hook="review-title"]').text.strip()
                    rating = review.find_element(By.XPATH, './/i[@data-hook="review-star-rating"]').text.strip()
                    body = review.find_element(By.XPATH, './/span[@data-hook="review-body"]').text.strip()
                    date = review.find_element(By.XPATH, './/span[@data-hook="review-date"]').text.strip()
                except Exception as e:
                    # Best effort: one malformed review must not abort the run.
                    print(f"Error parsing review: {e}")
                    continue
                reviews.append({
                    'title': title,
                    'rating': rating,
                    'body': body,
                    'date': date
                })

            # Check for next page button
            try:
                next_button = driver.find_element(By.XPATH, '//li[@class="a-last"]/a')
                next_button.click()
            except NoSuchElementException:
                # Normal termination: there is no further page link.
                print("No more pages or next button not found.")
                break
            except Exception as e:
                # Any other failure (e.g. the click was intercepted) also ends
                # pagination, but is reported as what it is. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through.
                print(f"Stopping pagination after error: {e}")
                break
            time.sleep(3)  # Let the next page load

    finally:
        driver.quit()

    return reviews

def save_reviews_to_csv(reviews, filename):
    """Write the collected reviews to a CSV file and report the count.

    Args:
        reviews: Records to store, in any form ``pandas.DataFrame`` accepts
            (for example the list of dicts returned by ``get_reviews``).
        filename: Destination path of the CSV file.

    The row index is not written to the file. A one-line summary is printed
    to stdout once the file has been saved.
    """
    # Build the table and persist it in one step; index=False keeps the
    # positional row numbers out of the output.
    pd.DataFrame(reviews).to_csv(filename, index=False)
    print(f"Saved {len(reviews)} reviews to {filename}")

if __name__ == "__main__":
    # Script entry point: scrape the reviews for one hard-coded product page
    # and store them in amazon_reviews.csv when anything was collected.
    product_url = 'https://www.amazon.in/Samsung-Galaxy-Flip5-Cream-Storage/dp/B0CB6BMFJR/ref=sr_1_1?crid=1M1XW9CK9Y8E8&dib=eyJ2IjoiMSJ9.CkkN4-k-hMcowouJlQNYg4SLOtX4l0MBgOSgZpzcZeWbEiLqJD-6PU3u7Nx_LugMYRVYGf66kkSG0zkNfzxHn11pnNx4PtfPaevPFkTaQH0FUl3lax5nFuNYcOL0SsjgJDkrWmrSFKfOrpz0m5JorB9EKHP_6pDD7dggQNsj5vgec8PUOgL4AEiu8YBLnxRghI7VKf8-LoGVnC8vIGx6A7xhvCEVqNX1zH9LAULrQQw.x7MuOshwuS_uhWmG9vzNaYkB2t0ERcAZW42ubBquCzM&dib_tag=se&keywords=zflip5%2Breview&qid=1716790683&sprefix=zflip5%2Breview%2Caps%2C224&sr=8-1&th=1#customerReviews'
    reviews = get_reviews(product_url)
    # An empty result means nothing was scraped, so no file is written.
    if not reviews:
        print("No reviews found or failed to scrape reviews.")
    else:
        save_reviews_to_csv(reviews, 'amazon_reviews.csv')


No more pages or next button not found.
Saved 100 reviews to amazon_reviews.csv
