# In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import time
from bs4 import BeautifulSoup
import os

# Location of the ChromeDriver executable handed to selenium's Service.
# The path is relative, so it is resolved against the current working directory.
driver_path = "chromedriver.exe"

# Review-listing URL the scraper opens first (product B08FRR54HL, all reviewer
# types). Split across lines for readability; the pieces concatenate to one URL.
BASE_URL = (
    "https://www.amazon.com/"
    "Unlocked-SM-G986U-Fingerprint-Recognition-Long-Lasting/"
    "product-reviews/B08FRR54HL/"
    "ref=cm_cr_dp_d_show_all_btm"
    "?ie=UTF8&reviewerType=all_reviews"
)

def _text_or_default(tag, default):
    """Return the stripped text of a parsed tag, or *default* when the tag is missing."""
    return tag.text.strip() if tag is not None else default


def _parse_review(review, product_name):
    """Build one review record (a dict of four strings) from a single review element."""
    username_tag = review.find('span', {'class': 'a-profile-name'})
    rating_tag = review.find('i', {'data-hook': 'review-star-rating'})
    review_text_tag = review.find('span', {'data-hook': 'review-body'})
    return {
        "product_name": product_name,
        "username": _text_or_default(username_tag, "Anonymous"),
        "rating": _text_or_default(rating_tag, "No Rating"),
        "review_text": _text_or_default(review_text_tag, "No review text"),
    }


def fetch_reviews_with_product_name_and_pagination(url):
    """Scrape every page of reviews reachable from *url* with a Chrome WebDriver.

    The product title is read once from the first page and shortened to its
    first three whitespace-separated words. The function then walks the
    pagination by clicking the "next" control until it is disabled or missing.

    Args:
        url: Address of the first review page to open.

    Returns:
        A list of dicts with the keys ``product_name``, ``username``,
        ``rating`` and ``review_text``. If the browser cannot be started the
        list is empty. If scraping fails part-way (for example a wait times
        out), the reviews collected up to that point are returned rather than
        discarded.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    service = Service(driver_path)
    reviews = []

    # Start the browser in its own try so that the "initializing" message is
    # only ever printed for an actual start-up failure.
    try:
        driver = webdriver.Chrome(service=service, options=chrome_options)
    except Exception as e:
        print(f"Error initializing WebDriver: {e}")
        return reviews

    try:
        driver.get(url)

        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product-info-title"))
        )

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_name_tag = soup.find('h1', class_='product-info-title')
        full_product_name = _text_or_default(product_name_tag, "Unknown Product")
        # Keep only the first three words so the stored name stays short.
        product_name = " ".join(full_product_name.split()[:3])

        while True:
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.ID, "cm_cr-review_list"))
            )

            soup = BeautifulSoup(driver.page_source, 'html.parser')

            review_elements = soup.find_all('div', {'data-hook': 'review'})
            if not review_elements:
                print("No reviews found on this page.")
                break

            for review in review_elements:
                # One unparsable review must not abort the rest of the page.
                try:
                    reviews.append(_parse_review(review, product_name))
                except Exception as e:
                    print(f"Error processing review: {e}")
                    continue

            try:
                next_button = driver.find_element(By.CLASS_NAME, "a-last")
                # get_attribute may return None; treat that as "no classes".
                button_classes = next_button.get_attribute("class") or ""
                if "a-disabled" in button_classes:
                    print("No more pages to scrape.")
                    break
                print("Navigating to the next page...")
                next_button.click()
                # Fixed pause to give the next page time to start loading
                # before the review-list wait at the top of the loop runs.
                time.sleep(2)
            except Exception as e:
                print(f"No more pages or error navigating: {e}")
                break

    except Exception as e:
        # Any failure after start-up lands here; fall through so the reviews
        # gathered from earlier pages are still returned.
        print(f"Error while scraping reviews: {e}")

    finally:
        driver.quit()

    return reviews

def save_to_csv(reviews, filename="amazon_reviews.csv"):
    """Write reviews to *filename*, skipping any that are already stored.

    Two reviews count as the same when their ``product_name``, ``username``
    and ``review_text`` all match; ``rating`` is not part of the comparison.
    Duplicates inside *reviews* itself are also dropped.

    A missing or zero-byte file is created fresh with a header row. A file
    whose header lacks one of the key columns is reported as malformed and
    overwritten. Otherwise new rows are appended.

    Args:
        reviews: Iterable of dicts with the keys ``product_name``,
            ``username``, ``rating`` and ``review_text``.
        filename: Path of the CSV file to create or extend.
    """
    headers = ["product_name", "username", "rating", "review_text"]

    def dedupe_key(row):
        return (row['product_name'], row['username'], row['review_text'])

    existing_reviews = set()

    # Append only to a file that already has content. A zero-byte file has no
    # header row yet, so it must be written like a brand-new file.
    append = os.path.isfile(filename) and os.path.getsize(filename) > 0

    if append:
        # newline='' lets the csv module see embedded line breaks unchanged,
        # so multi-line review text compares equal to what was written.
        with open(filename, 'r', newline='', encoding='utf-8') as f:
            try:
                for row in csv.DictReader(f):
                    existing_reviews.add(dedupe_key(row))
            except KeyError:
                print("CSV file is malformed or has missing headers. Rewriting the file.")
                existing_reviews.clear()
                append = False

    new_reviews = []
    for review in reviews:
        unique_key = dedupe_key(review)
        if unique_key not in existing_reviews:
            existing_reviews.add(unique_key)
            new_reviews.append(review)

    with open(filename, 'a' if append else 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        if not append:
            writer.writeheader()
        writer.writerows(new_reviews)

    if new_reviews:
        print(f"Appended {len(new_reviews)} new reviews to '{filename}'.")
    else:
        print("No new reviews to append.")

if __name__ == "__main__":
    # Script entry point: scrape the reviews reachable from BASE_URL and hand
    # them straight to save_to_csv, which writes to its default filename.
    save_to_csv(fetch_reviews_with_product_name_and_pagination(BASE_URL))