In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
import time
from bs4 import BeautifulSoup
import os

# Path handed to selenium's Service() when the Chrome driver is started.
driver_path = "chromedriver.exe"

# First review page to load; later pages are reached by clicking "next".
BASE_URL = "https://www.amazon.com/Unlocked-SM-G986U-Fingerprint-Recognition-Long-Lasting/product-reviews/B08FRR54HL/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"

def _parse_review(review, product_name):
    """Build one review record from a single review container tag.

    Missing sub-elements are replaced by the placeholders "No Rating",
    "Anonymous" and "No review text".
    """
    rating_tag = review.find('i', {'data-hook': 'review-star-rating'})
    username_tag = review.find('span', {'class': 'a-profile-name'})
    review_text_tag = review.find('span', {'data-hook': 'review-body'})

    return {
        "product_name": product_name,
        "username": username_tag.text.strip() if username_tag else "Anonymous",
        "rating": rating_tag.text.strip() if rating_tag else "No Rating",
        "review_text": review_text_tag.text.strip() if review_text_tag else "No review text",
    }


def fetch_reviews_with_product_name_and_pagination(url):
    """Scrape the reviews on *url* and on every following page.

    A Chrome session is started through ``driver_path``, the product title is
    read from the first page, and each page's review containers are parsed
    until the "next" control is disabled, missing, or a page has no reviews.

    Args:
        url: Address of the first review page to load.

    Returns:
        A list of dicts with the keys ``product_name``, ``username``,
        ``rating`` and ``review_text``. ``product_name`` is shortened to the
        first three words of the page title. When an error interrupts the
        run, the reviews gathered up to that point are returned (an empty
        list if nothing was collected yet).
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    # chrome_options.add_argument("--headless")

    service = Service(driver_path)
    driver = None
    reviews = []

    try:
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(url)

        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "product-info-title"))
        )

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        product_name_tag = soup.find('h1', class_='product-info-title')
        full_product_name = product_name_tag.text.strip() if product_name_tag else "Unknown Product"

        # Only the first three words of the title are kept as the product name.
        product_name = " ".join(full_product_name.split()[:3])

        while True:
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.ID, "cm_cr-review_list"))
            )

            soup = BeautifulSoup(driver.page_source, 'html.parser')

            review_elements = soup.find_all('div', {'data-hook': 'review'})
            if not review_elements:
                print("No reviews found on this page.")
                break

            for review in review_elements:
                # One broken review must not abort the whole page.
                try:
                    reviews.append(_parse_review(review, product_name))
                except Exception as e:
                    print(f"Error processing review: {e}")
                    continue

            try:
                next_button = driver.find_element(By.CLASS_NAME, "a-last")
                # get_attribute may return None when the attribute is absent.
                if "a-disabled" in (next_button.get_attribute("class") or ""):
                    print("No more pages to scrape.")
                    break

                print("Navigating to the next page...")
                next_button.click()
                # Give the next page a moment to start loading before waiting on it.
                time.sleep(2)
            except Exception as e:
                print(f"No more pages or error navigating: {e}")
                break

        return reviews

    except Exception as e:
        # This handler covers driver start-up, page loads and wait timeouts,
        # so the message is generic and already-collected reviews are kept.
        print(f"Error while scraping reviews: {e}")
        return reviews

    finally:
        if driver is not None:
            driver.quit()

def save_to_csv(reviews, filename="amazon_reviews.csv"):
    """Append reviews that are not yet stored in *filename*.

    Two reviews are considered the same when their ``product_name``,
    ``username`` and ``review_text`` all match. Duplicates are dropped both
    against the rows already in the file and within *reviews* itself.

    The file is created with a header row when it is missing, empty, or when
    its header lacks one of the three key columns (in that last case the old
    content is overwritten, as announced by the printed message).

    Args:
        reviews: Iterable of dicts holding the keys ``product_name``,
            ``username``, ``rating`` and ``review_text``.
        filename: Path of the CSV file to create or extend.
    """
    headers = ["product_name", "username", "rating", "review_text"]
    key_fields = ("product_name", "username", "review_text")

    existing_reviews = set()
    # True only when the file already starts with a usable header row, i.e.
    # when it is safe to append data rows without writing the header again.
    file_exists = False

    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        # newline='' lets the csv module handle line breaks that are embedded
        # in quoted review text instead of having them translated on read.
        with open(filename, 'r', newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            fieldnames = reader.fieldnames or []
            if all(field in fieldnames for field in key_fields):
                file_exists = True
                for row in reader:
                    existing_reviews.add(tuple(row[field] for field in key_fields))
            else:
                print("CSV file is malformed or has missing headers. Rewriting the file.")

    new_reviews = []
    for review in reviews:
        unique_key = tuple(review[field] for field in key_fields)
        if unique_key not in existing_reviews:
            new_reviews.append(review)
            existing_reviews.add(unique_key)

    with open(filename, 'a' if file_exists else 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=headers)
        if not file_exists:
            writer.writeheader()
        writer.writerows(new_reviews)

    if new_reviews:
        print(f"Appended {len(new_reviews)} new reviews to '{filename}'.")
    else:
        print("No new reviews to append.")

# Script entry point: scrape all review pages starting at BASE_URL, then
# store the reviews that are not already in the default CSV file.
if __name__ == "__main__":
    reviews = fetch_reviews_with_product_name_and_pagination(BASE_URL)
    save_to_csv(reviews)

In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.sentiment import SentimentIntensityAnalyzer

# nltk.download('punkt')
# nltk.download('vader_lexicon')

# Single analyzer instance reused for every sentence that gets scored.
sia = SentimentIntensityAnalyzer()

# Keywords that are searched for in each review.
aspects = ['camera', 'battery', 'screen', 'performance', 'speed', 'design', 'sound', 'quality']

input_file = "total_phone_reviews.csv"
output_file = "total_review_sentiments.csv"

reviews_df = pd.read_csv(input_file)

# Stop early unless both columns used by the analysis are present.
if not {'review', 'model'}.issubset(reviews_df.columns):
    raise ValueError("The input CSV file must contain 'model' and 'review' columns.")

def extract_aspects(review, aspect_keywords):
    """Return the keywords that occur as whole tokens in *review*.

    The review is lower-cased before it is tokenized, and the result keeps
    the order of *aspect_keywords*.
    """
    review_tokens = word_tokenize(review.lower())

    matched = []
    for keyword in aspect_keywords:
        if keyword in review_tokens:
            matched.append(keyword)
    return matched

def classify_sentiment_and_phrase(review, aspect):
    """Label the sentiment of the sentence in *review* that mentions *aspect*.

    The review is lower-cased and split into sentences. The first sentence
    containing *aspect* as a whole token is preferred, which matches how
    ``extract_aspects`` detects aspects; this keeps e.g. "screenshot" from
    being scored for the aspect "screen". If no sentence has a token match,
    the first sentence containing *aspect* as a plain substring is used.

    Args:
        review: Review text.
        aspect: Keyword to look for; compared against lower-cased text.

    Returns:
        ``(sentiment, sentence)`` where ``sentiment`` is 'Positive',
        'Negative' or 'Neutral' and ``sentence`` is the lower-cased sentence
        that was scored, or ``(None, None)`` when no sentence mentions the
        aspect.
    """
    chosen = None
    substring_match = None

    for sentence in sent_tokenize(review.lower()):
        if aspect not in sentence:
            continue
        if aspect in word_tokenize(sentence):
            chosen = sentence
            break
        # Remember the earliest loose match in case no token match exists.
        if substring_match is None:
            substring_match = sentence

    if chosen is None:
        chosen = substring_match
    if chosen is None:
        return None, None

    compound_score = sia.polarity_scores(chosen)['compound']

    # Compound scores within (-0.05, 0.05) are treated as neutral.
    if compound_score >= 0.05:
        sentiment = 'Positive'
    elif compound_score <= -0.05:
        sentiment = 'Negative'
    else:
        sentiment = 'Neutral'

    return sentiment, chosen

# One dict per usable review: the model, the review text, and for every
# detected aspect its sentiment label plus the sentence it was derived from.
sentiment_results = []

for index, row in reviews_df.iterrows():
    review = row['review']
    model = row['model']

    # Anything that is not a string cannot be tokenized; report it and move on.
    if not isinstance(review, str):
        print(f"Skipping invalid review: {review}")
        continue

    aspects_found = extract_aspects(review, aspects)
    review_sentiments = {'model': model, 'review': review}

    for aspect in aspects_found:
        sentiment, full_phrase = classify_sentiment_and_phrase(review, aspect)
        if not sentiment:
            continue
        review_sentiments[aspect] = sentiment
        review_sentiments[f"{aspect}_full_phrase"] = full_phrase

    sentiment_results.append(review_sentiments)

sentiment_df = pd.DataFrame(sentiment_results)
sentiment_df.to_csv(output_file, index=False)

print(f"Sentiment analysis completed. Results saved to '{output_file}'.")


In [None]:
import pandas as pd
from transformers import pipeline

# Load the per-review sentiment table and lower-case the model names so that
# equality filters on the column work with lower-cased model names.
df = pd.read_csv('total_review_sentiments.csv').assign(
    model=lambda frame: frame['model'].str.lower()
)

# Summarization pipeline, created on first use and then shared by all calls
# so the model is not reloaded for every summary.
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it if necessary."""
    global _summarizer
    if _summarizer is None:
        _summarizer = pipeline('summarization', model='facebook/bart-large-cnn')
    return _summarizer


def summarize_phone_sentiments(model_name):
    """Summarize the aspect sentences recorded for one phone model.

    Every ``*_full_phrase`` column of the rows whose ``model`` equals
    *model_name* is collected, joined into one text and passed to the
    summarization model.

    Args:
        model_name: Model name to look up; compared for equality with the
            ``model`` column of ``df``.

    Returns:
        The generated summary, or a fixed notice when fewer than ten words of
        text are available.

    Raises:
        ValueError: If no row matches *model_name* or the data has no
            ``*_full_phrase`` columns.
    """
    phone_reviews = df[df['model'] == model_name]

    if phone_reviews.empty:
        raise ValueError(f"No reviews found for the model '{model_name}'.")

    aspect_columns = [col for col in phone_reviews.columns if col.endswith("_full_phrase")]
    if not aspect_columns:
        raise ValueError(f"No aspect columns found in the dataset for model '{model_name}'.")

    full_phrases = []
    for col in aspect_columns:
        full_phrases.extend(str(phrase) for phrase in phone_reviews[col].dropna())

    # A sentence that mentions several aspects is stored once per aspect
    # column; keep only its first occurrence so it is not summarized twice.
    full_phrases = list(dict.fromkeys(full_phrases))
    combined_text = " ".join(full_phrases)

    if len(combined_text.split()) < 10:
        return "Not enough data to generate a meaningful summary."

    # Note: this cap counts characters, not tokens.
    max_input_length = 1024
    combined_text = combined_text[:max_input_length]

    max_length = min(300, len(combined_text.split()) + 20)
    # A minimum above the maximum is an unsatisfiable length constraint, which
    # happens for short inputs (max_length can be as low as 30).
    min_length = 50 if max_length >= 50 else max_length // 2

    summary = _get_summarizer()(
        combined_text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Safeguard for text that still exceeds the model's input limit.
        truncation=True,
    )
    return summary[0]['summary_text']

# Interactive entry point: list the known models, ask for one, print its summary.
if __name__ == "__main__":
    unique_models = df['model'].unique()
    print("Available phone models:")
    for idx, model in enumerate(unique_models, start=1):
        print(f"{idx}. {model}")

    # Normalize the answer the same way the model column was normalized.
    model_name = input("\nEnter the phone model name: ").strip().lower()

    try:
        summary = summarize_phone_sentiments(model_name)
    except ValueError as e:
        print(f"Error: {e}")
    else:
        print(f"\nSummary for '{model_name}':\n{summary}")


Summary for SAMSUNG Galaxy S22:
The camera is perfect the sound is perfect, everything works somewhat fine other then the screen. 60 hz feels awful to use with the highest resolution and the screen is some cheap 3rd party screen that's unresponsive at times and not smooth. buy a used one from a resell site or buy one new. just broke my phone and replacing it.


Summary for SAMSUNG Galaxy A25:
The phone is fantastic with good battery timing and camera results. powerful cpu and 8gb ram draw a lot of power and make the phone really fast but the battery is great quality to last 4 days. The microphone seems to have an issue and the battery life is extremely short.

Summary for SAMSUNG Galaxy S22:
The camera is perfect the sound is perfect, everything works somewhat fine other then the screen. 60 hz feels awful to use with the highest resolution and the screen is some cheap 3rd party screen that's unresponsive at times and not smooth. buy a used one from a resell site or buy one new. just broke my phone and replacing it.