In [None]:

import nltk
# Fetch NLTK data; the 'all' identifier requests the complete NLTK data
# collection, which is a large download.
# NOTE(review): no visible cell uses nltk -- confirm whether a narrower
# download (or none at all) would be sufficient.
nltk.download('all')

In [None]:
#Upload Amazon Reviews
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files


# Here is our Test Dataset

In [None]:
# Hand-written sample reviews, grouped into five thematic clusters of ten.
# Punctuation is stored as real Unicode characters (em dash, curly apostrophe)
# so the downstream models do not receive mis-decoded byte sequences.
dog_food_reviews = [
    # Cluster 1: Smell is appealing
    "This dog food smells amazing—almost like a chicken stew.",
    "It has a delicious smell that even I find pleasant.",
    "The aroma is rich and savory—my dog comes running.",
    "Way better-smelling than other brands I’ve tried.",
    "It actually smells like real chicken and rice.",
    "The pleasant aroma fills the whole kitchen.",
    "I love how fresh and wholesome it smells.",
    "The smell alone made me confident it was good quality.",
    "Smells better than some canned human soups!",
    "Even picky dogs react instantly to the smell.",

    # Cluster 2: Dogs eagerly eat it
    "My dog devours it in seconds.",
    "She licks the bowl clean every single time.",
    "My picky poodle actually begs for more now.",
    "Even my older dog with little appetite finishes it.",
    "My puppy refuses any other food now.",
    "They love it so much, we had to hide the bag.",
    "My dogs dance around every feeding time now.",
    "No more coaxing or tricking them to eat.",
    "It’s like a treat to them, not just food.",
    "Absolutely no leftovers—ever!",

    # Cluster 3: Coat, digestion, and health improvements
    "My dog's coat is shinier and softer after a month.",
    "Fewer rashes and much less itching now.",
    "My senior dog seems more energetic and mobile.",
    "Stool is firmer and less smelly than before.",
    "Helped with my bulldog’s constant scratching.",
    "My Labrador's fur looks glossy and healthy.",
    "No more digestive issues after meals.",
    "My dog’s breath even smells a little better!",
    "She’s visibly more playful and active.",
    "Noticeable weight improvement—no more being underweight.",

    # Cluster 4: Price concerns (positive and negative opinions mixed)
    "It’s a bit pricey for the amount you get.",
    "Definitely not the cheapest, but the quality is worth it.",
    "Wish there were larger bulk options for savings.",
    "Great quality, but I can only afford it sometimes.",
    "You pay more, but my dog's health improved a lot.",
    "Could be more affordable for multi-dog households.",
    "Worth the investment for sensitive dogs.",
    "Not budget-friendly but no vet visits anymore.",
    "I buy smaller bags to control spending.",
    "Higher priced than competitors, but way better results.",

    # Cluster 5: Negative experiences
    "Made my dog throw up twice—had to stop using it.",
    "Smelled weird and stale right out of the bag.",
    "My dog wouldn't even touch it.",
    "Caused really bad gas for three days.",
    "He had an allergic reaction and started itching again.",
    "Transitioning was tough—lots of upset stomachs.",
    "Arrived late and the bag was ripped.",
    "Caused constipation in my Chihuahua.",
    "Too rich for my small dog’s digestion.",
    "Had to switch brands after a bad reaction."
]

# Simple Summarization

In [None]:
from transformers import pipeline
import torch

# Step 1: Load the summarizer.
# The pipeline API takes device=0 for the first CUDA GPU and device=-1 for CPU;
# pick whichever this runtime actually has so the cell also runs without a GPU.
summarizer_device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=summarizer_device)

# Step 2: Combine every review into one document (watch out for token limits!)
combined_reviews = " ".join(dog_food_reviews)

# Step 3: Generate the summary.
# truncation=True clips input that exceeds the model's maximum length instead
# of letting an over-long document reach the model.
summary = summarizer(
    combined_reviews,
    max_length=130,
    min_length=30,
    do_sample=False,
    truncation=True,
)[0]['summary_text']

print("📦 Summarized Review:")
print(summary)


# Summarization by Chunking Reviews

In [None]:
from transformers import pipeline
import torch

# Step 1: Load summarizer.
# device=0 selects the first CUDA GPU and device=-1 the CPU; choose based on
# what is available so the cell does not fail on a CPU-only runtime.
summarizer_device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=summarizer_device)

def chunk_reviews(reviews, chunk_size):
    """Yield consecutive slices of ``reviews`` with at most ``chunk_size`` items each.

    Args:
        reviews: A sliceable sequence (e.g. a list of review strings).
        chunk_size: Maximum number of items per yielded slice; must be >= 1.

    Yields:
        Slices of ``reviews`` in order; the last one may be shorter.

    Raises:
        ValueError: If ``chunk_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every review;
    # zero would fail with an unrelated-looking range() error. Fail clearly.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    for start in range(0, len(reviews), chunk_size):
        yield reviews[start:start + chunk_size]

def summarize_reviews_in_chunks(reviews, chunk_size=10):
    """Summarize ``reviews`` batch by batch and return one summary per batch.

    Each batch produced by ``chunk_reviews`` is joined with single spaces and
    passed to the module-level ``summarizer`` pipeline.

    Args:
        reviews: Sequence of review strings.
        chunk_size: Number of reviews per batch.

    Returns:
        List of summary strings, in batch order.
    """
    def _summarize_batch(batch):
        # Join the batch into one document and keep only the generated text.
        document = " ".join(batch)
        result = summarizer(document, max_length=100, min_length=25, do_sample=False)
        return result[0]['summary_text']

    return [_summarize_batch(batch) for batch in chunk_reviews(reviews, chunk_size)]

# Step 1: one intermediate summary per batch of ten reviews
intermediate_summaries = summarize_reviews_in_chunks(dog_food_reviews, chunk_size=10)

# Step 2 (Optional): condense the intermediate summaries into a single final summary
final_summary = summarizer(
    " ".join(intermediate_summaries),
    max_length=130,
    min_length=30,
    do_sample=False,
)[0]['summary_text']

# Header and summary on separate lines.
print("Final Summarized Review:", final_summary, sep="\n")

# First, classify each review as either positive or negative; then summarize the positive and negative reviews separately so that the result reflects both sentiments

Flaw: the default pipeline("sentiment-analysis") model does not reliably capture the true sentiment of these reviews; for example, statements like "My dog devours it in seconds" are classified as negative.

In [None]:
from transformers import pipeline
import torch

# Load models.
# device=0 is the first CUDA GPU and device=-1 the CPU; select based on what
# this runtime provides so the cell also works without a GPU.
inference_device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=inference_device)
# Name the checkpoint explicitly instead of relying on the pipeline's implicit
# default, so results do not change if that default is ever updated.
# NOTE(review): this is the checkpoint transformers documents as the current
# default for this task -- confirm against the installed version.
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    device=inference_device,
)

def sentiment_aware_summary(reviews, chunk_size=10, max_len=100, min_len=25):
    """Build a report with an overall summary plus per-sentiment summaries.

    Relies on the module-level ``sentiment_classifier`` and ``summarizer``
    pipelines.

    Args:
        reviews: Iterable of review strings.
        chunk_size: Number of reviews joined into each summarizer input.
        max_len: ``max_length`` passed to every summarizer call.
        min_len: ``min_length`` passed to every summarizer call.

    Returns:
        A formatted multi-line string: overall summary, then the positive and
        negative sections.
    """
    reviews = list(reviews)

    # Step 1: Classify sentiment in one batched pipeline call
    # (skipped for empty input so the pipeline never sees an empty batch).
    predictions = sentiment_classifier(reviews) if reviews else []
    tagged = [(review, pred['label']) for review, pred in zip(reviews, predictions)]

    # Step 2: Split reviews by sentiment
    pos_reviews = [r for r, label in tagged if label == "POSITIVE"]
    neg_reviews = [r for r, label in tagged if label == "NEGATIVE"]

    def summarize_group(group_reviews):
        """Return the joined chunk summaries for a group ('' when it is empty)."""
        chunks = [" ".join(group_reviews[i:i+chunk_size]) for i in range(0, len(group_reviews), chunk_size)]
        return " ".join(
            summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
            for chunk in chunks
        )

    def format_section(label, text):
        """Wrap a group's summary text in its display header."""
        if not text:
            return f"{label} Summary: (No reviews)\n"
        return f"{label} Summary:\n" + text + "\n"

    # Step 3: Summarize each group
    positive_text = summarize_group(pos_reviews)
    negative_text = summarize_group(neg_reviews)
    positive_summary = format_section("🟢 Positive", positive_text)
    negative_summary = format_section("🔴 Negative", negative_text)

    # The overall summary is built from the summary text only: display headers
    # and the "(No reviews)" placeholder are not review content and must not
    # be fed to the summarizer.
    combined_text = " ".join(text for text in (positive_text, negative_text) if text)
    if combined_text:
        full_summary = summarizer(combined_text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
    else:
        full_summary = "(No reviews)"

    # Step 4: Combine
    final_output = "📦 Sentiment-Aware Review Summary\n\n" + "Full Summary\n" + full_summary + "\n\n" + positive_summary + "\n" + negative_summary
    return final_output

# Build the sentiment-aware report for the sample reviews and display it.
print(sentiment_aware_summary(reviews=dog_food_reviews))



In [None]:
from transformers import pipeline
import torch

# Load models.
# device=0 is the first CUDA GPU and device=-1 the CPU; select based on what
# this runtime provides so the cell also works without a GPU.
inference_device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=inference_device)
# Name the checkpoint explicitly instead of relying on the pipeline's implicit
# default, so results do not change if that default is ever updated.
# NOTE(review): this is the checkpoint transformers documents as the current
# default for this task -- confirm against the installed version.
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    device=inference_device,
)


def sentiment_aware_summary(reviews, chunk_size=10, max_len=100, min_len=25):
    """Build a sentiment-split review report and print a sample of each group.

    Relies on the module-level ``sentiment_classifier`` and ``summarizer``
    pipelines. Prints the first five reviews of each predicted class so the
    classifier's decisions can be inspected.

    Args:
        reviews: Iterable of review strings.
        chunk_size: Number of reviews joined into each summarizer input.
        max_len: ``max_length`` passed to every summarizer call.
        min_len: ``min_length`` passed to every summarizer call.

    Returns:
        A formatted multi-line string: overall summary, then the positive and
        negative sections.
    """
    reviews = list(reviews)

    # Step 1: Classify sentiment in one batched pipeline call
    # (skipped for empty input so the pipeline never sees an empty batch).
    predictions = sentiment_classifier(reviews) if reviews else []
    tagged = [(review, pred['label']) for review, pred in zip(reviews, predictions)]

    # Step 2: Split reviews by sentiment
    pos_reviews = [r for r, label in tagged if label == "POSITIVE"]
    neg_reviews = [r for r, label in tagged if label == "NEGATIVE"]

    # Inspection aid: show a few examples of what landed in each class.
    print(pos_reviews[:5])
    print(neg_reviews[:5])

    def summarize_group(group_reviews):
        """Return the joined chunk summaries for a group ('' when it is empty)."""
        chunks = [" ".join(group_reviews[i:i+chunk_size]) for i in range(0, len(group_reviews), chunk_size)]
        return " ".join(
            summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
            for chunk in chunks
        )

    def format_section(label, text):
        """Wrap a group's summary text in its display header."""
        if not text:
            return f"{label} Summary: (No reviews)\n"
        return f"{label} Summary:\n" + text + "\n"

    # Step 3: Summarize each group
    positive_text = summarize_group(pos_reviews)
    negative_text = summarize_group(neg_reviews)
    positive_summary = format_section("🟢 Positive", positive_text)
    negative_summary = format_section("🔴 Negative", negative_text)

    # The overall summary is built from the summary text only: display headers
    # and the "(No reviews)" placeholder are not review content and must not
    # be fed to the summarizer.
    combined_text = " ".join(text for text in (positive_text, negative_text) if text)
    if combined_text:
        full_summary = summarizer(combined_text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
    else:
        full_summary = "(No reviews)"

    # Step 4: Combine
    final_output = "📦 Sentiment-Aware Review Summary\n\n" + "Full Summary\n" + full_summary + "\n\n" + positive_summary + "\n" + negative_summary
    return final_output

# Build the sentiment-aware report for the sample reviews and display it.
print(sentiment_aware_summary(reviews=dog_food_reviews))



# Sentiment analysis using a pretrained RoBERTa model that takes word context into account

In [None]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

# Identifier of the pretrained sentiment checkpoint; the tokenizer and the
# sequence-classification model below are both loaded from it.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)


def sentiment_aware_summary(reviews, chunk_size=10, max_len=100, min_len=25):
    """Build a sentiment-split review report using the RoBERTa classifier.

    Relies on the module-level ``tokenizer`` and ``model`` for scoring, and on
    a module-level ``summarizer`` pipeline (not created in this cell) for the
    summaries.

    A review is treated as positive when its 'Positive' score is strictly
    greater than its 'Negative' score; every other review, including ties,
    goes to the negative group. The 'Neutral' score is computed but not used
    for the split.

    Args:
        reviews: Iterable of review strings to analyse.
        chunk_size: Number of reviews joined into each summarizer input.
        max_len: ``max_length`` passed to every summarizer call.
        min_len: ``min_length`` passed to every summarizer call.

    Returns:
        A formatted multi-line string: overall summary, then the positive and
        negative sections.
    """
    pos_reviews = []
    neg_reviews = []

    def get_roberta_sentiment_scores(review):
        """Return softmax scores for one review keyed by sentiment name."""
        encoded_text = tokenizer(review, return_tensors='pt')
        output = model(**encoded_text)
        # First output element, first row: the raw scores for this review.
        scores = output[0][0].detach().numpy()
        scores = softmax(scores)
        return {
            'Negative': scores[0],
            'Neutral': scores[1],
            'Positive': scores[2]
        }

    # Steps 1-2: score and split the reviews that were passed in (previously
    # this loop read the global dog_food_reviews and ignored the argument).
    for review in reviews:
        scores = get_roberta_sentiment_scores(review)
        if scores['Positive'] > scores['Negative']:
            pos_reviews.append(review)
        else:
            neg_reviews.append(review)

    def summarize_group(group_reviews):
        """Return the joined chunk summaries for a group ('' when it is empty)."""
        chunks = [" ".join(group_reviews[i:i+chunk_size]) for i in range(0, len(group_reviews), chunk_size)]
        return " ".join(
            summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
            for chunk in chunks
        )

    def format_section(label, text):
        """Wrap a group's summary text in its display header."""
        if not text:
            return f"{label} Summary: (No reviews)\n"
        return f"{label} Summary:\n" + text + "\n"

    # Step 3: Summarize each group
    positive_text = summarize_group(pos_reviews)
    negative_text = summarize_group(neg_reviews)
    positive_summary = format_section("🟢 Positive", positive_text)
    negative_summary = format_section("🔴 Negative", negative_text)

    # The overall summary is built from the summary text only: display headers
    # and the "(No reviews)" placeholder are not review content and must not
    # be fed to the summarizer.
    combined_text = " ".join(text for text in (positive_text, negative_text) if text)
    if combined_text:
        full_summary = summarizer(combined_text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
    else:
        full_summary = "(No reviews)"

    # Step 4: Combine
    final_output = "📦 Sentiment-Aware Review Summary\n\n" + "Full Summary\n" + full_summary + "\n\n" + positive_summary + "\n" + negative_summary
    return final_output

# Build the RoBERTa-based sentiment report for the sample reviews and display it.
print(sentiment_aware_summary(reviews=dog_food_reviews))

