<a href="https://colab.research.google.com/github/P-eter-shi/Ai_for_web/blob/main/AmazonSentimentAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import spacy
from spacy import displacy
from spacy.tokens import Span
from spacy.matcher import PhraseMatcher
import pandas as pd
from collections import defaultdict
import random

# Set up spaCy with the English language model
nlp = spacy.load("en_core_web_lg")

# Sample Amazon product reviews
reviews = [
    "The Apple iPhone 13 Pro has an amazing camera but the battery life could be better.",
    "Samsung Galaxy S22 Ultra's display is stunning, though it's quite expensive.",
    "Bose QuietComfort 45 headphones have excellent noise cancellation and comfortable ear cups.",
    "Sony WH-1000XM4 headphones are terrible for calls but great for music.",
    "I love my new Dyson V11 vacuum cleaner - it's powerful and lightweight.",
    "The Microsoft Surface Pro 8 is a disappointment with its short battery life.",
    "Nintendo Switch OLED is fantastic for gaming on the go.",
    "Canon EOS R5 camera produces incredible images but overheats quickly.",
    "This Logitech MX Master 3 mouse is the worst product I've ever used.",
    "Amazon Echo Dot (4th Gen) has poor sound quality compared to Google Nest."
]

# Custom entity ruler for product names and brands
def setup_entity_ruler(nlp):
    # Patterns for common product categories
    patterns = [
        {"label": "PRODUCT", "pattern": [{"LOWER": "iphone"}, {"SHAPE": "dd"}, {"LOWER": "pro"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "galaxy"}, {"LOWER": "s"}, {"SHAPE": "dd"}, {"LOWER": "ultra"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "quietcomfort"}, {"SHAPE": "dd"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "wh"}, {"SHAPE": "dddd"}, {"LOWER": "xm"}, {"SHAPE": "d"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "surface"}, {"LOWER": "pro"}, {"SHAPE": "d"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "echo"}, {"LOWER": "dot"}, {"TEXT": {"REGEX": "\(.*\)"}}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "mx"}, {"LOWER": "master"}, {"SHAPE": "d"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "v"}, {"SHAPE": "dd"}]},
        {"label": "PRODUCT", "pattern": [{"LOWER": "eos"}, {"LOWER": "r"}, {"SHAPE": "d"}]}
    ]

    ruler = nlp.add_pipe("entity_ruler", before="ner")
    ruler.add_patterns(patterns)

    return nlp

# Enhance NER with known brands
def add_brand_entities(nlp):
    brands = {
        "ORG": ["Apple", "Samsung", "Bose", "Sony", "Dyson", "Microsoft", "Nintendo", "Canon", "Logitech", "Amazon", "Google"]
    }

    for label, items in brands.items():
        for item in items:
            doc = nlp(item)
            for ent in doc.ents:
                if ent.text == item:
                    break
            else:
                # Add the brand as an ORG entity if not already recognized
                new_ent = Span(doc, 0, len(doc), label=label)
                doc.ents = list(doc.ents) + [new_ent]

    return nlp

# Sentiment analysis rules
def analyze_sentiment(doc):
    # Define sentiment indicators
    positive_indicators = ["amazing", "stunning", "excellent", "great", "love", "fantastic", "incredible", "powerful", "lightweight"]
    negative_indicators = ["terrible", "worst", "disappointment", "poor", "could be better", "expensive", "short", "overheats"]

    # Initialize sentiment score
    sentiment_score = 0

    # Check for positive indicators
    for token in doc:
        if token.lemma_.lower() in positive_indicators:
            sentiment_score += 1
        elif token.lemma_.lower() in negative_indicators:
            sentiment_score -= 1

    # Check for negation
    for token in doc:
        if token.dep_ == "neg":
            sentiment_score *= -1

    # Determine overall sentiment
    if sentiment_score > 0:
        return "positive"
    elif sentiment_score < 0:
        return "negative"
    else:
        return "neutral"

# Process reviews and extract information
def process_reviews(reviews, nlp):
    results = []

    for review in reviews:
        doc = nlp(review)

        # Extract entities
        entities = [(ent.text, ent.label_) for ent in doc.ents]

        # Extract brands and products
        brands = [ent.text for ent in doc.ents if ent.label_ == "ORG"]
        products = [ent.text for ent in doc.ents if ent.label_ == "PRODUCT"]

        # Analyze sentiment
        sentiment = analyze_sentiment(doc)

        results.append({
            "review": review,
            "entities": entities,
            "brands": brands,
            "products": products,
            "sentiment": sentiment
        })

    return results

# Visualize NER results
def visualize_ner(reviews, nlp):
    for review in random.sample(reviews, 3):  # Visualize 3 random reviews
        doc = nlp(review)
        displacy.render(doc, style="ent", jupyter=False)

# Main function
def main(nlp):
    # Enhance the NLP pipeline
    nlp = setup_entity_ruler(nlp)
    nlp = add_brand_entities(nlp)

    # Process reviews
    results = process_reviews(reviews, nlp)

    # Create a DataFrame for better visualization
    df = pd.DataFrame(results)

    # Display results
    print("\nAnalysis Results:")
    print(df[["review", "brands", "products", "sentiment"]].to_string(index=False))

    # Visualize NER for sample reviews
    print("\nNER Visualization for Sample Reviews:")
    visualize_ner(reviews, nlp)

if __name__ == "__main__":
    main(nlp)


Analysis Results:
                                                                                     review                         brands          products sentiment
        The Apple iPhone 13 Pro has an amazing camera but the battery life could be better.                             []   [iPhone 13 Pro]  positive
               Samsung Galaxy S22 Ultra's display is stunning, though it's quite expensive.   [Samsung Galaxy S22 Ultra's]                []   neutral
Bose QuietComfort 45 headphones have excellent noise cancellation and comfortable ear cups.                             [] [QuietComfort 45]  positive
                     Sony WH-1000XM4 headphones are terrible for calls but great for music.              [Sony WH-1000XM4]                []   neutral
                    I love my new Dyson V11 vacuum cleaner - it's powerful and lightweight.                        [Dyson]                []  positive
               The Microsoft Surface Pro 8 is a disappointment with its sho

Named Entity Recognition:

    Custom entity ruler to identify product names (e.g., "iPhone 13 Pro", "WH-1000XM4")

    Brand recognition enhancement for common tech companies

    Visualization of entities in sample reviews

Sentiment Analysis:

    Rule-based system with positive/negative indicators

    Negation handling (e.g., "not great")

    Sentiment scoring with three categories (positive, neutral, negative)

Potential Biases in the Amazon Reviews Model:

    Language Bias:

        Only processes English reviews

        May miss cultural nuances in sentiment expression

    Product/Brand Bias:

        Entity recognition favors well-known brands

        May miss niche or regional products

    Sentiment Analysis Bias:

        Rule-based system favors obvious sentiment markers

        May misinterpret sarcasm or cultural expressions