In [1]:
import re

import spacy

# Load the small English spaCy model.
# The OSError branch covers the case where the model package is not
# installed: tell the user how to install it, then stop the script.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("spaCy model 'en_core_web_sm' not found.")
    print("Please run: python -m spacy download en_core_web_sm")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist (e.g. under `python -S`). The non-zero status also lets a
    # calling shell or pipeline see that the run failed.
    raise SystemExit(1)

print("--- Task 3: spaCy NER and Sentiment ---")

# Sample Amazon review data.
# A real run would read these rows from a CSV file (e.g., a Kaggle export);
# the 'reviewText' key mirrors the column name used by common Kaggle datasets.
# Each (id, text) pair below becomes one {"id": ..., "reviewText": ...} record.
reviews = [
    {"id": review_id, "reviewText": text}
    for review_id, text in (
        (
            1,
            "I bought the new Sony WH-1000XM4 headphones last week. "
            "They are amazing! "
            "The noise cancellation is better than my old Bose QC35.",
        ),
        (
            2,
            "This Anker USB-C hub is a terrible product. "
            "It stopped working after just one day. "
            "I am returning it and buying a UGREEN one instead.",
        ),
        (
            3,
            "The new Apple MacBook Pro with the M2 chip is fantastic. "
            "I also got a Logitech MX Master 3 mouse to go with it.",
        ),
    )
]

# 1. Goal: Named Entity Recognition (NER)
# For every review, run the spaCy pipeline and list the entities it tagged
# as a product or an organisation.
print("\n--- 1. Named Entity Recognition (NER) ---")
for review in reviews:
    print(f"\nReview ID: {review['id']}")
    doc = nlp(review['reviewText'])

    print("  Entities Found:")
    # Only Products (PRODUCT) and Brands/Orgs (ORG) are of interest here;
    # every other entity label is dropped.
    relevant_entities = [
        ent for ent in doc.ents if ent.label_ in ("PRODUCT", "ORG")
    ]

    if relevant_entities:
        for ent in relevant_entities:
            print(f"    - Text: '{ent.text}', Label: '{ent.label_}'")
    else:
        print("    - No PRODUCT or ORG entities found.")


# 2. Goal: Rule-Based Sentiment Analysis
print("\n--- 2. Rule-Based Sentiment Analysis ---")

# Define simple keyword lists for our rules
POSITIVE_KEYWORDS = ["amazing", "fantastic", "great", "love", "best", "excellent"]
NEGATIVE_KEYWORDS = ["terrible", "bad", "hate", "stopped working", "worst", "awful", "return"]


def _keyword_pattern(keywords):
    """Compile a regex that finds any of *keywords* as a whole word or phrase.

    Plain substring tests give false hits ("hate" inside "whatever", "bad"
    inside "badge", "love" inside "gloves"), so each keyword is anchored at
    word boundaries. A small set of inflection suffixes (s, d, ed, ing) is
    still accepted so that forms such as "returning" or "loved" keep matching.

    Args:
        keywords: Lowercase keywords or multi-word phrases.

    Returns:
        A compiled pattern intended to be searched against lowercased text.
        An empty *keywords* yields a pattern that never matches.
    """
    if not keywords:
        # An empty alternation would match at every word boundary.
        return re.compile(r"(?!)")
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return re.compile(rf"\b(?:{alternatives})(?:s|d|ed|ing)?\b")


# Compile once, outside the loop, since the keyword lists do not change.
POSITIVE_PATTERN = _keyword_pattern(POSITIVE_KEYWORDS)
NEGATIVE_PATTERN = _keyword_pattern(NEGATIVE_KEYWORDS)

for review in reviews:
    print(f"\nReview ID: {review['id']}")

    # Store the text in a lowercase variable for easy matching
    text_lower = review['reviewText'].lower()

    # Check for keywords (whole words/phrases only, see _keyword_pattern)
    is_positive = POSITIVE_PATTERN.search(text_lower) is not None
    is_negative = NEGATIVE_PATTERN.search(text_lower) is not None

    # Neither polarity present -> Neutral; exactly one -> that polarity;
    # both -> Mixed.
    sentiment = "Neutral"
    if is_positive and not is_negative:
        sentiment = "Positive"
    elif is_negative and not is_positive:
        sentiment = "Negative"
    elif is_positive and is_negative:
        sentiment = "Mixed" # e.g., "The product was great, but the shipping was terrible"

    print(f"  Sentiment: {sentiment}")

print("\n-----------------------------------------")

--- Task 3: spaCy NER and Sentiment ---

--- 1. Named Entity Recognition (NER) ---

Review ID: 1
  Entities Found:
    - Text: 'Sony', Label: 'ORG'
    - Text: 'Bose QC35', Label: 'ORG'

Review ID: 2
  Entities Found:
    - No PRODUCT or ORG entities found.

Review ID: 3
  Entities Found:
    - Text: 'Apple MacBook Pro', Label: 'ORG'
    - Text: 'Logitech', Label: 'ORG'
    - Text: 'MX', Label: 'PRODUCT'

--- 2. Rule-Based Sentiment Analysis ---

Review ID: 1
  Sentiment: Positive

Review ID: 2
  Sentiment: Negative

Review ID: 3
  Sentiment: Positive

-----------------------------------------
