In [2]:
# spacy_ner_sentiment.py
# Requirements: spacy
# pip install spacy
# python -m spacy download en_core_web_sm

import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")  # lightweight model

# Example reviews (replace with real Amazon review texts)
reviews = [
    "I bought the Acme Turbo Blender and it works great. Acme's customer service was helpful.",
    "Terrible battery life on the PowerMax headphones. Avoid this brand.",
    "Love the style of the Acme smartwatch. Great value.",
]

# Use spaCy NER to extract PRODUCT, ORG etc.
for doc in nlp.pipe(reviews):
    print("Review:", doc.text)
    ents = [(ent.text, ent.label_) for ent in doc.ents]
    print("Entities:", ents)

# If you need to specifically capture product names/brands more reliably,
# use a simple pattern-based matcher (example: capitalized Noun sequences)
matcher = Matcher(nlp.vocab)
pattern = [{"IS_TITLE": True, "POS": "PROPN"}, {"IS_TITLE": True, "POS": "PROPN", "OP": "?"}]
matcher.add("BRAND_PRODUCT", [pattern])

for text in reviews:
    doc = nlp(text)
    matches = matcher(doc)
    extracted = [doc[start:end].text for _, start, end in matches]
    print("Pattern matches:", extracted)

# Rule-based sentiment (very simple example)
positive_words = {"great", "excellent", "love", "wonderful", "helpful", "awesome", "good", "value"}
negative_words = {"terrible", "bad", "awful", "poor", "hate", "avoid", "disappointing", "worst"}

def simple_sentiment(text):
    doc = nlp(text.lower())
    pos = sum(1 for token in doc if token.lemma_ in positive_words)
    neg = sum(1 for token in doc if token.lemma_ in negative_words)
    if pos > neg:
        return "positive"
    elif neg > pos:
        return "negative"
    else:
        return "neutral"

for r in reviews:
    print(r)
    print("Sentiment:", simple_sentiment(r))
    print("---")


Review: I bought the Acme Turbo Blender and it works great. Acme's customer service was helpful.
Entities: [('Acme', 'ORG')]
Review: Terrible battery life on the PowerMax headphones. Avoid this brand.
Entities: [('PowerMax', 'ORG')]
Review: Love the style of the Acme smartwatch. Great value.
Entities: [('Acme', 'ORG')]
Pattern matches: ['Acme', 'Acme Turbo', 'Turbo', 'Turbo Blender', 'Blender', 'Acme']
Pattern matches: []
Pattern matches: ['Acme']
I bought the Acme Turbo Blender and it works great. Acme's customer service was helpful.
Sentiment: positive
---
Terrible battery life on the PowerMax headphones. Avoid this brand.
Sentiment: negative
---
Love the style of the Acme smartwatch. Great value.
Sentiment: positive
---
