In [2]:
# Task 3: spaCy NER + rule-based sentiment
import re
from pathlib import Path

import pandas as pd
import spacy

In [4]:
# 1) Load the small English pipeline, downloading it first if it is not installed.
MODEL_NAME = "en_core_web_sm"

try:
    nlp = spacy.load(MODEL_NAME)
except OSError:
    # An OSError from spacy.load is treated here as "model package missing":
    # fetch it once, then retry the load. The CLI helper is imported lazily
    # because it is only needed on this fallback path.
    from spacy.cli import download

    download(MODEL_NAME)
    nlp = spacy.load(MODEL_NAME)


In [7]:

# 2) Load reviews
# Expect a CSV "reviews.csv" with a column 'review_text'. If not found, use samples.
REVIEWS_CSV = Path("reviews.csv")
REVIEW_COLUMN = "review_text"
MAX_REVIEWS = 20  # cap the number of reviews so the demo output stays short


def extract_review_texts(frame, column=REVIEW_COLUMN, limit=MAX_REVIEWS):
    """Return up to ``limit`` non-missing review strings from ``frame[column]``.

    Args:
        frame: DataFrame holding the reviews.
        column: Name of the column that contains the review text.
        limit: Maximum number of reviews to return (``None`` returns all).

    Returns:
        A list of ``str``, in the original row order, with missing values removed.

    Raises:
        KeyError: If ``column`` is not present in ``frame``.
    """
    if column not in frame.columns:
        raise KeyError(
            f"expected a '{column}' column; found columns: {list(frame.columns)}"
        )
    # Drop missing rows BEFORE casting: astype(str) would otherwise turn NaN
    # into the literal text "nan", which would be analysed as a real review.
    return frame[column].dropna().iloc[:limit].astype(str).tolist()


if REVIEWS_CSV.exists():
    df = pd.read_csv(REVIEWS_CSV)
    texts = extract_review_texts(df)
else:
    # Built-in fallback so the notebook still runs without the CSV.
    texts = [
        "I love the Sony headphones — the noise cancellation is amazing!",
        "Terrible battery life on this Samsung phone. Not worth the price.",
        "Apple AirPods sound great but the case scratches easily.",
        "The Nike running shoes are super comfortable and lightweight.",
        "Amazon Basics USB-C cable works fine, great value for money."
    ]

In [8]:
# 3) Simple rule-based sentiment (very small lexicon demo)
# Entries are lowercase; multi-word entries (e.g. "not worth") are matched as phrases.
POSITIVE_WORDS = {"amazing","love","great","super","comfortable","lightweight","fine","value"}
NEGATIVE_WORDS = {"terrible","bad","poor","hate","not worth","scratches","broken","slow"}


def _count_lexicon_hits(text, lexicon):
    """Count how many distinct ``lexicon`` entries occur in ``text`` as whole words/phrases."""
    hits = 0
    for term in lexicon:
        parts = term.split()
        if not parts:
            continue  # an empty entry can never be a meaningful match
        # Allow any run of whitespace between the words of a multi-word entry.
        phrase = r"\s+".join(re.escape(part) for part in parts)
        # Require non-word characters (or string edges) on both sides so that
        # "love" does not fire inside "glove", nor "bad" inside "badge".
        if re.search(rf"(?<!\w){phrase}(?!\w)", text):
            hits += 1
    return hits


def rule_sentiment(text:str):
    """Classify ``text`` as "positive", "negative" or "neutral" using the lexicons.

    Each lexicon entry counts at most once, however often it appears. The label
    is decided by whichever lexicon has more distinct entries present; a tie
    (including no matches at all) is "neutral". Matching is case-insensitive and
    whole-word. Negation ("not great") is not modelled.

    Args:
        text: The review text to score.

    Returns:
        One of the strings "positive", "negative" or "neutral".
    """
    t = text.lower()
    pos = _count_lexicon_hits(t, POSITIVE_WORDS)
    neg = _count_lexicon_hits(t, NEGATIVE_WORDS)
    if pos > neg:
        return "positive"
    if neg > pos:
        return "negative"
    return "neutral"

In [9]:
# 4) For every review: extract ORG/PRODUCT entities and print them with the rule-based label
wanted_labels = {"ORG", "PRODUCT"}
for t in texts:
    doc = nlp(t)
    # Keep only (surface text, label) pairs whose label is one we report on.
    ents = [(span.text, span.label_) for span in doc.ents if span.label_ in wanted_labels]
    print(
        f"TEXT: {t}",
        f"Entities (ORG/PRODUCT): {ents}",
        f"Sentiment (rule-based): {rule_sentiment(t)}",
        "-" * 80,
        sep="\n",
    )

TEXT: I love the Sony headphones — the noise cancellation is amazing!
Entities (ORG/PRODUCT): [('Sony', 'ORG')]
Sentiment (rule-based): positive
--------------------------------------------------------------------------------
TEXT: Terrible battery life on this Samsung phone. Not worth the price.
Entities (ORG/PRODUCT): [('Samsung', 'ORG')]
Sentiment (rule-based): negative
--------------------------------------------------------------------------------
TEXT: Apple AirPods sound great but the case scratches easily.
Entities (ORG/PRODUCT): [('Apple AirPods', 'ORG')]
Sentiment (rule-based): neutral
--------------------------------------------------------------------------------
TEXT: The Nike running shoes are super comfortable and lightweight.
Entities (ORG/PRODUCT): [('Nike', 'ORG')]
Sentiment (rule-based): positive
--------------------------------------------------------------------------------
TEXT: Amazon Basics USB-C cable works fine, great value for money.
Entities (ORG/PRODUCT): [