In [None]:

# Task 3: NLP with spaCy - NER and Sentiment on Amazon Reviews

import spacy
from spacy import displacy
from spacy.matcher import Matcher
import re
from pathlib import Path

# Load the small English spaCy pipeline; it is used below to extract named
# entities from each review.
nlp = spacy.load("en_core_web_sm")

# Built-in sample reviews. They are processed only when FILE_PATH does not
# exist, so the cell still produces output without the dataset file.
reviews = [
    "I absolutely love the Sony headphones! Great sound quality and battery life.",
    "This Samsung phone has a terrible screen resolution. Would not recommend.",
    "The Nike running shoes are very comfortable and stylish. Highly recommend!",
    "Avoid the XYZ blender. Poor build quality and too noisy.",
    "Apple MacBook is a bit expensive but worth every penny for performance."
]

# Path of the review dataset, read relative to the working directory.
# Lines are expected to start with a `__label__N` prefix, which remove_label()
# strips off (presumably the fastText Amazon reviews test split - confirm).
FILE_PATH = Path("test.ft.txt")

# Sentiment lexicons. Entries are lowercase and are tested as substrings of
# the lowercased review, which is why multi-word phrases such as
# "highly recommend" work. Each entry found adds one positive / negative point.
POS_LEX = {
    "love", "great", "excellent", "amazing", "wonderful", "fantastic",
    "highly recommend", "worth", "favorite", "best", "incredible", "marvelous",
}
NEG_LEX = {
    "bad", "terrible", "awful", "horrible", "poor", "worst",
    "broken", "useless", "avoid", "disappointing", "not recommend", "crapped"
}

# Leading `__label__<digits>` prefix followed by whitespace and the review text.
_LABEL_RE = re.compile(r"__label__(\d+)\s+(.*)")

# Only these label values carry a known sentiment; anything else is reported
# as "no gold label" instead of being silently treated as negative.
_SENTIMENT_BY_LABEL = {"1": "Negative", "2": "Positive"}


def remove_label(line: str) -> "tuple[str | None, str]":
    """Split a leading ``__label__N`` prefix off a review line.

    Args:
        line: One raw line, optionally starting with ``__label__N`` followed
            by whitespace and the review text.

    Returns:
        A ``(gold_sentiment, text)`` tuple. ``gold_sentiment`` is
        ``"Positive"`` for label 2, ``"Negative"`` for label 1, and ``None``
        when the line has no label prefix or the label value is not one of
        those two. ``text`` is the review with the prefix (if any) removed
        and surrounding whitespace stripped.
    """
    m = _LABEL_RE.match(line)
    if not m:
        return None, line.strip()
    # Unknown label values still have their prefix removed so the marker never
    # leaks into the text that is analysed downstream.
    gold = _SENTIMENT_BY_LABEL.get(m.group(1))
    return gold, m.group(2).strip()


# Perform NER and sentiment analysis
def _iter_review_lines():
    """Yield raw review lines from FILE_PATH, or the sample reviews if it is missing."""
    try:
        handle = FILE_PATH.open(encoding="utf-8")
    except FileNotFoundError:
        # No dataset on disk: fall back to the in-notebook samples.
        yield from reviews
        return
    # Stream the file instead of read_text().splitlines(): the dataset is never
    # held in memory at once, and lines are split only on real newlines.
    # str.splitlines() also breaks on characters such as \x0c, \x85 or \u2028,
    # which would cut a labelled review into unlabelled fragments.
    with handle:
        yield from handle


for i, line in enumerate(_iter_review_lines()):
    review = line.strip()
    if not review:
        continue

    # The gold label is stripped so the `__label__N` marker does not reach the
    # NER / sentiment steps; it is not otherwise used in this cell.
    gold, review = remove_label(review)
    doc = nlp(review)

    print(f"\nReview {i}: {review}")
    print("Named Entities:")
    for ent in doc.ents:
        print(f"  - {ent.text} ({ent.label_})")

    # Simple rule-based sentiment analysis: one point per lexicon entry that
    # occurs in the review, positive minus negative.
    # NOTE: this is substring matching, so e.g. "worth" also fires on
    # "worthless" and "love" on "glove".
    lowered = review.lower()  # lowercase once, not once per lexicon entry
    sentiment_score = sum(term in lowered for term in POS_LEX) - sum(term in lowered for term in NEG_LEX)
    sentiment = "Positive" if sentiment_score > 0 else "Negative" if sentiment_score < 0 else "Neutral"
    print(f"Sentiment: {sentiment}")



Review 0: I absolutely love the Sony headphones! Great sound quality and battery life.
Named Entities:
  - Sony (ORG)
Sentiment: Positive

Review 1: This Samsung phone has a terrible screen resolution. Would not recommend.
Named Entities:
  - Samsung (ORG)
Sentiment: Negative

Review 2: The Nike running shoes are very comfortable and stylish. Highly recommend!
Named Entities:
  - Nike (ORG)
Sentiment: Positive

Review 3: Avoid the XYZ blender. Poor build quality and too noisy.
Named Entities:
  - XYZ (ORG)
Sentiment: Negative

Review 4: Apple MacBook is a bit expensive but worth every penny for performance.
Named Entities:
  - Apple MacBook (ORG)
  - every penny (MONEY)
Sentiment: Positive
