In [None]:
# Cell 1: Import Libraries
import re

import spacy

# Load the English spaCy model (ensure you've run 'python -m spacy download en_core_web_sm' in terminal)
try:
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model loaded successfully!")
except OSError:
    print("SpaCy model not found. Please run 'python -m spacy download en_core_web_sm' in your terminal and try again.")
    # Re-raise instead of calling exit(): exit() ends the whole interpreter
    # session, whereas re-raising stops execution at this cell, keeps the
    # install hint printed above visible, and shows the original traceback.
    raise

In [None]:
# Cell 2: Define Sample Review Texts
# Seven short, hand-written product reviews. The NER cell and the rule-based
# sentiment cell below both iterate over this list. Most reviews name a brand
# or product (e.g. "iPhone 15 Pro", "Samsung Galaxy S24"); the fourth one
# (the knockoff charger) names none.
review_texts = [
    "The new iPhone 15 Pro is an amazing device. Apple has outdone themselves.",
    "This Samsung Galaxy S24 has a terrible battery life. Very disappointed with the brand.",
    "Excellent Bose QuietComfort headphones! Sound quality is superb.",
    "I bought a cheap knockoff charger, it stopped working in a week. Don't waste your money.",
    "The Sony PlayStation 5 is fantastic for gaming, but it's often out of stock.",
    "My new Kindle Oasis arrived quickly. It's great for reading, a truly portable library.",
    "Terrible experience with this Dell XPS laptop, constant crashes."
]

# Confirmation message so the cell produces visible output when run.
print("Sample review texts defined.")

In [None]:
# Cell 3: Perform Named Entity Recognition (NER)
print("--- Named Entity Recognition (NER) ---")
extracted_entities = []

for review_no, text in enumerate(review_texts, start=1):
    doc = nlp(text)
    print(f"\nReview {review_no}: \"{text}\"")

    # Keep only the entity types relevant to brands/products: organisations,
    # products, places, people and nationality/group labels.
    entities_in_review = [
        {"text": ent.text, "label": ent.label_}
        for ent in doc.ents
        if ent.label_ in {"ORG", "PRODUCT", "GPE", "PERSON", "NORP"}
    ]

    for entity in entities_in_review:
        print(f"  - Entity: '{entity['text']}' (Type: {entity['label']})")

    # One list per review, in the same order as review_texts.
    extracted_entities.append(entities_in_review)

In [None]:
# Cell 4: Analyze Sentiment (Rule-Based Approach)
print("\n--- Sentiment Analysis (Rule-Based) ---")

# Lowercase cue words/phrases. Multi-word entries (e.g. "stopped working")
# are matched as whole phrases.
positive_words = ["amazing", "excellent", "superb", "fantastic", "great", "love", "good", "happy", "satisfied", "quick"]
negative_words = ["terrible", "disappointed", "stopped working", "waste", "cheap", "constant crashes", "bad", "slow", "poor"]


def _count_cue_hits(text_lower, cue_words):
    """Return how many entries of ``cue_words`` occur in ``text_lower``.

    Each cue counts at most once, however often it appears. A cue only matches
    where it begins at a word boundary, so inflected forms still count
    ("quick" matches "quickly") but embedded occurrences do not ("happy" does
    not match "unhappy", "love" does not match "glove").
    """
    return sum(
        1
        for cue in cue_words
        if re.search(r"\b" + re.escape(cue), text_lower)
    )


def analyze_sentiment_rule_based(text):
    """Classify ``text`` as "Positive", "Negative" or "Neutral" by cue counts.

    The text is lowercased, then the number of distinct ``positive_words`` and
    ``negative_words`` cues found in it are compared.

    Args:
        text: The review text to classify (a ``str``).

    Returns:
        "Positive" if more positive than negative cues were found, "Negative"
        for the reverse, and "Neutral" on a tie (including no cues at all).
    """
    text_lower = text.lower()
    pos_score = _count_cue_hits(text_lower, positive_words)
    neg_score = _count_cue_hits(text_lower, negative_words)

    if pos_score > neg_score:
        return "Positive"
    elif neg_score > pos_score:
        return "Negative"
    else:
        return "Neutral"  # pos_score == neg_score

# Classify every sample review and print it next to its predicted label.
for review_no, text in enumerate(review_texts, start=1):
    sentiment = analyze_sentiment_rule_based(text)
    print(
        f"\nReview {review_no}: \"{text}\"",
        f"  - Sentiment: {sentiment}",
        sep="\n",
    )

In [None]:
# Cell 1: Import Libraries
import spacy
import pandas as pd
import random

# Report the installed spaCy version, then confirm the imports succeeded.
print(
    f"spaCy Version: {spacy.__version__}",
    "Libraries imported successfully!",
    sep="\n",
)

In [None]:
# Cell 2: Load spaCy English Model
# Try the small English model first; if spaCy raises OSError because it is
# not installed, download it and load it again.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("spaCy model 'en_core_web_sm' not found. Downloading...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")
    print("spaCy model 'en_core_web_sm' downloaded and loaded successfully.")
else:
    # Only reached when the first load succeeded without a download.
    print("spaCy 'en_core_web_sm' model loaded successfully.")
    

In [None]:
# Cell 3: Sample Text Data (Amazon Reviews style)
# Ten short, generic review sentences with no brand or product names. The
# tokenization, NER and rule-based sentiment cells below all read this list.
# The set mixes clearly positive, clearly negative and mixed/lukewarm reviews.
amazon_reviews = [
    "The product is excellent! Very happy with the purchase.",
    "Battery life is terrible, died after 2 hours. Very disappointed.",
    "Works as expected, good value for money. Highly recommended.",
    "This is the worst item I've ever bought. A complete waste of money.",
    "It's okay, not great, not bad. Just mediocre.",
    "Fantastic performance, totally exceeded my expectations!",
    "Wish it had more features, but it's decent for the price.",
    "The delivery was fast, but the item was damaged.",
    "Absolutely love this! The design is sleek and it's so easy to use.",
    "Received a broken one. Customer service was unhelpful."
]

# Confirmation message so the cell produces visible output when run.
print("Sample Amazon reviews loaded.")

In [None]:
# Cell 4: Tokenization, POS Tagging, and Lemmatization
print("--- Tokenization, POS Tagging, and Lemmatization ---")

# One shared layout for the header and every token row keeps the columns aligned.
column_layout = "{:<15} {:<15} {:<10} {:<10} {:<10}"
table_header = column_layout.format("Token", "Lemma", "POS", "Is Alpha?", "Stopword?")
table_rule = "-" * 70

# Process first 3 reviews for brevity
for review_no, text in enumerate(amazon_reviews[:3], start=1):
    doc = nlp(text)
    print(f"\nReview {review_no}: '{text}'")
    print(table_header)
    print(table_rule)
    for token in doc:
        cells = (
            str(token),
            token.lemma_,
            token.pos_,
            str(token.is_alpha),
            str(token.is_stop),
        )
        print(column_layout.format(*cells))

In [None]:
# Cell 5: Named Entity Recognition (NER)
print("\n--- Named Entity Recognition (NER) ---")
for review_no, text in enumerate(amazon_reviews, start=1):
    doc = nlp(text)

    # Reviews without any recognised entity get a one-line notice.
    if not doc.ents:
        print(f"\nReview {review_no}: '{text}' - No entities found.")
        continue

    print(f"\nReview {review_no}: '{text}'")
    for ent in doc.ents:
        label = ent.label_
        print(f"  Entity: {ent.text}, Type: {label}, SpaCy Explanation: {spacy.explain(label)}")

In [None]:
# Cell 6: Basic Rule-Based Sentiment Analysis (Illustrative - very simple)
print("\n--- Basic Rule-Based Sentiment Analysis (Illustrative) ---")

# Lowercase single-word cues; tokens are compared against these verbatim.
positive_words = ["excellent", "happy", "good", "recommended", "fantastic", "love", "sleek", "easy"]
negative_words = ["terrible", "disappointed", "worst", "waste", "mediocre", "damaged", "unhelpful", "broken"]

def simple_sentiment(text):
    """Label ``text`` "Positive", "Negative" or "Neutral" from token cue counts.

    The text is lowercased and tokenized with ``nlp``. Every token found in
    ``positive_words`` adds one to the score, every token found in
    ``negative_words`` subtracts one, and the sign of the total picks the label
    (zero means "Neutral").
    """
    def polarity(word):
        # The positive lexicon is checked first, then the negative one.
        if word in positive_words:
            return 1
        if word in negative_words:
            return -1
        return 0

    sentiment_score = sum(polarity(token.text) for token in nlp(text.lower()))

    if sentiment_score == 0:
        return "Neutral"
    return "Positive" if sentiment_score > 0 else "Negative"

# Print each review followed by its predicted label and a blank separator line.
for review_no, review in enumerate(amazon_reviews, start=1):
    sentiment = simple_sentiment(review)
    print(f"Review {review_no}: '{review}'")
    print(f"  Sentiment: {sentiment}\n")

print("\nNote: This is a very simplistic rule-based sentiment analysis.")
print("It lacks context understanding, sarcasm detection, and nuances. For example:")
# The example has to be one the lexicon gets wrong: it contains a positive cue
# ("excellent") and no entry from negative_words, so the token count comes out
# positive although the review is clearly a complaint. (An example such as
# "This is great, another broken item!" would not illustrate the limitation:
# "great" is not in positive_words while "broken" is in negative_words, so it
# is already labelled Negative.)
sarcastic_review = "Oh excellent, it stopped working after one day!"
print(f"Review: '{sarcastic_review}' (Should be Negative)")
doc_sarcasm = nlp(sarcastic_review)
print(f"  Simple rule-based analysis: {simple_sentiment(doc_sarcasm.text)}")
print("\nAdvanced NLP (like machine learning models or deep learning) is needed for robust sentiment analysis.")