# NLTK Complete Guide - Section 13: Sentiment Analysis

This notebook covers:
- VADER Sentiment Analyzer
- SentiWordNet
- Custom Sentiment Analysis
- Practical Applications

In [None]:
import nltk

# Corpora and models this notebook needs. Recent NLTK releases (3.9 and later)
# load the tokenizer and tagger from the 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' packages, while older releases use 'punkt'
# and 'averaged_perceptron_tagger'. Both generations are requested so that
# word_tokenize() and pos_tag() work on a fresh kernel with either.
NLTK_RESOURCES = (
    'vader_lexicon',
    'sentiwordnet',
    'wordnet',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
)

for resource_id in NLTK_RESOURCES:
    # quiet=True keeps the cell output clean; already-installed packages are skipped.
    nltk.download(resource_id, quiet=True)

from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
from nltk import pos_tag

## 13.1 VADER Sentiment Analyzer

**VADER** (Valence Aware Dictionary and sEntiment Reasoner) is designed for social media text.

Features:
- Handles emojis, slang, capitalizations
- No training required
- Fast and efficient

In [None]:
# Build the VADER analyzer once; later cells reuse this `sia` instance.
sia = SentimentIntensityAnalyzer()

# Score one clearly positive sentence and show the raw score dictionary.
text = "I love this product! It's absolutely amazing."
scores = sia.polarity_scores(text)

print("Text:", text)
print("Scores:", scores)

In [None]:
# Legend for the four values returned by polarity_scores().
# Plain string literals are used because nothing is interpolated here.
print("VADER Scores Explained:")
print("-" * 40)
print("  neg: Negative sentiment (0 to 1)")
print("  neu: Neutral sentiment (0 to 1)")
print("  pos: Positive sentiment (0 to 1)")
print("  compound: Overall score (-1 to 1)")

# Cut-offs applied to the compound score by the classification cells below.
print("\nCompound interpretation:")
print("  >= 0.05: Positive")
print("  <= -0.05: Negative")
print("  Between: Neutral")

In [None]:
# Score a mixed bag of sentences and label each one from its compound score.
# The emoji and arrow characters were stored as mojibake (UTF-8 bytes decoded
# with the wrong codec); they are restored to the intended characters here.
sentences = [
    "I love this movie! It's fantastic!",
    "This is the worst experience ever.",
    "The weather is okay today.",
    "I'm not happy with the service.",
    "This product is not bad at all.",
    "AMAZING!!! Best purchase EVER!!! 😍",
    "meh, it's whatever...",
    "The food was good but the service was terrible.",
]

print("Sentiment Analysis Results")
print("=" * 70)

for sent in sentences:
    scores = sia.polarity_scores(sent)
    compound = scores['compound']

    # Same cut-offs as the legend above: >= 0.05 positive, <= -0.05 negative.
    if compound >= 0.05:
        sentiment = "POSITIVE 😊"
    elif compound <= -0.05:
        sentiment = "NEGATIVE 😞"
    else:
        sentiment = "NEUTRAL 😐"

    print(f"\n{sent}")
    print(f"  Compound: {compound:>6.3f} → {sentiment}")

## 13.2 VADER Handles Special Cases

In [None]:
# Capitalization (emphasis): the same sentence with and without an
# ALL-CAPS sentiment word, so the two compound scores can be compared.
print("Effect of CAPITALIZATION:")
print("-" * 40)

text1 = "This is great."
text2 = "This is GREAT."

# The arrow was stored as mojibake; it is restored to the intended "→".
for sample in (text1, text2):
    print(f"'{sample}' → compound: {sia.polarity_scores(sample)['compound']:.3f}")

In [None]:
# Punctuation (intensity): identical wording with a growing number of
# exclamation marks, to show how the compound score responds.
print("\nEffect of PUNCTUATION:")
print("-" * 40)

texts = [
    "I love it.",
    "I love it!",
    "I love it!!",
    "I love it!!!",
]

for text in texts:
    score = sia.polarity_scores(text)['compound']
    # The arrow was stored as mojibake; it is restored to the intended "→".
    print(f"'{text}' → compound: {score:.3f}")

In [None]:
# Negation handling: a positive and a negative word, each with and without
# a negator ("not" / "n't"), to compare the resulting compound scores.
print("\nEffect of NEGATION:")
print("-" * 40)

texts = [
    "This is good.",
    "This is not good.",
    "This isn't good.",
    "This is not bad.",
]

for text in texts:
    score = sia.polarity_scores(text)['compound']
    # The arrow was stored as mojibake; it is restored to the intended "→".
    print(f"'{text}' → compound: {score:.3f}")

In [None]:
# Emoji support: the same short sentences with and without emojis.
# The emoji characters were stored as mojibake (UTF-8 bytes decoded with the
# wrong codec), so the analyzer was never given real emojis; they are restored
# here.
# NOTE(review): upstream vaderSentiment translates emojis to text before
# scoring - confirm the installed NLTK port does too; if it does not, these
# scores will simply match the emoji-free sentence.
print("\nEffect of EMOJIS:")
print("-" * 40)

texts = [
    "I like this.",
    "I like this 😊",
    "I like this 😊😊😊",
    "Great job 👍",
    "This is bad 😢",
]

for text in texts:
    score = sia.polarity_scores(text)['compound']
    print(f"'{text}' → compound: {score:.3f}")

## 13.3 SentiWordNet

**SentiWordNet** assigns sentiment scores to WordNet synsets.

In [None]:
# Look up one specific sense ('happy.a.01') and print its three scores.
happy = swn.senti_synset('happy.a.01')

print("Synset: happy.a.01")
for label, value in (
    ("Positive", happy.pos_score()),
    ("Negative", happy.neg_score()),
    ("Objective", happy.obj_score()),
):
    print(f"  {label} score: {value:.3f}")

In [None]:
# Side-by-side scores for a few opposing word pairs, using only the first
# sense SentiWordNet returns for each (word, part-of-speech) pair.
words = [
    ('happy', 'a'),
    ('sad', 'a'),
    ('good', 'a'),
    ('bad', 'a'),
    ('love', 'n'),
    ('hate', 'n'),
]

print("SentiWordNet Scores")
print("=" * 55)
print(f"{'Word':<12} {'Positive':<12} {'Negative':<12} {'Objective'}")
print("-" * 55)

for word, pos in words:
    synsets = list(swn.senti_synsets(word, pos))
    if not synsets:
        # Nothing to report for a word SentiWordNet has no entry for.
        continue
    ss = synsets[0]
    row = (
        f"{word:<12} "
        f"{ss.pos_score():<12.3f} "
        f"{ss.neg_score():<12.3f} "
        f"{ss.obj_score():.3f}"
    )
    print(row)

In [None]:
# List the first five senses of one word across all parts of speech.
word = 'good'
print(f"All senses of '{word}':")
print("-" * 60)

senses = list(swn.senti_synsets(word))
for ss in senses[:5]:
    sense_name = ss.synset.name()
    print(f"  {sense_name:<20} pos:{ss.pos_score():.2f} neg:{ss.neg_score():.2f}")

## 13.4 SentiWordNet-based Analyzer

In [None]:
def get_wordnet_pos(tag):
    """Map a POS tag to the matching WordNet part-of-speech constant.

    The decision is made on the tag's leading letter: ``J`` -> ``wn.ADJ``,
    ``V`` -> ``wn.VERB``, ``N`` -> ``wn.NOUN``, ``R`` -> ``wn.ADV``.

    Args:
        tag: POS tag string (for example ``'JJ'`` or ``'NNS'``).

    Returns:
        The corresponding ``wn`` constant, or ``None`` when the tag starts
        with none of the four letters.
    """
    # Attribute names are resolved lazily so `wn` is only touched on a match.
    prefix_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attr_name in prefix_to_attr:
        if tag.startswith(prefix):
            return getattr(wn, attr_name)
    return None

def sentiwordnet_score(text):
    """Score a text by averaging SentiWordNet scores over its words.

    The text is lower-cased, tokenized and POS-tagged. Every token whose tag
    maps to a WordNet part of speech and that has at least one SentiWordNet
    entry contributes the scores of its first entry.

    Args:
        text: The string to analyze.

    Returns:
        A dict with keys ``'positive'``, ``'negative'`` and ``'compound'``:
        the mean positive score, the mean negative score and their difference
        over the contributing tokens. All three are ``0`` when no token
        contributed.
    """
    tagged_tokens = pos_tag(word_tokenize(text.lower()))

    total_pos = 0
    total_neg = 0
    scored_tokens = 0

    for token, tag in tagged_tokens:
        wn_pos = get_wordnet_pos(tag)
        if wn_pos is not None:
            senses = list(swn.senti_synsets(token, wn_pos))
            if senses:
                # Only the first listed sense is used; no disambiguation.
                first_sense = senses[0]
                total_pos += first_sense.pos_score()
                total_neg += first_sense.neg_score()
                scored_tokens += 1

    if not scored_tokens:
        return {'positive': 0, 'negative': 0, 'compound': 0}

    mean_pos = total_pos / scored_tokens
    mean_neg = total_neg / scored_tokens
    mean_diff = (total_pos - total_neg) / scored_tokens
    return {'positive': mean_pos, 'negative': mean_neg, 'compound': mean_diff}

In [None]:
# Run the SentiWordNet-based scorer on a positive, a negative and a
# lukewarm sentence.
sentences = [
    "I love this wonderful product.",
    "This is terrible and horrible.",
    "The movie was okay.",
]

print("SentiWordNet Analysis")
print("=" * 50)

for sent in sentences:
    scores = sentiwordnet_score(sent)
    print(f"\n{sent}")
    for label, key in (
        ("Positive", 'positive'),
        ("Negative", 'negative'),
        ("Compound", 'compound'),
    ):
        print(f"  {label}: {scores[key]:.3f}")

## 13.5 Complete Sentiment Analyzer Class

In [None]:
class SentimentAnalyzer:
    """Multi-method sentiment analyzer.

    Wraps NLTK's VADER analyzer and the module-level ``sentiwordnet_score``
    helper, returning a result dict from each with ``'method'``,
    ``'positive'``, ``'negative'``, ``'compound'`` and ``'sentiment'`` keys
    (VADER results also carry ``'neutral'``).
    """

    # Accepted `method` values for analyze_batch -> name of the handling method.
    _BATCH_METHODS = {
        'vader': 'analyze_vader',
        'sentiwordnet': 'analyze_sentiwordnet',
    }

    def __init__(self):
        """Create the underlying VADER analyzer."""
        self.vader = SentimentIntensityAnalyzer()

    def analyze_vader(self, text):
        """Analyze ``text`` with VADER.

        Returns:
            dict with ``'method'`` (``'VADER'``), the ``'positive'``,
            ``'negative'``, ``'neutral'`` and ``'compound'`` scores, and the
            ``'sentiment'`` label derived from the compound score.
        """
        scores = self.vader.polarity_scores(text)
        return {
            'method': 'VADER',
            'positive': scores['pos'],
            'negative': scores['neg'],
            'neutral': scores['neu'],
            'compound': scores['compound'],
            'sentiment': self._classify(scores['compound'])
        }

    def analyze_sentiwordnet(self, text):
        """Analyze ``text`` with the SentiWordNet-based scorer.

        Returns:
            dict with ``'method'`` (``'SentiWordNet'``), the ``'positive'``,
            ``'negative'`` and ``'compound'`` scores, and the ``'sentiment'``
            label derived from the compound score. There is no ``'neutral'``
            key because ``sentiwordnet_score`` does not produce one.
        """
        scores = sentiwordnet_score(text)
        return {
            'method': 'SentiWordNet',
            'positive': scores['positive'],
            'negative': scores['negative'],
            'compound': scores['compound'],
            'sentiment': self._classify(scores['compound'])
        }

    def analyze_all(self, text):
        """Analyze ``text`` with every method.

        Returns:
            dict with the input under ``'text'`` and one result dict each
            under ``'vader'`` and ``'sentiwordnet'``.
        """
        return {
            'text': text,
            'vader': self.analyze_vader(text),
            'sentiwordnet': self.analyze_sentiwordnet(text)
        }

    def _classify(self, score, threshold=0.05):
        """Turn a compound score into a label.

        Returns ``'positive'`` when ``score >= threshold``, ``'negative'``
        when ``score <= -threshold`` and ``'neutral'`` otherwise.
        """
        if score >= threshold:
            return 'positive'
        elif score <= -threshold:
            return 'negative'
        return 'neutral'

    def analyze_batch(self, texts, method='vader'):
        """Analyze multiple texts with one method.

        Args:
            texts: Iterable of strings to analyze.
            method: ``'vader'`` (default) or ``'sentiwordnet'``.

        Returns:
            A list with one result dict per input text, in input order. Each
            dict is the chosen analyzer's result plus the original string
            under ``'text'``.

        Raises:
            ValueError: If ``method`` is not a supported name. Previously any
                unrecognised value (including typos such as ``'vadar'``)
                silently fell back to SentiWordNet.
        """
        # Resolve the handler once, before touching any text, so a bad method
        # name fails immediately rather than producing results from the wrong
        # analyzer.
        try:
            handler_name = self._BATCH_METHODS[method]
        except (KeyError, TypeError):
            supported = ", ".join(repr(name) for name in self._BATCH_METHODS)
            raise ValueError(
                f"Unknown method {method!r}; expected one of: {supported}"
            ) from None
        analyze = getattr(self, handler_name)

        results = []
        for text in texts:
            result = analyze(text)
            result['text'] = text
            results.append(result)
        return results

In [None]:
# Run both analysis methods on one strongly positive sentence and dump
# every field of each result.
analyzer = SentimentAnalyzer()

text = "I absolutely love this amazing product! Best purchase ever!"

print(f"Text: {text}\n")

# VADER result first ...
vader_result = analyzer.analyze_vader(text)
print("VADER Analysis:")
print("\n".join(f"  {key}: {value}" for key, value in vader_result.items()))

# ... then the SentiWordNet result, separated by a blank line.
swn_result = analyzer.analyze_sentiwordnet(text)
print("\nSentiWordNet Analysis:")
print("\n".join(f"  {key}: {value}" for key, value in swn_result.items()))

## 13.6 Practical: Review Analysis

In [None]:
# Sample product reviews.
# The emoji characters in this cell were stored as mojibake (UTF-8 bytes
# decoded with the wrong codec); they are restored to the intended emojis.
reviews = [
    "Absolutely fantastic! Best purchase I've made this year.",
    "Terrible quality. Broke after one week. Don't buy!",
    "It's okay. Does what it's supposed to do.",
    "Love it! Works great and fast shipping!",
    "Disappointed. Expected better quality for the price.",
    "Not bad, not great. Average product.",
    "AMAZING!!! Exceeded all my expectations! 😍",
    "Waste of money. Returning immediately.",
    "Pretty good overall. Minor issues but happy with it.",
    "The worst! Never buying from here again!",
]

PREVIEW_WIDTH = 50  # longest review excerpt printed per result line

analyzer = SentimentAnalyzer()
results = analyzer.analyze_batch(reviews)

print("Product Review Sentiment Analysis")
print("=" * 70)

# Per-class tallies; the visualization cell reads these three names.
positive = negative = neutral = 0

for result in results:
    sentiment = result['sentiment']
    compound = result['compound']

    if sentiment == 'positive':
        positive += 1
        emoji = '😊'
    elif sentiment == 'negative':
        negative += 1
        emoji = '😞'
    else:
        neutral += 1
        emoji = '😐'

    # Only add an ellipsis when the review was actually cut short.
    review_text = result['text']
    if len(review_text) > PREVIEW_WIDTH:
        preview = review_text[:PREVIEW_WIDTH] + "..."
    else:
        preview = review_text

    print(f"\n{emoji} [{compound:>6.3f}] {preview}")

# The overall verdict compares positive and negative counts only; ties are 'Mixed'.
if positive > negative:
    overall = 'Positive'
elif negative > positive:
    overall = 'Negative'
else:
    overall = 'Mixed'

print("\n" + "=" * 70)
print(f"Summary: {positive} positive, {neutral} neutral, {negative} negative")
print(f"Overall sentiment: {overall}")

In [None]:
# Visualization of the review results computed in the previous cell
# (reads `results`, `positive`, `neutral` and `negative`).
# The import is kept in this cell so the cell still runs on its own.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: share of reviews in each sentiment class.
labels = ['Positive', 'Neutral', 'Negative']
sizes = [positive, neutral, negative]
pie_colors = ['#4CAF50', '#FFC107', '#F44336']

axes[0].pie(sizes, labels=labels, colors=pie_colors, autopct='%1.1f%%', startangle=90)
axes[0].set_title('Sentiment Distribution')

# Right panel: one horizontal bar per review, coloured by its sentiment label.
# Using the label already stored on each result (instead of re-applying the
# +/-0.05 cut-offs here) keeps the colours in step with the classifier.
compounds = [r['compound'] for r in results]
bar_color_by_sentiment = {'positive': 'green', 'negative': 'red'}
bar_colors = [bar_color_by_sentiment.get(r['sentiment'], 'gray') for r in results]
bar_positions = range(len(compounds))

axes[1].barh(bar_positions, compounds, color=bar_colors)
axes[1].set_yticks(bar_positions)
axes[1].set_yticklabels([f"Review {i+1}" for i in bar_positions])
axes[1].set_xlabel('Compound Score')
axes[1].set_title('Sentiment Scores by Review')
# Thin vertical line at zero separates negative from positive scores.
axes[1].axvline(x=0, color='black', linestyle='-', linewidth=0.5)

plt.tight_layout()
plt.show()

## Summary

| Method | Best For | Speed |
|--------|----------|-------|
| VADER | Social media, reviews, informal text | Fast |
| SentiWordNet | Formal text, fine-grained analysis | Medium |

### VADER Scores
- **neg, neu, pos**: Proportion (0-1)
- **compound**: Overall (-1 to 1)
  - ≥ 0.05: Positive
  - ≤ -0.05: Negative
  - Between: Neutral

### Key Code
```python
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
scores = sia.polarity_scores(text)
```