# Sentiment Analysis

This notebook performs sentiment analysis on retail reviews using VADER and TextBlob.


In [None]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns

# Initialize VADER analyzer
# A single instance is created here and reused by the scoring cell, which
# calls analyzer.polarity_scores() once per review.
analyzer = SentimentIntensityAnalyzer()

# Simple confirmation that the import cell ran to completion.
print("Libraries imported successfully!")


## Load Processed Data


In [None]:
# Read the processed reviews CSV, report the row count, and preview the frame.
processed_path = '../data/processed_reviews.csv'
df = pd.read_csv(processed_path)
row_count = len(df)
print(f"Loaded {row_count} processed reviews")
df.head()


## Sentiment Analysis


In [None]:
# Get VADER sentiment scores
# Every review is coerced to str and scored once; the resulting per-review
# score mappings are then split into one column per score key.
print("Computing VADER sentiment scores...")
vader_scores = df['cleaned_text'].apply(lambda text: analyzer.polarity_scores(str(text)))
for score_key in ('compound', 'pos', 'neu', 'neg'):
    # Bind the key as a default argument so each lambda keeps its own key.
    df[f'vader_{score_key}'] = vader_scores.apply(lambda scores, k=score_key: scores[k])

# Get TextBlob sentiment scores
print("Computing TextBlob sentiment scores...")
review_texts = df['cleaned_text']
df['textblob_polarity'] = review_texts.apply(lambda text: TextBlob(str(text)).sentiment.polarity)

# Classify sentiment
def classify_sentiment(compound_score: float, threshold: float = 0.05) -> str:
    """Map a compound polarity score to a sentiment label.

    Args:
        compound_score: Compound polarity score to classify.
        threshold: Non-negative cutoff. Scores at or above ``threshold`` are
            'positive'; scores at or below ``-threshold`` are 'negative'.
            Defaults to 0.05, the cutoff that used to be hard-coded here.

    Returns:
        'positive', 'negative', or 'neutral'. A NaN score compares false
        against both cutoffs and is therefore labelled 'neutral'.

    Raises:
        ValueError: If ``threshold`` is negative, which would make the
            positive and negative ranges overlap.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")
    if compound_score >= threshold:
        return 'positive'
    if compound_score <= -threshold:
        return 'negative'
    return 'neutral'

# Label each review from its VADER compound score, then preview the key columns.
df['sentiment_label'] = df['vader_compound'].apply(classify_sentiment)

print("Sentiment analysis complete!")
preview_columns = ['review_text', 'vader_compound', 'textblob_polarity', 'sentiment_label']
df[preview_columns].head()


## Sentiment Distribution


In [None]:
# Plot sentiment distribution
# value_counts() orders labels by frequency, not in a fixed label order, so
# colors are looked up per label instead of being assigned by position.
# A positional color list would paint e.g. 'neutral' red whenever it
# outnumbers 'negative'.
label_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'gray'}
sentiment_counts = df['sentiment_label'].value_counts()
bar_colors = [label_colors.get(label, 'gray') for label in sentiment_counts.index]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(sentiment_counts.index, sentiment_counts.values, color=bar_colors)
ax.set_title('Sentiment Distribution', fontsize=16)
ax.set_xlabel('Sentiment Label', fontsize=12)
ax.set_ylabel('Number of Reviews', fontsize=12)
fig.tight_layout()
fig.savefig('../results/visuals/sentiment_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

# Text summary: label counts plus the mean score from each analyzer.
print("Sentiment Distribution:")
print(sentiment_counts)
print(f"\nAverage VADER Compound Score: {df['vader_compound'].mean():.3f}")
print(f"Average TextBlob Polarity: {df['textblob_polarity'].mean():.3f}")


## Save Results


In [None]:
# Save sentiment scores
# Only the identifier, text, and score/label columns are written out.
score_columns = [
    'review_id', 'review_text', 'cleaned_text',
    'vader_compound', 'vader_pos', 'vader_neu', 'vader_neg',
    'textblob_polarity', 'sentiment_label',
]
output_df = df[score_columns].copy()
output_df.to_csv('../results/sentiment_scores.csv', index=False)
print("Sentiment scores saved to ../results/sentiment_scores.csv")
