# Insights and Visualization

This notebook generates insights and visualizations from sentiment analysis and topic modeling results.


In [None]:
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide plot styling: seaborn's white-grid theme and a wide default
# figure size for every matplotlib figure created below.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Visible confirmation that the setup cell ran to completion.
print("Libraries imported successfully!")


## Load Data


In [None]:
# Load sentiment scores
sentiment_df = pd.read_csv('../results/sentiment_scores.csv')

if 'date' in sentiment_df.columns:
    # Parse the recorded dates into datetime values.
    sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
else:
    # No dates were exported. Fall back to one synthetic day per row starting
    # 2024-01-01 so the time-based cells still run, and say so explicitly:
    # any trend plotted from these dates is illustrative, not real.
    print("Date column not found in sentiment_scores.csv; "
          "using synthetic daily dates starting 2024-01-01.")
    sentiment_df['date'] = pd.date_range(
        '2024-01-01', periods=len(sentiment_df), freq='D'
    )

# Load topic keywords
topic_df = pd.read_csv('../results/topic_keywords.csv')

print(f"Loaded {len(sentiment_df)} sentiment scores")
print(f"Loaded {len(topic_df)} topics")


## Trend Analysis Over Time


In [None]:
def plot_monthly_sentiment(monthly_scores, output_path):
    """Draw the monthly mean sentiment as a line chart, save it, and show it.

    Parameters
    ----------
    monthly_scores : pandas.Series
        Mean sentiment score per month (index: month, values: score).
    output_path : str or pathlib.Path
        Destination image file. Missing parent directories are created so
        saving does not fail on a fresh checkout.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(14, 6))
    monthly_scores.plot(kind='line', marker='o', linewidth=2, markersize=8, ax=ax)
    ax.set_title('Sentiment Trend Over Time', fontsize=16)
    ax.set_xlabel('Month', fontsize=12)
    ax.set_ylabel('Average Sentiment Score', fontsize=12)
    # Zero line separates net-positive months from net-negative ones.
    ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.show()


# Guard for a frame without dates: substitute one synthetic day per row so the
# aggregation below still works, and tell the reader the trend is a sample.
if 'date' not in sentiment_df.columns:
    print("Date column not found. Creating sample trend visualization...")
    sentiment_df['date'] = pd.date_range('2024-01-01', periods=len(sentiment_df), freq='D')

# Aggregate sentiment over time: bucket each row by calendar month and average
# the VADER compound score within each bucket.
sentiment_df['date'] = pd.to_datetime(sentiment_df['date'])
sentiment_df['month'] = sentiment_df['date'].dt.to_period('M')
monthly_sentiment = sentiment_df.groupby('month')['vader_compound'].mean()

plot_monthly_sentiment(monthly_sentiment, '../results/visuals/trend_over_time.png')


In [None]:
# Summary report: prints headline numbers from the sentiment and topic tables,
# followed by a fixed list of recommendations.
RULE = "=" * 60
KEYWORD_PREVIEW_CHARS = 50  # maximum keyword characters shown per topic

print(RULE)
print("KEY INSIGHTS")
print(RULE)

# Count each label once and reuse the result for sections 1 and 4.
label_counts = sentiment_df['sentiment_label'].value_counts()

print("\n1. Sentiment Distribution:")
print(label_counts)

print(f"\n2. Average Sentiment Score: {sentiment_df['vader_compound'].mean():.3f}")

print("\n3. Top Topics Identified:")
for topic_id, keywords in zip(topic_df['topic_id'], topic_df['keywords']):
    # An empty CSV cell is read back as NaN (a float), which cannot be sliced;
    # treat it as an empty keyword string instead of crashing.
    keywords = "" if pd.isna(keywords) else str(keywords)
    preview = keywords[:KEYWORD_PREVIEW_CHARS]
    # Only mark the preview as truncated when text was actually cut off.
    if len(keywords) > KEYWORD_PREVIEW_CHARS:
        preview += "..."
    print(f"   Topic {topic_id}: {preview}")

# .get(..., 0) reports zero for a label that never occurs in the data.
print(f"\n4. Positive Reviews: {label_counts.get('positive', 0)}")
print(f"   Negative Reviews: {label_counts.get('negative', 0)}")
print(f"   Neutral Reviews: {label_counts.get('neutral', 0)}")

print("\n" + RULE)
print("RECOMMENDATIONS")
print(RULE)
print("1. Focus on addressing common complaints identified in negative reviews")
print("2. Highlight praised features from positive reviews in marketing")
print("3. Monitor sentiment trends to detect shifts in customer perception")
print("4. Use topic modeling insights to improve product features")
