# Retail Semantic Analysis Demo
This notebook demonstrates the complete pipeline for retail semantic analysis, including data loading, preprocessing, sentiment analysis, topic modeling, visualization generation, and saving the results.

In [None]:
import sys
# Make the project modules under ../src importable. The path is relative, so
# it resolves against the notebook's working directory.
sys.path.append('../src')

# NOTE(review): np, sns and datetime are not referenced in the cells visible
# in this notebook; they may be leftovers or used interactively.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Suppress every warning for the rest of the session to keep the demo output
# clean. This also hides deprecation warnings from the libraries in use.
warnings.filterwarnings('ignore')

# Project-local pipeline stages, found through the sys.path entry added above.
from data_loader import RetailDataLoader
from preprocessor import RetailTextPreprocessor
from sentiment_analyzer import RetailSentimentAnalyzer
from topic_modeling import RetailTopicModeler
from visualizations import RetailVisualizationGenerator

# Reaching this line means none of the imports above raised.
print("All modules imported successfully!")

## 1. Data Loading and Preparation

In [None]:
# Create the loader that supplies the review data for the rest of the notebook.
loader = RetailDataLoader()

# The demo runs on a generated sample dataset.
# To work with real data instead, you would use:
# loader.download_amazon_reviews()
# df = loader.load_amazon_reviews()

# Build a 1000-row sample dataset for demonstration.
df = loader.create_sample_dataset(size=1000)
print(f"Dataset loaded with {len(df)} reviews")
print("\nDataset info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the summary.
# (Assumes df is a pandas DataFrame, as the .info()/.head() calls suggest.)
df.info()
print("\nFirst few rows:")
# Left as the last expression so the notebook renders the preview table.
df.head()

## 2. Data Preprocessing

In [None]:
# Text cleaner for the review column.
preprocessor = RetailTextPreprocessor()

# Run the cleaner over the raw reviews stored in 'review_text'.
df_processed = preprocessor.preprocess_dataframe(df, text_column='review_text')

print(f"Processed dataset has {len(df_processed)} reviews")
print("\nText statistics:")
stats = preprocessor.get_text_statistics(df_processed)
print(stats)

# Before/after comparison using the first row of each frame.
# NOTE(review): this compares position 0 of `df` with position 0 of
# `df_processed`; if preprocessing drops or reorders rows the two lines may
# not describe the same review. Confirm against preprocess_dataframe.
print("\nExample of preprocessing:")
print(f"Original: {df['review_text'].iloc[0]}")
print(f"Processed: {df_processed['review_text_clean'].iloc[0]}")

## 3. Sentiment Analysis

In [None]:
# Sentiment analyzer configured with the 'textblob' model type.
sentiment_analyzer = RetailSentimentAnalyzer(model_type='textblob')

# Score the preprocessed reviews. The columns read below
# ('sentiment_sentiment', 'sentiment_confidence') are expected in the result.
df_sentiment = sentiment_analyzer.analyze_dataframe(df_processed)

print("Sentiment analysis completed!")
print("\nSentiment distribution:")
sentiment_dist = sentiment_analyzer.get_sentiment_distribution(df_sentiment)
print(sentiment_dist)

# Print up to three example rows. Bounding the loop by the frame length keeps
# the cell from raising IndexError when fewer than three reviews are present.
print("\nExample sentiment analysis:")
for i in range(min(3, len(df_sentiment))):
    # Only the first 50 characters of the cleaned text are shown.
    print(f"Text: {df_sentiment['review_text_clean'].iloc[i][:50]}...")
    print(f"Sentiment: {df_sentiment['sentiment_sentiment'].iloc[i]}")
    print(f"Confidence: {df_sentiment['sentiment_confidence'].iloc[i]:.3f}")
    print()

## 4. Topic Modeling

In [None]:
# Topic modeler configured with the 'lda' method.
topic_modeler = RetailTopicModeler(method='lda')

# Fit a five-topic model on the cleaned review texts.
texts = df_sentiment['review_text_clean'].tolist()
topic_modeler.fit_topic_model(texts, num_topics=5)

# List the ten leading words of every topic, one topic per line.
topic_words = topic_modeler.get_topic_words(num_words=10)
print("Topic words:")
for i, words in enumerate(topic_words):
    joined_words = ', '.join(words)
    print(f"Topic {i}: {joined_words}")

# Per-topic summary table; only the columns named below are printed.
topic_summary = topic_modeler.create_topic_summary(texts)
summary_columns = ['topic_id', 'top_words', 'document_count', 'percentage']
print("\nTopic summary:")
print(topic_summary[summary_columns])

## 5. Visualization Generation

In [None]:
# Plotting helper; later cells reuse this same `viz_generator` instance.
viz_generator = RetailVisualizationGenerator()

# Plot 1: overall sentiment distribution of the analysed reviews.
fig1 = viz_generator.plot_sentiment_distribution(df_sentiment)
plt.show()

# Plot 2: sentiment broken down by category.
# NOTE(review): presumably grouped on the 'product_category' column; confirm
# in the visualizations module.
fig2 = viz_generator.plot_sentiment_by_category(df_sentiment)
plt.show()

# Plot 3: topic distribution, drawn from the topic summary table.
fig3 = viz_generator.plot_topic_distribution(topic_summary)
plt.show()

## 6. Word Cloud Generation

In [None]:
# One word cloud over the full corpus: every cleaned review joined by spaces.
all_text = ' '.join(df_sentiment['review_text_clean'].tolist())
wordcloud = viz_generator.create_word_cloud(all_text, title="Customer Reviews Word Cloud")
plt.show()

# One word cloud per sentiment label, skipping labels with nothing to draw.
for sentiment in ['positive', 'negative', 'neutral']:
    # Skip labels that never occur in the results.
    if sentiment not in df_sentiment['sentiment_sentiment'].values:
        continue

    label_rows = df_sentiment['sentiment_sentiment'] == sentiment
    sentiment_text = ' '.join(
        df_sentiment.loc[label_rows, 'review_text_clean'].tolist()
    )

    # Skip labels whose reviews contain only whitespace after cleaning.
    if not sentiment_text.strip():
        continue

    wordcloud = viz_generator.create_word_cloud(
        sentiment_text,
        title=f"{sentiment.capitalize()} Reviews Word Cloud"
    )
    plt.show()

## 7. Comprehensive Dashboard

In [None]:
# Build the dashboard from the sentiment results and the topic summary, then
# display it.
# NOTE(review): the return value is kept in `dashboard` but is not used again
# in the cells visible here.
dashboard = viz_generator.create_comprehensive_dashboard(df_sentiment, topic_summary)
plt.show()

## 8. Business Intelligence Insights

In [None]:
# Gather the headline numbers into one dict so the report below reads from a
# single place. 'category_performance' is stored but not printed in this cell.
sentiment_labels = df_sentiment['sentiment_sentiment']
insights = {
    'total_reviews': len(df_sentiment),
    'sentiment_distribution': sentiment_labels.value_counts().to_dict(),
    'avg_confidence': df_sentiment['sentiment_confidence'].mean(),
    'category_performance': (
        df_sentiment.groupby('product_category')['sentiment_sentiment']
        .value_counts()
        .to_dict()
    ),
    'top_topics': topic_summary[['topic_id', 'top_words', 'percentage']].to_dict('records'),
}

total_reviews = insights['total_reviews']

print("=== BUSINESS INTELLIGENCE INSIGHTS ===")
print(f"Total Reviews Analyzed: {total_reviews:,}")
print(f"Average Sentiment Confidence: {insights['avg_confidence']:.3f}")

# Each sentiment label with its review count and share of all reviews.
print("\nSentiment Distribution:")
for label, n_reviews in insights['sentiment_distribution'].items():
    share = n_reviews / total_reviews * 100
    print(f"  {label.capitalize()}: {n_reviews:,} ({share:.1f}%)")

# Each topic with its leading words (cut to 50) and its percentage.
print("\nTop Topics:")
for entry in insights['top_topics']:
    print(f"  Topic {entry['topic_id']}: {entry['top_words'][:50]}... ({entry['percentage']:.1f}%)")

# Fixed list of follow-up actions; these lines do not depend on the data.
print("\n=== ACTIONABLE RECOMMENDATIONS ===")
for recommendation in (
    "1. Monitor negative sentiment patterns for product improvement opportunities",
    "2. Leverage positive sentiment topics for marketing campaigns",
    "3. Address recurring issues identified in topic modeling",
    "4. Implement real-time sentiment monitoring for customer service",
    "5. Use topic trends to guide product development roadmap",
):
    print(recommendation)

## 9. Save Results

In [None]:
# Persist both result tables through the loader, sentiment results first.
for frame, filename in (
    (df_sentiment, 'analyzed_reviews.csv'),
    (topic_summary, 'topic_summary.csv'),
):
    loader.save_processed_data(frame, filename)

# Let the visualization helper write out its plots.
viz_generator.save_all_plots(df_sentiment, topic_summary)

# Tell the reader where to look for the output.
# NOTE(review): the locations printed here are hard-coded in this cell;
# confirm they match where the loader and the plot helper actually write.
for message in (
    "Analysis complete! Results saved to:",
    "- data/processed/analyzed_reviews.csv",
    "- data/processed/topic_summary.csv",
    "- figures/ (all visualization plots)",
):
    print(message)