In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Read the cleaned review data from CSV into a DataFrame.
df = pd.read_csv("cleaned_customer_reviews.csv")

# 1. Summary Statistics
# describe() reports count/mean/std/min/quartiles/max for the numeric
# columns by default.
print("Summary Statistics:")
summary_table = df.describe()
print(summary_table)

# Frequency of each distinct value in the sentiment_polarity column.
# NOTE(review): this column is also used on a numeric axis further down;
# if it holds continuous scores, value_counts() will list nearly every
# distinct value -- confirm that is the intended output.
print("\nSentiment Distribution:")
polarity_counts = df["sentiment_polarity"].value_counts()
print(polarity_counts)

# 2. Visualizations
# savefig() does not create missing directories, so make sure the output
# folder exists before anything is written into it.
Path("plots").mkdir(parents=True, exist_ok=True)

# One figure holding two side-by-side panels; the axes are addressed
# explicitly instead of relying on pyplot's "current axes" state.
fig, (ratings_ax, sentiment_ax) = plt.subplots(1, 2, figsize=(12, 6))

# Plot 1: Distribution of Ratings (5-bin histogram with a KDE overlay)
sns.histplot(df['rating'], bins=5, kde=True, ax=ratings_ax)
ratings_ax.set_title("Distribution of Customer Ratings")

# Plot 2: Sentiment Polarity vs. Rating (semi-transparent points so that
# overlapping observations remain visible)
sns.scatterplot(
    x='sentiment_polarity',
    y='rating',
    data=df,
    alpha=0.6,
    ax=sentiment_ax,
)
sentiment_ax.set_title("Sentiment vs. Rating")

# Tidy the spacing between panels, write the PNG, then display the figure.
fig.tight_layout()
fig.savefig("plots/rating_sentiment_analysis.png")
plt.show()

# Plot 3: Review Length Analysis
# savefig() does not create missing directories, so make sure the output
# folder exists before anything is written into it.
Path("plots").mkdir(parents=True, exist_ok=True)

# One box per distinct rating value, showing the spread of review_length.
fig, length_ax = plt.subplots(figsize=(8, 4))
sns.boxplot(x='rating', y='review_length', data=df, ax=length_ax)
length_ax.set_title("Review Length by Rating")

# Write the PNG, then display the figure.
fig.savefig("plots/review_length_by_rating.png")
plt.show()