In [2]:
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud

# Load the dataset
df = pd.read_csv("googleplaystore_user_reviews.csv")

# Drop rows without usable review text: real missing values first, then any
# review whose text is literally the string "nan" (compared case-insensitively).
df = df.dropna(subset=['Translated_Review'])
has_real_text = df['Translated_Review'].str.lower() != 'nan'
# Take an explicit copy of the filtered rows: new columns are added to `df`
# further down, and assigning into a filtered selection can make pandas emit
# SettingWithCopyWarning.
df = df[has_real_text].copy()

# Sentiment analysis using TextBlob
# Analyse each review exactly once and read both scores from the same result,
# instead of building and analysing a separate TextBlob per output column.
sentiments = [TextBlob(str(review)).sentiment for review in df['Translated_Review']]
df['polarity'] = [sentiment.polarity for sentiment in sentiments]
df['subjectivity'] = [sentiment.subjectivity for sentiment in sentiments]

# Sentiment categorization
def get_sentiment(p):
    """Translate a polarity score into a sentiment label.

    Args:
        p: Numeric polarity score.

    Returns:
        'Positive' when ``p`` is greater than zero, 'Negative' when it is
        less than zero, and 'Neutral' in every other case (including a
        value that compares neither above nor below zero, such as NaN).
    """
    if p > 0:
        return 'Positive'
    return 'Negative' if p < 0 else 'Neutral'

# Label every review according to its polarity score
df['sentiment_category'] = df['polarity'].map(get_sentiment)

# Save for Power BI (the row index is not written to the file)
df.to_csv("cleaned_sentiment_reviews.csv", index=False)

# Visualization 1: Sentiment distribution
# Bar chart with one bar per sentiment category, drawn on an explicit
# Figure/Axes pair rather than through pyplot's implicit "current figure".
# NOTE(review): newer seaborn releases may warn when `palette` is passed
# without `hue` — confirm against the installed seaborn version.
dist_fig, dist_ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='sentiment_category', palette='pastel', ax=dist_ax)
dist_ax.set_title('Sentiment Distribution')
dist_ax.set_xlabel('Sentiment')
dist_ax.set_ylabel('Number of Reviews')
dist_fig.tight_layout()
dist_fig.savefig("sentiment_distribution.png")
# Close the figure once it has been written to disk.
plt.close(dist_fig)

# Visualization 2: Word Cloud
# Concatenate every review into one space-separated string for the generator.
text = " ".join(map(str, df['Translated_Review']))
wordcloud = WordCloud(width=800, height=400, background_color='white')
wordcloud = wordcloud.generate(text)

# Render the cloud as an image with the axes hidden, then write it to disk.
cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
cloud_ax.imshow(wordcloud, interpolation='bilinear')
cloud_ax.axis("off")
cloud_ax.set_title("Most Common Words in Reviews")
cloud_fig.savefig("review_wordcloud.png")
plt.close(cloud_fig)

print("✅ Analysis complete. CSV and visualizations saved.")


✅ Analysis complete. CSV and visualizations saved.
