# 💬 Sentiment & Theme Analysis

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import seaborn as sns

# Load the reviews CSV from the outputs directory (path is relative to the
# notebook's working directory).
# NOTE(review): later cells read the columns 'bank', 'sentiment_label',
# 'review_text' and 'rating' from this frame — confirm the file provides them.
df = pd.read_csv(filepath_or_buffer='../outputs/final_sentiment_reviews.csv')

# Preview the first five rows as the cell's output.
df.head(n=5)
    

### Sentiment Distribution by Bank

In [None]:

# Count reviews per bank, split into one bar per sentiment label.
# Labels are set on the Axes that seaborn returns rather than through the
# implicit pyplot "current axes".
ax = sns.countplot(x='bank', hue='sentiment_label', data=df)
ax.set_title("Sentiment Distribution by Bank")
ax.set_xlabel("Bank")
ax.set_ylabel("Review Count")
ax.legend(title='Sentiment')
plt.show()
    

### Word Clouds per Bank

In [None]:

# Draw one word cloud per bank from that bank's concatenated review text.
# Missing bank values are dropped first: comparing the column against NaN
# matches no rows, which would hand WordCloud an empty string.
for bank in df['bank'].dropna().unique():
    bank_reviews = df.loc[df['bank'] == bank, 'review_text']
    text = " ".join(bank_reviews.dropna().astype(str))

    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError as err:
        # WordCloud raises ValueError when it is left with no words to draw
        # (e.g. no review text, or stopwords only). Report the bank and move
        # on instead of aborting the remaining word clouds.
        print(f"Skipping word cloud for {bank}: {err}")
        continue

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    ax.set_title(f"Word Cloud for {bank}")
    plt.show()
    # Release the figure so figures do not accumulate across loop iterations.
    plt.close(fig)
    

### Sentiment vs. Rating

In [None]:

# Box plot of the rating values, one box per sentiment label.
ax = sns.boxplot(data=df, x='sentiment_label', y='rating')
ax.set_title("Rating Distribution by Sentiment Label")
plt.show()
    

### Top Keywords by TF-IDF (Basic Example Placeholder)

In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the non-null review texts, with English stop words
# removed and the vocabulary capped at 10 terms.
corpus = df['review_text'].dropna().astype(str)
vectorizer = TfidfVectorizer(max_features=10, stop_words='english')
X = vectorizer.fit_transform(corpus)

# get_feature_names_out() returns the terms in vocabulary (column) order, so
# printing it alone shows no ranking. Average each term's TF-IDF weight over
# all reviews and sort, so the output actually orders the keywords by TF-IDF.
# NOTE(review): per the scikit-learn docs, max_features picks the candidate
# terms by corpus term frequency; raise it if a wider candidate set is wanted.
# X has only 10 columns, so converting it to a dense array stays small.
top_keywords = pd.Series(
    X.toarray().mean(axis=0),
    index=vectorizer.get_feature_names_out(),
    name='mean_tfidf',
).sort_values(ascending=False)
print(top_keywords)
    