# 💬 Task 2: Sentiment and Thematic Analysis

## 1. Introduction

In [None]:
import pandas as pd

# Load the cleaned reviews.
# The CSV exposes the review text in a `review` column, while every later cell
# in this notebook reads `review_text`; without the rename a fresh
# Restart & Run All fails with a KeyError. `rename` ignores keys that are not
# present, so a CSV that already uses `review_text` loads unchanged.
df = pd.read_csv('../data/bank_reviews_clean.csv').rename(columns={'review': 'review_text'})
df.head()

Unnamed: 0,review,rating,date,bank,source
0,20 years,5,2025-06-08,CBE,Google Play
1,A great app. It's like carrying a bank in your...,4,2025-06-07,CBE,Google Play
2,More than garrantty bank EBC.,4,2025-06-07,CBE,Google Play
3,really am happy to this app it is Siple to use...,5,2025-06-07,CBE,Google Play
4,I liked this app. But the User interface is ve...,2,2025-06-07,CBE,Google Play


## 2. Sentiment Analysis

### 2.1 DistilBERT Sentiment, Compared with VADER

In [None]:
from transformers import pipeline

# Load the DistilBERT SST-2 sentiment classifier (PyTorch backend).
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="pt",
)

# Score every review (no sampling is applied). Each review goes through the
# model exactly once and the single result feeds both columns; calling the
# pipeline separately for the label and for the score doubles inference time.
# `truncation=True` clips reviews that exceed the model's maximum input length
# instead of letting them raise.
review_texts = [str(text) for text in df['review_text']]
sentiment_results = sentiment_pipeline(review_texts, truncation=True) if review_texts else []

df['sentiment'] = [result['label'] for result in sentiment_results]
df['sentiment_score'] = [result['score'] for result in sentiment_results]
df[['review_text', 'sentiment', 'sentiment_score']].head()

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Score each review with VADER and keep only the `compound` value returned by
# polarity_scores(); values are stringified first so non-string cells are accepted.
analyzer = SentimentIntensityAnalyzer()
df['vader_compound'] = [
    analyzer.polarity_scores(str(review))['compound']
    for review in df['review_text']
]
df[['review_text', 'vader_compound']].head()

## 3. Sentiment Aggregation by Bank

In [None]:
# Count reviews per (bank, sentiment) pair, then pivot the sentiment labels
# into columns; pairs that never occur are shown as 0.
counts_by_bank_and_sentiment = df.groupby(['bank', 'sentiment']).size()
sentiment_summary = counts_by_bank_and_sentiment.unstack(fill_value=0)
sentiment_summary

## 4. Keyword Extraction (TF-IDF)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model over all reviews with English stop words removed.
# NOTE: `max_features=20` caps the vocabulary at 20 terms (per the scikit-learn
# docs these are chosen by corpus term frequency), and get_feature_names_out()
# lists the retained terms; it does not rank them by TF-IDF weight.
review_corpus = df['review_text'].astype(str)
tfidf = TfidfVectorizer(stop_words='english', max_features=20)
tfidf_matrix = tfidf.fit_transform(review_corpus)
keywords = tfidf.get_feature_names_out()
print("Top keywords:", keywords)

## 5. Thematic Clustering (Manual grouping)

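Example: assign each review a theme using simple keyword rules. The rules are checked in order and the first matching rule wins; reviews that match no rule are labelled "Other".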

In [None]:
def assign_theme(text):
    """Map a review to a coarse theme using ordered keyword rules.

    Rules are checked in order and the first rule with a matching keyword wins.
    Matching is a plain substring search, so a keyword also matches inside a
    longer word (e.g. 'help' matches 'helpful').

    Args:
        text: Review text. Any value is accepted: it is converted with
            ``str()`` and lower-cased before matching, so callers do not need
            to normalise it first (already-normalised input behaves the same).

    Returns:
        The theme label of the first matching rule, or 'Other' if none match.
    """
    # Order matters: earlier rules take precedence over later ones.
    theme_rules = (
        (('login', 'access'), 'Account Access Issues'),
        (('transfer', 'transaction'), 'Transaction Performance'),
        (('interface', 'design'), 'User Interface & Experience'),
        (('support', 'help'), 'Customer Support'),
    )

    # Normalise here so mixed-case text is not silently labelled 'Other' and
    # non-string values (e.g. NaN from an empty cell) do not raise TypeError.
    normalized = str(text).lower()

    for terms, theme in theme_rules:
        if any(term in normalized for term in terms):
            return theme
    return 'Other'

# Label every review with its theme. Each value is stringified and lower-cased
# before being handed to assign_theme, whose keywords are all lowercase.
normalized_reviews = [str(review).lower() for review in df['review_text']]
df['theme'] = [assign_theme(review) for review in normalized_reviews]
df[['review_text', 'theme']].head()

## 6. Save Processed Data

In [None]:
# Persist the frame with every column added above; the index is not written.
output_csv_path = '../data/sentiment_and_themes.csv'
df.to_csv(output_csv_path, index=False)

## 7. Visualize: Sentiment Distribution & Theme Frequency

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def _countplot_by_bank(column, title, tick_rotation=None):
    """Show a count plot of `df[column]` with one bar colour per bank.

    Args:
        column: Name of the `df` column placed on the x-axis.
        title: Figure title.
        tick_rotation: Rotation in degrees for the x tick labels, or None to
            leave them at the default orientation.
    """
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.countplot(data=df, x=column, hue='bank', ax=ax)
    ax.set_title(title)
    if tick_rotation is not None:
        ax.tick_params(axis='x', labelrotation=tick_rotation)
    plt.show()


_countplot_by_bank('sentiment', 'Sentiment Distribution by Bank')
# Theme labels are long, so tilt them to keep them readable.
_countplot_by_bank('theme', 'Theme Frequency by Bank', tick_rotation=45)