# Install Required Libraries

In [18]:
import pandas as pd
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter
import re
import nltk
from nltk.corpus import stopwords
from sklearn.cluster import KMeans

## Load processed reviews

In [19]:
data = pd.read_csv('../data/processed/reviews_processed.csv')

# Sentiment Analysis

In [20]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Apply sentiment analysis

In [21]:
stop_words = set(stopwords.words('english'))
def get_sentiment(text):
    blob = TextBlob(str(text))
    score = blob.sentiment.polarity
    if score > 0.5:
        label = 'Positive'
    elif score < -0.5:
        label = 'Negative'
    else:
        label = 'Neutral'
    return pd.Series([score, label])
data[['sentiment_score', 'sentiment_label']] = data['review_text'].apply(get_sentiment)

##  Aggregate by bank and rating and Sentiment Analysis

In [22]:
agg_sentiment = data.groupby(['bank_name', 'rating'], as_index=False)['sentiment_score'].mean()
agg_sentiment['rating_stars'] = agg_sentiment['rating'].apply(lambda x: '⭐' * int(x))
agg_sentiment

Unnamed: 0,bank_name,rating,sentiment_score,rating_stars
0,Bank of Abyssinia,1,-0.12359,⭐
1,Bank of Abyssinia,2,0.021954,⭐⭐
2,Bank of Abyssinia,3,0.086186,⭐⭐⭐
3,Bank of Abyssinia,4,0.200155,⭐⭐⭐⭐
4,Bank of Abyssinia,5,0.318071,⭐⭐⭐⭐⭐
5,Commercial Bank of Ethiopia,1,-0.084397,⭐
6,Commercial Bank of Ethiopia,2,0.108198,⭐⭐
7,Commercial Bank of Ethiopia,3,0.092037,⭐⭐⭐
8,Commercial Bank of Ethiopia,4,0.271706,⭐⭐⭐⭐
9,Commercial Bank of Ethiopia,5,0.380496,⭐⭐⭐⭐⭐


In [23]:
data.to_csv('../data/processed/reviews_with_sentiment.csv', index=False)

# Thematic Analysis
### Preprocess text for TF-IDF

In [24]:
def preprocess_text(text):
    text = str(text).lower()
    tokens = [words for words in text.split() if words not in stop_words and words.isalpha() and len(words)>2]
    return ' '.join(tokens)


In [25]:
data['clean_text'] = data['review_text'].apply(preprocess_text)
data['clean_text']

0                                    app makes life thank
1                                                 app bad
2                                           advanced stay
3                                        good application
4                                                nice app
                              ...                        
1450    underrated app ever seen banking system soo id...
1451                            demands disable developer
1452    two months cannot transaction using really usi...
1453                                    open open service
1454                                                  nan
Name: clean_text, Length: 1455, dtype: object

In [27]:
# TF-IDF Vectorization
vectorizer = TfidfVectorizer(ngram_range=(1,2),max_features=500)
x = vectorizer.fit_transform(data['clean_text'].dropna())
keywords = vectorizer.get_feature_names_out()
keywords[:20]

array(['able', 'absolutely', 'abyssinia', 'access', 'account', 'accounts',
       'activate', 'active', 'actually', 'add', 'address', 'ahead',
       'airtime', 'allow', 'almost', 'also', 'always', 'amazing',
       'amazing app', 'amole'], dtype=object)

# KMeans Clustering for Topics

In [28]:
kmeans = KMeans(n_clusters=5, random_state=42,)
data['topic'] = kmeans.fit_predict(x)
data['topic'].head(5)

0    2
1    2
2    2
3    0
4    2
Name: topic, dtype: int32

## Cluster into Themes

In [29]:
theme_mapping = {
     0: "Ease of Use / Performance",
    1: "General Praise / Mixed Feedback",
    2: "User Experience / App Features",
    3: "Bugs / Issues / Reliability",
    4: "Positive Feedback / UI Appreciation"
}

data['theme'] = data['topic'].map(theme_mapping)
data.head(5)

Unnamed: 0,review_text,rating,review_date,bank_name,sentiment_score,sentiment_label,clean_text,topic,theme
0,The app makes our life easier. Thank you CBE!,5,2025-11-27,Commercial Bank of Ethiopia,0.0,Neutral,app makes life thank,2,User Experience / App Features
1,this app very bad,1,2025-11-27,Commercial Bank of Ethiopia,-0.91,Negative,app bad,2,User Experience / App Features
2,the most advanced app. but how to stay safe?,5,2025-11-27,Commercial Bank of Ethiopia,0.466667,Neutral,advanced stay,2,User Experience / App Features
3,Good application,4,2025-11-27,Commercial Bank of Ethiopia,0.7,Positive,good application,0,Ease of Use / Performance
4,It is nice app,5,2025-11-26,Commercial Bank of Ethiopia,0.6,Positive,nice app,2,User Experience / App Features


In [30]:
data.to_csv('../data/processed/reviews_final.csv')