# Import Required Libraries

In [1]:
import pandas as pd
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter
import re
import nltk
from nltk.corpus import stopwords

## Load processed reviews

In [2]:
# Read the processed reviews CSV into the working DataFrame.
reviews_csv = '../data/processed/reviews_processed.csv'
data = pd.read_csv(reviews_csv)

# Sentiment Analysis

In [3]:
# Download the NLTK 'stopwords' corpus; stopwords.words('english') reads from it.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

## Apply sentiment analysis

In [4]:
# English stop words, held as a set for membership checks.
english_stop_words = stopwords.words('english')
stop_words = set(english_stop_words)
def get_sentiment(text):
    """Score a review with TextBlob and bucket the score into a label.

    Args:
        text: Review text; it is passed through ``str()`` before analysis.

    Returns:
        pd.Series: Two values, the polarity score followed by its label
        ('Positive' above 0.5, 'Negative' below -0.5, otherwise 'Neutral').
    """
    polarity = TextBlob(str(text)).sentiment.polarity
    # Scores between -0.5 and 0.5 (inclusive) stay in the 'Neutral' bucket.
    sentiment = (
        'Positive' if polarity > 0.5
        else 'Negative' if polarity < -0.5
        else 'Neutral'
    )
    return pd.Series([polarity, sentiment])
# Score every review and store the score and label as two new columns.
sentiment_columns = ['sentiment_score', 'sentiment_label']
data[sentiment_columns] = data['review_text'].apply(get_sentiment)

## Aggregate Sentiment Scores by Bank and Rating

In [5]:
# Mean sentiment score for every (bank, rating) pair.
reviews_by_bank_rating = data.groupby(['bank_name', 'rating'], as_index=False)
agg_sentiment = reviews_by_bank_rating['sentiment_score'].mean()
# Render each rating as that many star characters.
agg_sentiment['rating_stars'] = agg_sentiment['rating'].apply(
    lambda rating: '⭐' * int(rating)
)
agg_sentiment

Unnamed: 0,bank_name,rating,sentiment_score,rating_stars
0,Bank of Abyssinia,1,-0.12359,⭐
1,Bank of Abyssinia,2,0.021954,⭐⭐
2,Bank of Abyssinia,3,0.086186,⭐⭐⭐
3,Bank of Abyssinia,4,0.200155,⭐⭐⭐⭐
4,Bank of Abyssinia,5,0.318071,⭐⭐⭐⭐⭐
5,Commercial Bank of Ethiopia,1,-0.084397,⭐
6,Commercial Bank of Ethiopia,2,0.108198,⭐⭐
7,Commercial Bank of Ethiopia,3,0.092037,⭐⭐⭐
8,Commercial Bank of Ethiopia,4,0.271706,⭐⭐⭐⭐
9,Commercial Bank of Ethiopia,5,0.380496,⭐⭐⭐⭐⭐


In [6]:
# Persist the reviews together with their sentiment columns (row index omitted).
sentiment_csv = '../data/processed/reviews_with_sentiment.csv'
data.to_csv(sentiment_csv, index=False)

# Thematic Analysis
### Preprocess text for TF-IDF

In [7]:
def preprocess_text(text, stop_word_set=None):
    """Normalise a review into a space-separated string of content words.

    The text is lower-cased and split into runs of letters, so punctuation
    attached to a word ("great," / "app.") no longer causes the whole word
    to be discarded. Stop words and words of one or two characters are
    removed.

    Args:
        text: Raw review text; coerced with ``str()``.
        stop_word_set: Optional collection of words to drop. Defaults to the
            notebook-level ``stop_words`` set.

    Returns:
        str: The kept words joined by single spaces ('' when nothing is kept).
    """
    if stop_word_set is None:
        stop_word_set = stop_words
    # [^\W\d_]+ = runs of word characters excluding digits and underscore,
    # which keeps non-ASCII letters while stripping punctuation and numbers.
    candidates = re.findall(r'[^\W\d_]+', str(text).lower())
    tokens = [
        word for word in candidates
        if len(word) > 2 and word not in stop_word_set
    ]
    return ' '.join(tokens)


In [8]:
# Clean every review, then preview the first ten results.
clean_text = data['review_text'].map(preprocess_text)
clean_text.head(10)

0                                 app makes life thank
1                                              app bad
2                                        advanced stay
3                                     good application
4                                             nice app
5                                                 best
6                                             good app
7    suddenly asked enter verification key get veri...
8                                                 nice
9                    good banking service fast service
Name: review_text, dtype: object

In [9]:
# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=100)
x = vectorizer.fit_transform(clean_text)
keywords = vectorizer.get_feature_names_out()
keywords[:20]

array(['able', 'access', 'account', 'also', 'always', 'amazing',
       'android', 'app', 'application', 'apps', 'bad', 'bank', 'banking',
       'best', 'better', 'big', 'boa', 'cbe', 'crashes', 'dashen'],
      dtype=object)

## Cluster into Themes

In [10]:
# Theme name -> keywords that signal it. Keywords are matched as whole words,
# so word variants must be listed explicitly ('apps' and 'application' are
# included because they were previously caught via the 'app' substring).
themes = {
    'Account Access Issues': ['login', 'password', 'access', 'otp'],
    'Transaction Performance': ['transfer', 'payment', 'slow', 'failed'],
    'User Interface & Experience': ['ui', 'design', 'app', 'apps', 'application', 'navigate'],
    'Customer Support': ['support', 'help', 'call', 'response'],
    'Feature Requests': ['feature', 'add', 'improve']
}
def assign_theme(text):
    """Return the comma-separated themes whose keywords occur in ``text``.

    Matching is done on whole whitespace-separated words rather than on
    substrings, so 'app' does not fire on 'happy' and 'ui' does not fire
    on 'quick'.

    Args:
        text: Pre-processed review text (space-separated words); coerced
            with ``str()``.

    Returns:
        str: Matching theme names joined by ', ' in ``themes`` order, or
        'Other' when no keyword is present.
    """
    words = set(str(text).split())
    assigned = [
        theme for theme, keyword_list in themes.items()
        if not words.isdisjoint(keyword_list)
    ]
    return ', '.join(assigned) if assigned else 'Other'
# Tag each cleaned review with its theme(s) and display the new column.
data['themes'] = clean_text.map(assign_theme)
data['themes']

0       User Interface & Experience
1       User Interface & Experience
2                             Other
3       User Interface & Experience
4       User Interface & Experience
                   ...             
1450    User Interface & Experience
1451                          Other
1452                          Other
1453                          Other
1454                          Other
Name: themes, Length: 1455, dtype: object

In [11]:
# Save the final dataset. index=False keeps the row index out of the CSV,
# consistent with the reviews_with_sentiment.csv export; without it the file
# gains an unnamed index column.
data.to_csv('../data/processed/reviews_final.csv', index=False)