In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from textblob import TextBlob
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix

# Global plotting defaults for the notebook: seaborn "whitegrid" theme,
# followed by an 8x4 default figure size for every matplotlib figure.
sns.set_theme(style="whitegrid")
plt.rcParams.update({"figure.figsize": (8, 4)})


In [2]:
# Make sure the VADER lexicon is available for sentiment analysis.
# The download is attempted only when the lexicon cannot be found on the
# local NLTK data path, so re-running this cell does not need network access.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    # Lexicon is not installed yet -> fetch it once.
    nltk.download('vader_lexicon')

# Shared VADER analyzer instance used by later cells.
sia = SentimentIntensityAnalyzer()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...


In [3]:
# Load the processed reviews CSV and preview its first five rows.
df = pd.read_csv("../data/processed/reviews_processed.csv")
df.head(n=5)

Unnamed: 0,review_id,review_text,rating,review_date,review_year,review_month,bank_code,bank_name,user_name,thumbs_up,text_length,source
0,ba01d6b7-97fe-4376-b68c-817f397c00af,I can't access the loan in the app. only it wo...,3,2025-11-28,2025,11,Awash,Awash Bank,FAHMI SHEMSEDIN,0,56,Google Play
1,bde60e8b-0b05-4c88-9f60-c14c18fa57b9,no one from all ethiopian bank keep it up nice,5,2025-11-28,2025,11,Awash,Awash Bank,Zed Cherinet,0,46,Google Play
2,fae55af8-1c5a-41f5-b6e8-55652afbf303,thenk you ✍️🙏,5,2025-11-26,2025,11,Awash,Awash Bank,dechasa leta,0,13,Google Play
3,e35ece9f-a9fb-4510-a9e3-8b8487a99d12,i hope to be good app,5,2025-11-24,2025,11,Awash,Awash Bank,Mulubirhan Yikunom,0,21,Google Play
4,fb9ed669-ea1d-4eb5-a336-634ccc59e265,amazing 😍😍,5,2025-11-23,2025,11,Awash,Awash Bank,Mifta Raya,0,10,Google Play


In [4]:

def rating_to_label(r):
    """Map a numeric star rating to a coarse sentiment label.

    Parameters
    ----------
    r : int or float
        Star rating of a review. May be missing (``None`` / ``NaN``).

    Returns
    -------
    str or None
        ``"negative"`` when ``r <= 2``, ``"neutral"`` when ``r == 3``,
        ``"positive"`` for any other non-missing rating, and ``None`` when
        the rating is missing.
    """
    # A missing rating fails both comparisons below and would otherwise be
    # silently labelled "positive" (NaN) or raise TypeError (None).
    if pd.isna(r):
        return None
    if r <= 2:
        return "negative"
    elif r == 3:
        return "neutral"
    else:
        return "positive"

# Derive the rating-based sentiment label for every review, then display the frame.
df["sentiment_label"] = df["rating"].map(rating_to_label)
df


Unnamed: 0,review_id,review_text,rating,review_date,review_year,review_month,bank_code,bank_name,user_name,thumbs_up,text_length,source,sentiment_label
0,ba01d6b7-97fe-4376-b68c-817f397c00af,I can't access the loan in the app. only it wo...,3,2025-11-28,2025,11,Awash,Awash Bank,FAHMI SHEMSEDIN,0,56,Google Play,neutral
1,bde60e8b-0b05-4c88-9f60-c14c18fa57b9,no one from all ethiopian bank keep it up nice,5,2025-11-28,2025,11,Awash,Awash Bank,Zed Cherinet,0,46,Google Play,positive
2,fae55af8-1c5a-41f5-b6e8-55652afbf303,thenk you ✍️🙏,5,2025-11-26,2025,11,Awash,Awash Bank,dechasa leta,0,13,Google Play,positive
3,e35ece9f-a9fb-4510-a9e3-8b8487a99d12,i hope to be good app,5,2025-11-24,2025,11,Awash,Awash Bank,Mulubirhan Yikunom,0,21,Google Play,positive
4,fb9ed669-ea1d-4eb5-a336-634ccc59e265,amazing 😍😍,5,2025-11-23,2025,11,Awash,Awash Bank,Mifta Raya,0,10,Google Play,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1195,b3c8405c-96a7-4b5e-884c-76c97c530c34,good,5,2025-05-09,2025,5,Dashen,Dashen Bank,fenta abebayehu,1,4,Google Play,positive
1196,e1c1214a-8bc1-45db-bc49-3d51dddc6b88,Amazing app super easy to use and best design....,5,2025-05-09,2025,5,Dashen,Dashen Bank,Yitbarek Gossaye,1,56,Google Play,positive
1197,8d1d472b-2bae-4749-b089-5632108ade02,its the best ever,5,2025-05-09,2025,5,Dashen,Dashen Bank,natnael abera,1,17,Google Play,positive
1198,749851ed-72d7-4f1c-8e5e-27dd822b5008,nice,5,2025-05-08,2025,5,Dashen,Dashen Bank,Mehammed Amin,1,4,Google Play,positive


In [5]:

# Add a lower-cased copy of the review text, then show it next to the original.
df["clean_text"] = df["review_text"].str.lower()
df.loc[:, ["review_text", "clean_text"]]


Unnamed: 0,review_text,clean_text
0,I can't access the loan in the app. only it wo...,i can't access the loan in the app. only it wo...
1,no one from all ethiopian bank keep it up nice,no one from all ethiopian bank keep it up nice
2,thenk you ✍️🙏,thenk you ✍️🙏
3,i hope to be good app,i hope to be good app
4,amazing 😍😍,amazing 😍😍
...,...,...
1195,good,good
1196,Amazing app super easy to use and best design....,amazing app super easy to use and best design....
1197,its the best ever,its the best ever
1198,nice,nice
