# Sentiment Analysis

## Import libraries and custom functions

In [1]:
import pandas as pd
import os
import sys
import torch
# Put the parent of the current working directory on the import path so the
# project-local `scripts` package can be imported from this notebook.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from scripts.sentiment_analysis import SentimentAnalysis

# Single analyzer instance reused by the sentiment cells below.
sentimentAnalysis = SentimentAnalysis()

  from .autonotebook import tqdm as notebook_tqdm
Device set to use cpu


In [2]:
# Load the preprocessed reviews for all bank apps.
try:
    processed_df = pd.read_csv('../data/processed/processed_bank_app_reviews.csv')
except FileNotFoundError:
    print("Error: 'processed_bank_app_reviews.csv' not found. Please run the previous code block first.")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down, whereas an exception stops execution at this cell and keeps
    # the session and the traceback available for inspection.
    raise

In [3]:
# Run the DistilBERT analyzer over every review; each result is kept as-is in
# 'distilbert_sentiment' and then unpacked into flat columns.
print("Applying DistilBERT sentiment analysis...")
processed_df['distilbert_sentiment'] = processed_df['review'].apply(sentimentAnalysis.analyze_sentiment_distilbert)

# (target column, key inside the per-review result) pairs to unpack
for column, key in (
    ('distilbert_label', 'sentiment_distilbert'),
    ('distilbert_score', 'score_distilbert'),
):
    processed_df[column] = processed_df['distilbert_sentiment'].apply(lambda result, key=key: result[key])

# Show how many reviews received each label
label_counts = processed_df['distilbert_label'].value_counts()
print(label_counts)

Applying DistilBERT sentiment analysis...
distilbert_label
POSITIVE    742
NEGATIVE    458
Name: count, dtype: int64


In [4]:
# Label every review with the TextBlob-based analyzer
print("Applying TextBlob sentiment analysis...")
textblob_labels = processed_df['review'].apply(sentimentAnalysis.get_textblob_sentiment)
processed_df['textblob_sentiment'] = textblob_labels

# Show how many reviews received each label (read back from the frame so the
# printed Series carries the column name)
textblob_counts = processed_df['textblob_sentiment'].value_counts()
print(textblob_counts)

Applying TextBlob sentiment analysis...
textblob_sentiment
positive    757
neutral     317
negative    126
Name: count, dtype: int64


In [5]:
# Label every review with the VADER-based analyzer
vader_column = 'vader_sentiment'
print("Applying VADER sentiment analysis...")
processed_df[vader_column] = processed_df['review'].apply(sentimentAnalysis.get_vader_sentiment)

# Show how many reviews received each label
print(processed_df[vader_column].value_counts())

Applying VADER sentiment analysis...
vader_sentiment
positive    763
neutral     298
negative    139
Name: count, dtype: int64


In [6]:
# Show the reviews together with the sentiment columns added by the cells above.
# `display` is not imported in this notebook; the call relies on the name that
# IPython makes available in notebook sessions.
print("\nDataFrame with sentiment scores:")
display(processed_df)


DataFrame with sentiment scores:


Unnamed: 0.1,Unnamed: 0,review,rating,date,bank,source,distilbert_sentiment,distilbert_label,distilbert_score,textblob_sentiment,vader_sentiment
0,0,20 years,5,2025-06-08,Commercial Bank of Ethiopia (CBE),Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.991589,neutral,neutral
1,1,A great app. It's like carrying a bank in your...,4,2025-06-07,Commercial Bank of Ethiopia (CBE),Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.999293,positive,positive
2,2,More than garrantty bank EBC.,4,2025-06-07,Commercial Bank of Ethiopia (CBE),Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.997196,positive,neutral
3,3,really am happy to this app it is Siple to use...,5,2025-06-07,Commercial Bank of Ethiopia (CBE),Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.998870,positive,positive
4,4,I liked this app. But the User interface is ve...,2,2025-06-07,Commercial Bank of Ethiopia (CBE),Google Play Store,"{'sentiment_distilbert': 'NEGATIVE', 'score_di...",NEGATIVE,0.999684,positive,negative
...,...,...,...,...,...,...,...,...,...,...,...
1195,1195,Wow,5,2025-01-17,Dashen Bank,Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.999592,positive,positive
1196,1196,Dashen yichalal. Ewnetem one step a head,5,2025-01-17,Dashen Bank,Google Play Store,"{'sentiment_distilbert': 'NEGATIVE', 'score_di...",NEGATIVE,0.983263,neutral,neutral
1197,1197,It has a Good performance but need more upgrad...,4,2025-01-17,Dashen Bank,Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.996793,positive,positive
1198,1198,It is a very wonderful work that has saved its...,5,2025-01-17,Dashen Bank,Google Play Store,"{'sentiment_distilbert': 'POSITIVE', 'score_di...",POSITIVE,0.999881,positive,positive


In [7]:
# --- Aggregate Sentiment by Bank and Rating ---

print("\nAggregating sentiment by Bank and Rating (using DistilBERT scores)...")

# Map DistilBERT labels to signed values so that a group's mean reads as net
# polarity in [-1, 1]. The labels observed in this run are only
# POSITIVE/NEGATIVE; the 'neutral' entry is kept as a fallback in case the
# analyzer is swapped for one that emits it.
sentiment_mapping = {'POSITIVE': 1, 'NEGATIVE': -1, 'neutral': 0}
mapped_labels = processed_df['distilbert_label'].map(sentiment_mapping)

# Series.map yields NaN for labels missing from the mapping. They are still
# counted as neutral (0), as before, but are now reported instead of being
# silently absorbed into the mean.
unmapped_mask = mapped_labels.isna()
if unmapped_mask.any():
    print("Warning: labels not in sentiment_mapping were treated as neutral (0):")
    print(processed_df.loc[unmapped_mask, 'distilbert_label'].value_counts(dropna=False))
processed_df['distilbert_numerical'] = mapped_labels.fillna(0)

# Aggregate mean sentiment value by bank and rating
sentiment_agg = processed_df.groupby(['bank', 'rating'])['distilbert_numerical'].mean().reset_index()

print("\nMean DistilBERT Sentiment by Bank and Rating:")
display(sentiment_agg)


Aggregating sentiment by Bank and Rating (using DistilBERT scores)...

Mean DistilBERT Sentiment by Bank and Rating:


Unnamed: 0,bank,rating,distilbert_numerical
0,Bank of Abyssinia (BOA),1,-0.817073
1,Bank of Abyssinia (BOA),2,-0.818182
2,Bank of Abyssinia (BOA),3,-0.16129
3,Bank of Abyssinia (BOA),4,0.0
4,Bank of Abyssinia (BOA),5,0.511364
5,Commercial Bank of Ethiopia (CBE),1,-0.625
6,Commercial Bank of Ethiopia (CBE),2,-0.375
7,Commercial Bank of Ethiopia (CBE),3,-0.52381
8,Commercial Bank of Ethiopia (CBE),4,0.076923
9,Commercial Bank of Ethiopia (CBE),5,0.695652


In [8]:
# Save the DataFrame with sentiment scores to a CSV file
if not processed_df.empty:
    # index=False: no cell in this notebook sets a meaningful index, and writing
    # the default one adds an unnamed column that read_csv later surfaces as
    # 'Unnamed: 0' (a leftover of that kind is already visible in the frame
    # displayed above).
    processed_df.to_csv('../data/processed/processed_bank_app_reviews_with_sentiment.csv', index=False)
    print("DataFrame with sentiment scores saved to processed_bank_app_reviews_with_sentiment.csv")
else:
    # Make the skipped write visible instead of finishing the cell silently.
    print("DataFrame is empty; nothing was saved.")

DataFrame with sentiment scores saved to processed_bank_app_reviews_with_sentiment.csv
