In [41]:
import pandas as pd
import sys, os

# Add src module path
sys.path.append(os.path.abspath("../src"))

# Import necessary functions
from preprocessing import preprocess_reviews
from theme_extraction import extract_keywords_tfidf
from theme_grouping import group_keywords_into_themes

# Read each bank's cleaned review export and tag every row with its bank name
dashen = pd.read_csv(
    "../Data/Dashen_Bank_reviews_20250606_130804_cleaned.csv"
).assign(bank="Dashen Bank")
cbe = pd.read_csv(
    "../Data/Commercial_Bank_of_Ethiopia_reviews_20250606_130816_cleaned.csv"
).assign(bank="Commercial Bank of Ethiopia")
boa = pd.read_csv(
    "../Data/Bank_of_Abyssinia_reviews_20250606_130818_cleaned.csv"
).assign(bank="Bank of Abyssinia")

# Stack the three per-bank frames into one table with a fresh 0..n-1 index
per_bank_frames = [dashen, cbe, boa]
all_reviews = pd.concat(per_bank_frames, ignore_index=True)

import pandas as pd
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load spaCy's small English model once at module level; preprocess_reviews
# below reuses this shared `nlp` object on every call. The "ner" and "parser"
# components are disabled because the preprocessing only reads per-token flags
# (is_stop, is_punct, is_space, is_alpha) and lemmas.
nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])

def preprocess_reviews(df: pd.DataFrame, text_column: str = "review") -> pd.DataFrame:
    """
    Preprocess review text: tokenization, lowercasing, stopword removal, lemmatization.

    Every review is run through the module-level spaCy pipeline ``nlp``. Tokens
    that are stop words, punctuation, whitespace, or not purely alphabetic are
    dropped; the lemmas of the remaining tokens are lowercased and joined with
    single spaces.

    Missing reviews (NaN/None) are treated as empty text and produce an empty
    cleaned string. Without this, casting to ``str`` would turn them into the
    literal word "nan", which would survive the filters as a keyword.

    Parameters:
        df (pd.DataFrame): DataFrame with reviews. It is modified in place:
            a 'cleaned_review' column is added (or overwritten).
        text_column (str): Name of column containing raw review text.

    Returns:
        pd.DataFrame: The same DataFrame object, now with the 'cleaned_review' column.

    Raises:
        KeyError: If ``text_column`` is not a column of ``df``.
    """
    # Blank out missing values *before* the str cast so NaN never becomes "nan".
    texts = df[text_column].fillna("").astype(str)

    cleaned_reviews = [
        " ".join(
            token.lemma_.lower()
            for token in doc
            if not token.is_stop
            and not token.is_punct
            and not token.is_space
            and token.is_alpha
        )
        # nlp.pipe streams the texts through spaCy in batches of 50
        for doc in nlp.pipe(texts, batch_size=50)
    ]

    # Assigning a plain list is positional, so the frame's index does not matter.
    df["cleaned_review"] = cleaned_reviews
    return df



# Clean the combined reviews; preprocess_reviews adds the 'cleaned_review' column
all_reviews = preprocess_reviews(all_reviews, text_column="review")

# Extract keywords from the cleaned text (top_n=50), then group them into themes
keywords = extract_keywords_tfidf(all_reviews, text_column="cleaned_review", top_n=50)
theme_groups = group_keywords_into_themes(keywords)

# Print one header per bank, followed by one "theme: keywords" line per theme
for bank, themes in theme_groups.items():
    summary_lines = [f"\n🔹 {bank}"]
    summary_lines.extend(
        f"  - {label}: {', '.join(terms)}" for label, terms in themes.items()
    )
    print("\n".join(summary_lines))



🔹 Bank of Abyssinia
  - Other: good, work, bad, nice, bank, boa, crash, banking, mobile, like, ok, time, use, great, excellent, open, wow, fix, service, mobile banking, fast, poor, thank, developer, ነው, version, new, money, try, phone, amazing, properly, download, love, bug, experience
  - User Interface & Experience: app, good app, bad app, application, app work, banking app, easy
  - Feature Requests: update, need, option, developer option
  - Transaction Performance: slow, transaction, transfer

🔹 Commercial Bank of Ethiopia
  - Other: good, nice, work, like, ok, use, cbe, wow, bank, great, excellent, thank, fast, service, bad, time, banking, amazing, love, mobile, simple, money, account, new, mobile banking, perfect, problem, ethiopia, fix, best, ነው, በጣም, fantastic, network, send, developer
  - User Interface & Experience: app, good app, nice app, easy, application, easy use, great app, good application, like app
  - Feature Requests: update, need, option
  - Transaction Performan

In [44]:
# Single source of truth for the export location, so the confirmation message
# below cannot drift from the file that is actually written.
theme_csv_path = "../Data/thematic_grouping.csv"

# Flatten the nested {bank: {theme: [keyword, ...]}} mapping into one row per
# (bank, theme). The inner name is `theme_keywords` rather than `keywords` so
# this cell does not overwrite the keyword-extraction result from the earlier cell.
rows = [
    {"bank": bank, "theme": theme, "keywords": ", ".join(theme_keywords)}
    for bank, themes in theme_groups.items()
    for theme, theme_keywords in themes.items()
]

# Explicit columns keep the CSV header stable even if no themes were produced
theme_df = pd.DataFrame(rows, columns=["bank", "theme", "keywords"])

# Export to CSV
theme_df.to_csv(theme_csv_path, index=False)

print(f"✅ Saved: {theme_csv_path} with cleaned + grouped keywords")


✅ Saved: thematic_groupings.csv with cleaned + grouped keywords


In [46]:
# Preview the flattened theme table; head(20) shows at most the first 20 rows
theme_df.head(20)

Unnamed: 0,bank,theme,keywords
0,Bank of Abyssinia,Other,"good, work, bad, nice, bank, boa, crash, banki..."
1,Bank of Abyssinia,User Interface & Experience,"app, good app, bad app, application, app work,..."
2,Bank of Abyssinia,Feature Requests,"update, need, option, developer option"
3,Bank of Abyssinia,Transaction Performance,"slow, transaction, transfer"
4,Commercial Bank of Ethiopia,Other,"good, nice, work, like, ok, use, cbe, wow, ban..."
5,Commercial Bank of Ethiopia,User Interface & Experience,"app, good app, nice app, easy, application, ea..."
6,Commercial Bank of Ethiopia,Feature Requests,"update, need, option"
7,Commercial Bank of Ethiopia,Transaction Performance,"transaction, transfer"
8,Dashen Bank,Other,"good, dashen, wow, bank, nice, super, fast, am..."
9,Dashen Bank,User Interface & Experience,"app, good app, easy, super app, application, e..."


🧠 Thematic Analysis Interpretation of Customer Reviews
We applied TF-IDF keyword extraction followed by manual theme grouping across user reviews of three major Ethiopian banking apps. The following insights emerged:

🔹 1. Common Themes Across All Banks
Up to four themes were identified for each bank (see the table above for which themes appear per bank):

User Interface & Experience (UI/UX): Keywords like app, good app, application, and easy highlight how users consistently comment on app usability and design.

Feature Requests: Terms such as update, need, and option show that users are actively requesting improvements or new functionalities.

Transaction Performance: Keywords like transaction and transfer (and, for Bank of Abyssinia, slow) suggest that performance—particularly in financial operations—is a recurring pain point.

Other: This category captures more general sentiment (e.g., good, nice, bad) or unrelated terms. These reviews are often vague but still valuable for tone assessment.

🔹 2. Bank-Specific Observations
🟦 Bank of Abyssinia
UI/UX concerns are fairly prominent, with repeated mentions of bad app and crash, indicating stability issues.

Transaction performance is discussed with keywords like slow, suggesting delays or failures.

Feature gaps like developer option were uniquely mentioned here, hinting at more technical user expectations.

🟧 Commercial Bank of Ethiopia (CBE)
App simplicity is appreciated (easy, nice), but transaction and feature issues are still present.

Keywords such as wow (positive) and use (neutral) appear frequently, suggesting a broad user base with varying experiences.

🟩 Dashen Bank
Receives relatively more positive sentiment (keywords like super, fast, amazing) in the “Other” category.

Still shares common concerns in UI/UX and feature requests, but the tone appears slightly more optimistic.

📌 Summary
This analysis shows:

A shared set of user priorities across all three apps (ease of use, features, transaction performance).

Opportunities to improve app stability, introduce requested features, and enhance transaction reliability.

Slight tone differences per bank, with Dashen skewing more positive and BOA more negative in sentiment.

These thematic insights should guide targeted UX and engineering improvements for each banking app.

In [4]:
# Step 1: Imports
import pandas as pd
import sys, os
sys.path.append(os.path.abspath("../src"))
from preprocessing import preprocess_reviews
from theme_extraction import extract_keywords_tfidf
from theme_grouping import group_keywords_into_themes
from theme_assignment import assign_theme_to_review

# Imported once here rather than on every loop iteration
from IPython.display import display

# Step 2: Define sentiment sources
# Maps each sentiment method to the per-bank CSV that holds its scored reviews.
sentiment_sources = {
    "DistilBERT": {
        "BOA": "../Data/BOA_reviews_with_sentiment_DistilBERT.csv",
        "CBE": "../Data/CBE_reviews_with_sentiment_DistilBERT.csv",
        "Dashen": "../Data/Dashen_Bank_reviews_with_sentiment_DistilBERT.csv"
    },
    "VADER": {
        "BOA": "../Data/BOA_reviews_with_sentiment_VADER.csv",
        "CBE": "../Data/CBE_reviews_with_sentiment_VADER.csv",
        "Dashen": "../Data/Dashen_Bank_reviews_with_sentiment_VADER.csv"
    },
    "TextBlob": {
        "BOA": "../Data/BOA_reviews_with_sentiment_TextBlob.csv",
        "CBE": "../Data/CBE_reviews_with_sentiment_TextBlob.csv",
        "Dashen": "../Data/Dashen_Bank_reviews_with_sentiment_TextBlob.csv"
    }
}

# Columns the loaded CSVs must supply; "bank" and "assigned_theme" are added below
required_columns = ["review", "sentiment_score", "sentiment_label"]
# Column order of the exported file
export_columns = ["bank", "review", "sentiment_score", "sentiment_label", "assigned_theme"]

# Step 3: Process each sentiment method
for method, paths in sentiment_sources.items():
    print(f"\n🔁 Processing {method} sentiment...")

    # Load data and add bank labels
    boa = pd.read_csv(paths["BOA"]).assign(bank="Bank of Abyssinia")
    cbe = pd.read_csv(paths["CBE"]).assign(bank="Commercial Bank of Ethiopia")
    dashen = pd.read_csv(paths["Dashen"]).assign(bank="Dashen Bank")

    # Combine all reviews
    all_reviews = pd.concat([boa, cbe, dashen], ignore_index=True)

    # Fail fast: a missing column would otherwise only surface as a KeyError
    # after the slow preprocessing and theme assignment have already run.
    missing_columns = [col for col in required_columns if col not in all_reviews.columns]
    if missing_columns:
        raise KeyError(
            f"{method} sentiment files are missing required column(s): {missing_columns}"
        )

    # Preprocess reviews
    all_reviews = preprocess_reviews(all_reviews, "review")

    # Extract keywords and group into themes. This is recomputed on every
    # iteration from the current method's review files.
    # NOTE(review): if all three methods' CSVs contain the same reviews, this
    # (and the preprocessing above) could be hoisted out of the loop — confirm first.
    top_keywords = extract_keywords_tfidf(all_reviews, "cleaned_review", top_n=50)
    theme_groups = group_keywords_into_themes(top_keywords)

    # Assign theme to each review
    all_reviews["assigned_theme"] = all_reviews.apply(
        lambda row: assign_theme_to_review(row["cleaned_review"], row["bank"], theme_groups),
        axis=1
    )

    # Export final results
    final_df = all_reviews[export_columns]
    out_path = f"../Data/final_reviews_with_sentiment_and_themes_{method}.csv"
    final_df.to_csv(out_path, index=False)

    print(f"✅ Saved: {out_path}")
    print(f"\n🔍 Preview of {method} results:")
    display(final_df.head(10))



🔁 Processing DistilBERT sentiment...
✅ Saved: ../Data/final_reviews_with_sentiment_and_themes_DistilBERT.csv

🔍 Preview of DistilBERT results:


Unnamed: 0,bank,review,sentiment_score,sentiment_label,assigned_theme
0,Bank of Abyssinia,it's not working,0.999786,negative,Transaction Performance
1,Bank of Abyssinia,"Hello, I’m facing a problem with the BOA Mobil...",0.999415,negative,User Interface & Experience
2,Bank of Abyssinia,exceptional,0.999851,positive,Other
3,Bank of Abyssinia,BoA Mobile good bank,0.998473,positive,General Sentiment & Feedback
4,Bank of Abyssinia,this is worest app 24/7 loading,0.993474,negative,User Interface & Experience
5,Bank of Abyssinia,This App is not interest for Android phone Ple...,0.997138,negative,User Interface & Experience
6,Bank of Abyssinia,BoA system is confartable,0.998014,negative,Other
7,Bank of Abyssinia,very nice Abyssinia bank is choice all,0.999583,positive,User Interface & Experience
8,Bank of Abyssinia,"this app, for me , is a waste of time. It does...",0.999805,negative,User Interface & Experience
9,Bank of Abyssinia,Good service.,0.999853,positive,General Sentiment & Feedback



🔁 Processing VADER sentiment...
✅ Saved: ../Data/final_reviews_with_sentiment_and_themes_VADER.csv

🔍 Preview of VADER results:


Unnamed: 0,bank,review,sentiment_score,sentiment_label,assigned_theme
0,Bank of Abyssinia,it's not working,0.0,neutral,Transaction Performance
1,Bank of Abyssinia,"Hello, I’m facing a problem with the BOA Mobil...",-0.1884,negative,User Interface & Experience
2,Bank of Abyssinia,exceptional,0.0,neutral,Other
3,Bank of Abyssinia,BoA Mobile good bank,0.4404,positive,General Sentiment & Feedback
4,Bank of Abyssinia,this is worest app 24/7 loading,0.0,neutral,User Interface & Experience
5,Bank of Abyssinia,This App is not interest for Android phone Ple...,-0.0464,neutral,User Interface & Experience
6,Bank of Abyssinia,BoA system is confartable,0.0,neutral,Other
7,Bank of Abyssinia,very nice Abyssinia bank is choice all,0.4754,positive,User Interface & Experience
8,Bank of Abyssinia,"this app, for me , is a waste of time. It does...",-0.8491,negative,User Interface & Experience
9,Bank of Abyssinia,Good service.,0.4404,positive,General Sentiment & Feedback



🔁 Processing TextBlob sentiment...
✅ Saved: ../Data/final_reviews_with_sentiment_and_themes_TextBlob.csv

🔍 Preview of TextBlob results:


Unnamed: 0,bank,review,sentiment_score,sentiment_label,assigned_theme
0,Bank of Abyssinia,it's not working,0.0,neutral,Transaction Performance
1,Bank of Abyssinia,"Hello, I’m facing a problem with the BOA Mobil...",0.033333,neutral,User Interface & Experience
2,Bank of Abyssinia,exceptional,0.666667,positive,Other
3,Bank of Abyssinia,BoA Mobile good bank,0.7,positive,General Sentiment & Feedback
4,Bank of Abyssinia,this is worest app 24/7 loading,0.0,neutral,User Interface & Experience
5,Bank of Abyssinia,This App is not interest for Android phone Ple...,0.0,neutral,User Interface & Experience
6,Bank of Abyssinia,BoA system is confartable,0.0,neutral,Other
7,Bank of Abyssinia,very nice Abyssinia bank is choice all,0.78,positive,User Interface & Experience
8,Bank of Abyssinia,"this app, for me , is a waste of time. It does...",-0.016667,neutral,User Interface & Experience
9,Bank of Abyssinia,Good service.,0.7,positive,General Sentiment & Feedback
