In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the small English spaCy pipeline used for tokenisation and lemmatisation
nlp = spacy.load("en_core_web_sm")

# Load sentiment dataset
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
df = pd.read_csv(r"C:\10 Kifia Tasks\Week-2\Customer-Experience-Analytics-for-Fintech-Apps\notebooks\Sentiment_Analysis\BOA_sentiment_analysis.csv")

# Preprocess text (lemmatization & stopword removal).
# Missing reviews are replaced with "" first: str(NaN) would otherwise inject the
# literal token "nan" into the cleaned text and, from there, into the TF-IDF vocabulary.
review_texts = df["review"].fillna("").astype(str)

# nlp.pipe streams the texts through the pipeline in batches instead of invoking
# the model once per row through Series.apply.
df["clean_review"] = [
    " ".join(token.lemma_ for token in doc if not token.is_stop)
    for doc in nlp.pipe(review_texts)
]

# Extract keywords using TF-IDF.
# max_features=50 keeps the 50 terms with the highest corpus-wide term frequency;
# get_feature_names_out() returns them in vocabulary (alphabetical) order, not
# ranked by TF-IDF weight.
vectorizer = TfidfVectorizer(max_features=50)
tfidf_matrix = vectorizer.fit_transform(df["clean_review"])
keywords = vectorizer.get_feature_names_out()

print("üîπ Top extracted keywords:", keywords)


üîπ Top extracted keywords: ['android' 'app' 'application' 'ask' 'bad' 'bank' 'banking' 'boa' 'bug'
 'crash' 'customer' 'day' 'developer' 'download' 'error' 'ethiopia'
 'experience' 'fix' 'good' 'great' 'issue' 'keep' 'know' 'like' 'long'
 'mobile' 'money' 'need' 'new' 'nice' 'open' 'option' 'phone' 'poor' 'say'
 'service' 'slow' 'take' 'time' 'transaction' 'transfer' 'try' 'turn'
 'update' 'use' 'user' 'version' 'well' 'work' 'ነው']


In [3]:
import pandas as pd
import spacy

# spaCy English pipeline that the preprocessing step relies on
model_name = "en_core_web_sm"
nlp = spacy.load(model_name)

# Sentiment-scored reviews produced by the earlier sentiment-analysis step
# NOTE(review): absolute, machine-specific path - confirm it exists on the target machine.
dataset_path = r"C:\10 Kifia Tasks\Week-2\Customer-Experience-Analytics-for-Fintech-Apps\notebooks\Sentiment_Analysis\BOA_sentiment_analysis.csv"
df = pd.read_csv(dataset_path)

# Preprocess text (remove stopwords & lemmatize)
def preprocess(text):
    """Return the lemmatized, stopword-free form of a single review.

    Args:
        text: Raw review. Non-string values are converted with ``str()``,
            except missing values (``None`` or float NaN), which yield "".

    Returns:
        A space-separated string of the lemmas of every token that spaCy
        does not flag as a stop word; "" for a missing review.
    """
    # NaN is the only float that is not equal to itself. Without this guard a
    # missing review would be stringified to "nan"/"None" and lemmatized as if
    # it were a real word.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    doc = nlp(str(text))
    return " ".join(token.lemma_ for token in doc if not token.is_stop)

# Run the cleaner over every review and keep the result next to the raw text
cleaned_reviews = df["review"].apply(preprocess)
df["clean_review"] = cleaned_reviews

# Persist the enriched frame (written to the current working directory, no index column)
preprocessed_csv = "BOA_preprocessed_reviews.csv"
df.to_csv(preprocessed_csv, index=False)
print("‚úÖ Text preprocessing complete! Saved as BOA_preprocessed_reviews.csv.")


‚úÖ Text preprocessing complete! Saved as BOA_preprocessed_reviews.csv.


In [2]:
import re
from functools import lru_cache

# Rule-based keyword groups: theme name -> keywords that signal the theme.
# The keywords are matched against lemmatized review text, so they are written in
# base form ("new feature" rather than "new features", which lemmatization removes).
themes = {
    "Account Access Issues": ["login", "password", "authentication", "error", "access"],
    "Transaction Performance": ["transfer", "delay", "slow", "processing", "speed"],
    "User Interface & Experience": ["UI", "design", "navigation", "intuitive", "easy"],
    "Customer Support": ["support", "help", "response", "service"],
    "Feature Requests": ["fingerprint", "notification", "new feature", "budgeting"]
}


@lru_cache(maxsize=None)
def _keyword_pattern(keywords):
    """Compile one case-insensitive regex matching any keyword at the start of a word.

    Args:
        keywords: Tuple of keyword strings (a tuple so the result can be cached).

    Returns:
        A compiled pattern. The leading ``\\b`` anchors each keyword to a word
        start, so "easy" no longer fires inside "uneasy" and "ui" does not fire
        inside "build", while suffixed forms such as "slowly" still match.
    """
    alternatives = "|".join(re.escape(keyword) for keyword in keywords)
    return re.compile(r"\b(?:" + alternatives + ")", re.IGNORECASE)


# Assign themes to reviews
def categorize_theme(review):
    """Return the themes whose keywords occur in a (cleaned) review.

    Args:
        review: Review text. A non-string value (for example the NaN that an
            empty cell becomes after a CSV round-trip) is treated as having
            no text instead of raising ``TypeError``.

    Returns:
        List of matching theme names in the order they are declared in
        ``themes``; ``["Other"]`` when nothing matches.
    """
    if not isinstance(review, str):
        return ["Other"]
    matched_themes = [
        theme
        for theme, keywords in themes.items()
        # An empty keyword list must never match (mirrors any([]) being False).
        if keywords and _keyword_pattern(tuple(keywords)).search(review)
    ]
    return matched_themes if matched_themes else ["Other"]

# Label each cleaned review with its list of themes
theme_labels = df["clean_review"].apply(categorize_theme)
df["identified_theme"] = theme_labels

# Write the labelled frame to the current working directory (no index column)
# NOTE(review): each cell of identified_theme holds a Python list; presumably it is
# stored in the CSV as its string form - confirm downstream readers parse it back.
thematic_csv = "BOA_thematic_analysis.csv"
df.to_csv(thematic_csv, index=False)

print("‚úÖ Thematic clustering completed! Results saved in BOA_thematic_analysis.csv.")


‚úÖ Thematic clustering completed! Results saved in BOA_thematic_analysis.csv.
