In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer

# Load NLP model: spaCy's small English pipeline supplies the lemmas and
# stop-word flags used by the preprocessing step below.
nlp = spacy.load("en_core_web_sm")

# Load sentiment dataset
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where the CSV lives at exactly this location.
df = pd.read_csv(r"C:\10 Kifia Tasks\Week-2\Customer-Experience-Analytics-for-Fintech-Apps\notebooks\Sentiment_Analysis\DASHEN_sentiment_analysis.csv")

# Preprocess text (lemmatization & stopword removal)
# Missing reviews are replaced with "" before processing: str(NaN) is the
# literal text "nan", which would otherwise survive as a token and could end
# up in the TF-IDF vocabulary.
review_texts = df["review"].fillna("").astype(str).tolist()

# nlp.pipe streams the texts through the pipeline in batches instead of
# issuing one nlp() call per row; the resulting docs come back in input order,
# so the list lines up positionally with the DataFrame rows.
df["clean_review"] = [
    " ".join(token.lemma_ for token in doc if not token.is_stop)
    for doc in nlp.pipe(review_texts)
]

# Extract keywords using TF-IDF.
# max_features=50 keeps the 50 terms with the highest term frequency across
# the corpus; get_feature_names_out() returns that vocabulary in alphabetical
# order, so the printed array is NOT ranked by TF-IDF score.
vectorizer = TfidfVectorizer(max_features=50)
tfidf_matrix = vectorizer.fit_transform(df["clean_review"])
keywords = vectorizer.get_feature_names_out()

print("🔹 Top extracted keywords:", keywords)


🔹 Top extracted keywords: ['account' 'ahead' 'amazing' 'app' 'application' 'bank' 'banking'
 'changer' 'chat' 'commerce' 'convenient' 'customer' 'dashen' 'digital'
 'easy' 'excellent' 'experience' 'fast' 'feature' 'friendly' 'game' 'good'
 'great' 'highly' 'interface' 'like' 'love' 'make' 'mobile' 'need' 'nice'
 'payment' 'platform' 'recommend' 'reliable' 'seamless' 'secure' 'service'
 'smooth' 'step' 'super' 'supper' 'thank' 'time' 'transaction' 'transfer'
 'use' 'user' 'work' 'wow']


In [2]:
import pandas as pd
import spacy

# Location of the sentiment-scored Dashen reviews (DASHEN_sentiment_analysis.csv).
# This is an absolute Windows path, kept exactly as the notebook had it.
sentiment_csv_path = r"C:\10 Kifia Tasks\Week-2\Customer-Experience-Analytics-for-Fintech-Apps\notebooks\Sentiment_Analysis\DASHEN_sentiment_analysis.csv"

# Read the reviews into a DataFrame.
df = pd.read_csv(sentiment_csv_path)

# spaCy's small English pipeline; used for lemmas and stop-word flags.
spacy_model_name = "en_core_web_sm"
nlp = spacy.load(spacy_model_name)

# Preprocess text (remove stopwords & lemmatize)
def preprocess(text):
    """Return the lemmatized, stop-word-free form of a single review.

    Args:
        text: The raw review. Non-string values are converted with ``str()``,
            except missing values (``None`` or a float NaN), which are treated
            as an empty review.

    Returns:
        A single string of the lemmas of all non-stop-word tokens, joined by
        spaces. Returns ``""`` for a missing review.
    """
    # A missing value must not be stringified: str(None) / str(NaN) yield the
    # literal words "None" / "nan", which would be kept as real tokens.
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""

    doc = nlp(str(text))
    return " ".join(token.lemma_ for token in doc if not token.is_stop)

# Run every review through preprocess() and keep the result next to the
# original text in a new "clean_review" column.
cleaned_reviews = df["review"].apply(preprocess)
df["clean_review"] = cleaned_reviews

# Save preprocessed results (written to the current working directory,
# without the DataFrame index).
df.to_csv(path_or_buf="DASHEN_preprocessed_reviews.csv", index=False)

print("✅ Text preprocessing complete! Saved as DASHEN_preprocessed_reviews.csv.")


✅ Text preprocessing complete! Saved as DASHEN_preprocessed_reviews.csv.


In [3]:
import re

# Define rule-based keyword groups
# Each theme maps to keywords/phrases that are matched case-insensitively as
# whole words. The reviews being categorised are lemmatized, so inflected
# keywords are listed next to their lemma ("processing"/"process",
# "new features"/"new feature"); otherwise they could never match.
themes = {
    "Account Access Issues": ["login", "password", "authentication", "error", "access"],
    "Transaction Performance": ["transfer", "delay", "slow", "processing", "process", "speed"],
    "User Interface & Experience": ["UI", "design", "navigation", "intuitive", "easy"],
    "Customer Support": ["support", "help", "response", "service"],
    "Feature Requests": ["fingerprint", "notification", "new features", "new feature", "budgeting"]
}


def _mentions_keyword(text, keyword):
    """Return True if `keyword` occurs in `text` as a whole word/phrase, ignoring case."""
    # Lookarounds instead of plain substring search so that "UI" does not hit
    # "GUI" or "build", and "error" does not hit "terror".
    pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
    return re.search(pattern, text, flags=re.IGNORECASE) is not None


# Assign themes to reviews
def categorize_theme(review):
    """Return the list of themes whose keywords appear in a review.

    Matching is case-insensitive and whole-word: a keyword only counts when it
    is not embedded inside a longer word.

    Args:
        review: The review text. Any non-string value (e.g. NaN from an empty
            CSV cell) is treated as containing no keywords.

    Returns:
        A list of theme names, in the order they are declared in ``themes``,
        or ``["Other"]`` when no theme matches.
    """
    if not isinstance(review, str):
        return ["Other"]

    matched_themes = [
        theme
        for theme, keywords in themes.items()
        if any(_mentions_keyword(review, keyword) for keyword in keywords)
    ]
    return matched_themes if matched_themes else ["Other"]

# Tag each cleaned review with the list returned by categorize_theme()
# (one or more theme names, or ["Other"]).
theme_labels = df["clean_review"].apply(categorize_theme)
df["identified_theme"] = theme_labels

# Write the labelled reviews to the current working directory, without the
# DataFrame index. The list-valued theme column is stored as text in the CSV.
df.to_csv(path_or_buf="DASHEN_thematic_analysis.csv", index=False)

print("✅ Thematic clustering completed! Results saved in DASHEN_thematic_analysis.csv.")


✅ Thematic clustering completed! Results saved in DASHEN_thematic_analysis.csv.
