In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import sys
from pathlib import Path

# Make the parent of the current working directory importable so the
# project-local `scripts` package used below can be resolved.
# The membership check keeps this cell idempotent: re-running it does not
# stack duplicate entries on sys.path.
PROJECT_ROOT = str(Path().resolve().parent)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from scripts.analysis.text_cleaning import extract_keywords, assign_themes

# Apply seaborn's "whitegrid" style to all figures drawn in this notebook.
# `sns.set` is only an alias of `set_theme`, which is the preferred spelling
# (requires seaborn >= 0.11).
sns.set_theme(style="whitegrid")

In [None]:
# Read the cleaned reviews CSV (path is relative to the notebook's working
# directory) and preview the first rows.
reviews_csv = "../data/clean/reviews_clean.csv"
df = pd.read_csv(reviews_csv)
df.head()

In [None]:
# Run the project's `extract_keywords` helper on the reviews and preview
# the first 10 rows of what it returns.
# NOTE(review): presumably `max_features` caps how many keywords are kept —
# confirm in scripts/analysis/text_cleaning.py.
MAX_KEYWORD_FEATURES = 20
keyword_df = extract_keywords(df, max_features=MAX_KEYWORD_FEATURES)
keyword_df.head(10)

In [None]:
# Run the project's `assign_themes` helper on the reviews and preview the
# first rows of the result. Later cells read "bank" and "theme" columns
# from `df_with_themes`.
# NOTE(review): whether `assign_themes` returns a copy or modifies `df`
# in place is not visible here — confirm in scripts/analysis/text_cleaning.py.
df_with_themes = assign_themes(df)
df_with_themes.head()

In [None]:
# Persist the themed reviews as CSV under ../data/themes, creating the
# directory (and any missing parents) if it does not exist yet.
output_path = Path("../data/themes")
output_path.mkdir(exist_ok=True, parents=True)

themes_csv = output_path / "reviews_with_themes.csv"
# index=False keeps the DataFrame's row index out of the file.
df_with_themes.to_csv(themes_csv, index=False)

# The confirmation text is a fixed string; it is not derived from output_path.
print("✅ Saved: data/themes/reviews_with_themes.csv")

In [None]:
# Count reviews for every (bank, theme) pair and draw them as a grouped
# bar chart: one group per theme, one bar colour per bank.
top_themes = (
    df_with_themes
    .groupby(["bank", "theme"])
    .size()
    .reset_index(name="count")
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_themes, x="theme", y="count", hue="bank", ax=ax)
ax.set_title("Top Themes per Bank")
# Tilt the x tick labels so longer theme names do not collide.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()