In [12]:
from google_play_scraper import reviews, app
import pandas as pd
from datetime import datetime


In [13]:
apps = {
    "Commercial Bank of Ethiopia": "com.combanketh.mobilebanking",
    "Bank of Abyssinia": "com.boa.boaMobileBanking",  
    "Awash Bank": "dashen.dashensuperapp"  
}

In [14]:
all_reviews = []

for bank_name, app_id in apps.items():
    result, _ = reviews(
        app_id,
        lang='en',
        country='et',
        count=600,  #slightly more to avoid loss after cleaning
    )
    for r in result:
        all_reviews.append({
            "review": r["content"],
            "rating": r["score"],
            "date": r["at"],
            "bank": bank_name,
            "source": "Google Play"
        })


In [15]:
df = pd.DataFrame(all_reviews)

# Drop duplicates
df.drop_duplicates(subset=["review", "rating", "date", "bank"], inplace=True)

# Handle missing data (drop rows with empty review or rating)
df.dropna(subset=["review", "rating", "date"], inplace=True)

# Normalize date to YYYY-MM-DD
df["date"] = pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d")


In [16]:
df.to_csv("bank_reviews.csv", index=False)
print("✅ Data saved to bank_reviews.csv with", len(df), "reviews.")


✅ Data saved to bank_reviews.csv with 1200 reviews.
