In [4]:
import pandas as pd
from google_play_scraper import app, Sort, reviews_all
from datetime import datetime

In [10]:
from google_play_scraper import search

# First search for the apps to get correct IDs
print("Searching for banking apps...")
results = search("Commercial Bank of Ethiopia", lang="en", country="et")
for result in results:
    print(f"Title: {result['title']}\nApp ID: {result['appId']}\n")

Searching for banking apps...
Title: Commercial Bank of Ethiopia
App ID: com.combanketh.mobilebanking

Title: EthioDirect
App ID: com.combanketh.ethiodirect

Title: CBEBirr Plus
App ID: prod.cbe.birr

Title: CBE Soft Token
App ID: com.cbe.softtoken

Title: Chase Mobile
App ID: com.chase.sig.android

Title: Kotak Bank: 811 Mobile App
App ID: com.kotak811mobilebankingapp.instantsavingsupiscanandpayrecharge

Title: Revolut
App ID: com.revolut.revolut

Title: telebirr
App ID: cn.tydic.ethiopay

Title: CBD - Instant digital banking
App ID: com.cbd.mobile

Title: M-PESA
App ID: com.safaricom.mpesa.lifestyle

Title: ADCB
App ID: com.adcb.bank



In [12]:
from google_play_scraper import search

# First search for the apps to get correct IDs
print("Searching for banking apps...")
results = search("Bank of Abyssinia (BOA)", lang="en", country="et")
for result in results:
    print(f"Title: {result['title']}\nApp ID: {result['appId']}\n")

Searching for banking apps...
Title: BoA Mobile
App ID: com.boa.boaMobileBanking

Title: BOA 2FA
App ID: com.boa.verify

Title: Commercial Bank of Ethiopia
App ID: com.combanketh.mobilebanking

Title: Apollo
App ID: com.boa.apollo

Title: CBEBirr Plus
App ID: prod.cbe.birr

Title: Dashen Bank
App ID: com.dashen.dashensuperapp

Title: Apollo Merchant
App ID: apollo.boa.com

Title: telebirr
App ID: cn.tydic.ethiopay

Title: AwashBIRR Pro
App ID: com.sc.awashpay

Title: BOA School
App ID: com.boa.school

Title: Fayda ID | የፋይዳ መታወቂያ
App ID: et.fayda.nidfaydaApp

Title: EthioDirect
App ID: com.combanketh.ethiodirect

Title: BOA Mobile Wallet
App ID: com.mode.boau.ui

Title: Chase Mobile
App ID: com.chase.sig.android

Title: Ethiopian Airlines
App ID: com.ethiopianairlines.ethiopianairlines

Title: ABA Mobile
App ID: com.paygo24.ibank

Title: SeaBank PH - Fast&Easy Banking
App ID: ph.seabank.seabank

Title: Monzo Bank - Mobile Banking
App ID: co.uk.getmondo

Title: Ebirr
App ID: com.safarif

In [27]:
# Configuration
BANKS = {
    "Commercial Bank of Ethiopia": "com.combanketh.mobilebanking",
    "Abyssiniya Bank": "com.boa.boaMobileBanking",
    "Dashen Bank": "com.dashen.dashensuperapp"
}
REVIEWS_PER_BANK = 500  # Target minimum
OUTPUT_FILE = "../../data/ethiopian_bank_reviews.csv"


In [23]:
def scrape_bank_reviews():
    all_reviews = []
    for bank_name, app_id in BANKS.items():
        print(f"Scraping {bank_name}...")
        try:
            app_info = app(app_id)
            print(f"Found: {app_info['title']} ({app_info['score']} ★)")
        except Exception as e:
            print(f"Error getting app info: {e}")
            continue

        reviews = reviews_all(
            app_id,
            sleep_milliseconds=100,
            lang='en',
            # country='et',
            sort=Sort.NEWEST,
            filter_score_with=None
        )

        print(reviews[2])
        
        for r in reviews[:REVIEWS_PER_BANK]:
            all_reviews.append({
                'review': r['content'],
                'rating': r['score'],
                'date': r['at'].strftime('%Y-%m-%d'),
                'bank': bank_name,
                'source': 'Google Play'
            })
        
        print(f"Collected {len(reviews)} reviews for {bank_name}")
    
    return pd.DataFrame(all_reviews)

In [24]:
def preprocess_reviews(df):
    initial_count = len(df)
    df = df.drop_duplicates(subset=['review', 'bank'])
    print(f"Removed {initial_count - len(df)} duplicates")
    
    df = df.dropna(subset=['review'])
    df['rating'] = df['rating'].fillna(0)
    

    df['review'] = df['review'].str.strip()
    df = df[df['review'].str.len() > 5] 
    
    return df

In [28]:
if __name__ == "__main__":
    # Scrape and save data
    reviews_df = scrape_bank_reviews()
    print(f"\nCollected {len(reviews_df)} reviews from Google Play Store")
    processed_df = preprocess_reviews(reviews_df)
    
    # Save to CSV
    processed_df.to_csv(OUTPUT_FILE, index=False)
    print(f"\nSaved {len(processed_df)} reviews to {OUTPUT_FILE}")
    print(processed_df.head())

Scraping Commercial Bank of Ethiopia...
Found: Commercial Bank of Ethiopia (4.3465347 ★)
{'reviewId': 'ed5c359a-c50a-4f6b-bb57-c5a77c27beaa', 'userName': 'A Google user', 'userImage': 'https://play-lh.googleusercontent.com/EGemoI2NTXmTsBVtJqk8jxF9rh8ApRWfsIMQSt2uE4OcpQqbFu7f7NbTK05lx80nuSijCz7sc3a277R67g', 'content': 'it is so amazing app. but, it is better to update it to access without any internet fees. just make it to open by free internet service when we open data connection.', 'score': 5, 'thumbsUpCount': 0, 'reviewCreatedVersion': '5.1.0', 'at': datetime.datetime(2025, 6, 9, 16, 20, 6), 'replyContent': None, 'repliedAt': None, 'appVersion': '5.1.0'}
Collected 7509 reviews for Commercial Bank of Ethiopia
Scraping Abyssiniya Bank...
Found: BoA Mobile (2.73 ★)
{'reviewId': '3559b91c-fad9-4032-bebe-cf99974b9628', 'userName': 'Robel Alebachew', 'userImage': 'https://play-lh.googleusercontent.com/a/ACg8ocKc3cWFE6DB-KelsfBZrSQ5KjVQ7TztXXm7wELqjMcFMLChYw=mo', 'content': 'Hello, I’m faci