In [7]:
import pandas as pd
from google_play_scraper import Sort, reviews
from datetime import datetime

# Function to fetch reviews
def fetch_reviews(app_id, bank_name, count=400):
    """Fetch up to ``count`` of the newest Google Play reviews for one app.

    Reviews are requested page by page. The continuation token returned by
    each request is handed to the next request so that every page continues
    where the previous one stopped instead of returning the first page again.

    Args:
        app_id: Google Play package name of the app to scrape.
        bank_name: Label written to the ``bank`` column of every row.
        count: Maximum number of reviews to collect.

    Returns:
        A DataFrame with one row per review and the columns ``review``,
        ``rating``, ``date`` (ISO ``YYYY-MM-DD`` string), ``bank`` and
        ``source``. The columns are present even when no review was found.
    """
    columns = ['review', 'rating', 'date', 'bank', 'source']
    all_reviews = []
    batch_size = 100  # Reviews requested per call
    continuation_token = None  # None asks for the first page

    while len(all_reviews) < count:
        remaining = count - len(all_reviews)
        result, continuation_token = reviews(
            app_id,
            lang='en',
            country='us',
            sort=Sort.NEWEST,
            count=min(batch_size, remaining),
            filter_score_with=None,  # Get all reviews
            continuation_token=continuation_token,
        )
        if not result:
            # No further reviews available; stop instead of looping forever.
            break
        # Trim defensively in case a page holds more reviews than still needed.
        for r in result[:remaining]:
            all_reviews.append({
                'review': r['content'],
                'rating': r['score'],
                'date': r['at'].date().isoformat(),
                'bank': bank_name,
                'source': 'Google Play'
            })
    return pd.DataFrame(all_reviews, columns=columns)

# Google Play package identifiers, keyed by the bank's display name.
# Iteration order of this mapping is the order in which banks are scraped.
apps = {
    "Commercial Bank of Ethiopia": "com.cbe.customerapp",
    "Bank of Abyssinia": "com.boa.boamobileapp",
    "Dashen Bank": "com.mis.dashenmobilebanking",
}

# Collect one DataFrame per bank, then combine them in a single concat.
# Concatenating inside the loop would re-copy the accumulated rows on every
# iteration and requires seeding the result with an empty placeholder frame.
frames = []
for bank, app_id in apps.items():
    print(f"Scraping {bank}...")
    df = fetch_reviews(app_id, bank, count=500)
    frames.append(df)
all_data = pd.concat(frames, ignore_index=True)

# Drop exact duplicate rows, then write the combined data set to CSV
# (without the index column).
all_data.drop_duplicates(inplace=True)
all_data.to_csv('ethiopian_banks_reviews.csv', index=False)
print("Saved as ethiopian_banks_reviews.csv")


Scraping Commercial Bank of Ethiopia...
Scraping Bank of Abyssinia...
Scraping Dashen Bank...
Saved as ethiopian_banks_reviews.csv
