Install the required libraries

In [None]:
# Install the Google Play scraper and pandas into the environment of the running kernel.
%pip install google-play-scraper pandas



Initialize the scraper and fetch reviews

In [6]:
from google_play_scraper import reviews
import pandas as pd
from google_play_scraper import Sort

# Google Play package ID of the CBE mobile banking app; passed to scrape_reviews() below
app_id = "com.combanketh.mobilebanking"

# Function to scrape reviews for a given language
def scrape_reviews(app_id, bank_name, lang, count=600, country="ET"):
    """Fetch the newest Google Play reviews for one app in one language.

    Parameters
    ----------
    app_id : str
        Google Play package ID of the app to scrape.
    bank_name : str
        Label written to the ``bank`` column of every returned row.
    lang : str
        Language code forwarded to the scraper ("en" for English,
        "am" for Amharic).
    count : int, default 600
        Number of reviews requested from the scraper.
    country : str, default "ET"
        Store country code forwarded to the scraper.

    Returns
    -------
    pandas.DataFrame
        One row per review as returned by ``reviews``, with three extra
        constant columns: ``bank``, ``source`` ("Google Play") and
        ``language``.
    """
    # reviews() returns (list_of_review_dicts, continuation_token); the
    # token is not needed because only a single batch is requested.
    result, _ = reviews(
        app_id,
        lang=lang,
        country=country,
        count=count,
        sort=Sort.NEWEST,
    )
    df = pd.DataFrame(result)
    df["bank"] = bank_name
    df["source"] = "Google Play"
    df["language"] = lang  # remember which language query produced the row
    return df

# Pull the newest reviews once per language: English first, then Amharic
df_en, df_am = (scrape_reviews(app_id, "CBE", code) for code in ("en", "am"))

# Output schema: the scraper's field names are mapped onto these columns
columns = ["review", "rating", "date", "bank", "source"]

# Stack both languages, rename the scraper fields, and keep only the
# output columns in the order listed above
df = (
    pd.concat([df_en, df_am], ignore_index=True)
    .rename(columns={"content": "review", "score": "rating", "at": "date"})
)[columns]

# Persist the combined frame and show a preview
df.to_csv("CBE_bank_reviews.csv", index=False)
print("✅ CBE reviews saved successfully as CBE_bank_reviews.csv (English + Amharic).")
print(df.head())




✅ CBE reviews saved successfully as CBE_bank_reviews.csv (English + Amharic).


Finalizing Step 1: Cleaning & Formatting

✅ 1️⃣ Remove Duplicates & Missing Values

In [7]:
# Keep the first occurrence of each review text, then discard rows whose
# review text is missing
df = df.drop_duplicates(subset=["review"]).dropna(subset=["review"])


✅ 2️⃣ Normalize Date Format

In [8]:
# Parse the timestamps and keep only the calendar date as YYYY-MM-DD text
df = df.assign(date=lambda frame: pd.to_datetime(frame["date"]).dt.strftime("%Y-%m-%d"))


✅ 3️⃣ Verify Column Order & Save Final CSV

In [9]:
# Re-apply the output column order before exporting
columns = ["review", "rating", "date", "bank", "source"]
df = df.loc[:, columns]

# Write the cleaned frame to disk without the index column
df.to_csv("CBE_bank_reviews_clean.csv", index=False)
print("✅ CBE cleaned reviews saved as CBE_bank_reviews_clean.csv")

# Preview the first five cleaned rows
print(df.iloc[:5])



✅ CBE cleaned reviews saved as CBE_bank_reviews_clean.csv
                                              review  rating        date bank  \
0  really am happy to this app it is Siple to use...       5  2025-06-07  CBE   
1  I liked this app. But the User interface is ve...       2  2025-06-07  CBE   
2  "Why don’t your ATMs support account-to-accoun...       4  2025-06-06  CBE   
3                        what is this app problem???       1  2025-06-05  CBE   
4       the app is proactive and a good connections.       5  2025-06-05  CBE   

        source  
0  Google Play  
1  Google Play  
2  Google Play  
3  Google Play  
4  Google Play  
