In [2]:
%pip install google-play-scraper pandas




In [3]:
from google_play_scraper import reviews
import pandas as pd
from google_play_scraper import Sort

# Google Play package ID of the BOA mobile banking app; passed to
# scrape_reviews() below, once per review language.
app_id = "com.boa.boaMobileBanking"

# Function to scrape reviews for a given language
def scrape_reviews(app_id, bank_name, lang, count=600, country="ET"):
    """Fetch the newest Google Play reviews of one app in one language.

    Args:
        app_id: Google Play package ID of the app.
        bank_name: Label written to the ``bank`` column of every row.
        lang: Language code passed to the scraper ("en" for English,
            "am" for Amharic).
        count: Maximum number of reviews to request (default 600).
        country: Play Store country code passed to the scraper
            (default "ET").

    Returns:
        A DataFrame with one row per fetched review, holding the scraper's
        fields plus ``bank``, ``source`` and ``language`` columns. When the
        scraper returns nothing, the frame is empty but still carries the
        ``content``, ``score`` and ``at`` columns.
    """
    result, _ = reviews(
        app_id,
        lang=lang,
        country=country,
        count=count,
        sort=Sort.NEWEST,
    )
    df = pd.DataFrame(result)
    if df.empty:
        # An empty result list yields a frame with no columns at all. Provide
        # the fields that callers rename/select so an app or language without
        # reviews does not end in a KeyError further down the notebook.
        df = pd.DataFrame(columns=["content", "score", "at"])
    df["bank"] = bank_name
    df["source"] = "Google Play"
    df["language"] = lang  # Track review language
    return df

# Pull the BOA reviews once per language (English, then Amharic)
df_en = scrape_reviews(app_id, "BOA", "en")
df_am = scrape_reviews(app_id, "BOA", "am")

# Stack both languages, map the scraper's field names onto the project's
# column names and keep only the exported columns, in a fixed order.
# The per-row "language" tag is not part of that exported schema.
columns = ["review", "rating", "date", "bank", "source"]
df = (
    pd.concat([df_en, df_am], ignore_index=True)
    .rename(columns={"content": "review", "score": "rating", "at": "date"})
    .loc[:, columns]
)

# Write the merged reviews to disk; de-duplication and date formatting
# happen in the cells that follow.
df.to_csv("BOA_bank_reviews.csv", index=False)
print("✅ BOA reviews saved successfully as BOA_bank_reviews.csv (English + Amharic).")
# Preview the first rows
print(df.head())




✅ BOA reviews saved successfully as BOA_bank_reviews.csv (English + Amharic).
                                              review  rating  \
0                                   it's not working       3   
1  Hello, I’m facing a problem with the BOA Mobil...       1   
2                                        exceptional       5   
3                               BoA Mobile good bank       5   
4                    this is worest app 24/7 loading       1   

                 date bank       source  
0 2025-06-05 11:57:36  BOA  Google Play  
1 2025-06-03 16:21:34  BOA  Google Play  
2 2025-06-03 10:32:35  BOA  Google Play  
3 2025-06-02 14:25:12  BOA  Google Play  
4 2025-06-01 15:51:10  BOA  Google Play  


In [4]:
# Remove rows that have no review text, then drop exact duplicate rows.
# Duplicates are matched on every column (text, rating, date, bank, source)
# rather than on the text alone: different users often post the same short
# review (e.g. "good"), and those are separate reviews that must be kept.
# A row that appears more than once in the merged data is still collapsed
# to a single row.
df = df.dropna(subset=["review"]).drop_duplicates()


In [5]:
# Normalise the review timestamps to plain YYYY-MM-DD strings
df = df.assign(date=pd.to_datetime(df["date"]).dt.strftime("%Y-%m-%d"))


In [7]:
# Keep only the exported columns, in a fixed order
columns = ["review", "rating", "date", "bank", "source"]
df = df.loc[:, columns]

# Write the cleaned reviews to their own CSV file
df.to_csv("BOA_bank_reviews_clean.csv", index=False)
print("✅ BOA cleaned reviews saved as BOA_bank_reviews_clean.csv")
# Preview the first rows of the cleaned data
print(df.head())



✅ BOA cleaned reviews saved as BOA_bank_reviews_clean.csv
                                              review  rating        date bank  \
0                                   it's not working       3  2025-06-05  BOA   
1  Hello, I’m facing a problem with the BOA Mobil...       1  2025-06-03  BOA   
2                                        exceptional       5  2025-06-03  BOA   
3                               BoA Mobile good bank       5  2025-06-02  BOA   
4                    this is worest app 24/7 loading       1  2025-06-01  BOA   

        source  
0  Google Play  
1  Google Play  
2  Google Play  
3  Google Play  
4  Google Play  
