In [1]:
from google_play_scraper import reviews
import pandas as pd
from google_play_scraper import Sort

# Package ID of the DASHEN app; passed to scrape_reviews() below as the
# app whose Google Play reviews are fetched.
app_id = "com.dashen.dashensuperapp"

# Function to scrape reviews for a given language
def scrape_reviews(app_id, bank_name, lang, count=600, country="ET"):
    """Fetch the newest reviews for one app in one language as a DataFrame.

    Parameters
    ----------
    app_id : str
        Package ID of the app to scrape.
    bank_name : str
        Label written to the ``bank`` column of every returned row.
    lang : str
        Language code forwarded to the scraper ("en" for English,
        "am" for Amharic).
    count : int, default 600
        Number of reviews requested from the scraper.
    country : str, default "ET"
        Country code forwarded to the scraper.

    Returns
    -------
    pandas.DataFrame
        One row per review, holding the fields returned by the scraper plus
        three tagging columns: ``bank``, ``source`` (always "Google Play")
        and ``language``. If the scraper returns no reviews the frame is
        empty but still has ``content``, ``score`` and ``at`` columns, so
        the rename/column-selection steps that follow do not raise KeyError.
    """
    # The second element of the returned tuple is discarded: only one
    # call is made per language.
    result, _ = reviews(
        app_id,
        lang=lang,
        country=country,
        count=count,
        sort=Sort.NEWEST,
    )
    df = pd.DataFrame(result)
    if df.empty:
        # An empty result produces a frame with no columns at all; add the
        # ones the rest of the notebook renames and selects.
        df = df.reindex(columns=["content", "score", "at"])
    df["bank"] = bank_name
    df["source"] = "Google Play"
    df["language"] = lang  # Track which language pull the row came from
    return df

# One pull per language tag for the same app
df_en = scrape_reviews(app_id, "DASHEN", "en")
df_am = scrape_reviews(app_id, "DASHEN", "am")

# Output schema: only these columns are kept, in this order. Every other
# column of the scraped frames (including "language") is dropped here.
columns = ["review", "rating", "date", "bank", "source"]

# Stack both pulls on a fresh 0..n-1 index, give the scraper fields their
# project names, then project onto the output schema.
df = (
    pd.concat([df_en, df_am], ignore_index=True)
    .rename(columns={"content": "review", "score": "rating", "at": "date"})
    .loc[:, columns]
)

# Write the combined frame to disk without the index column
df.to_csv("DASHEN_bank_reviews.csv", index=False)
print("✅ DASHEN reviews saved successfully as DASHEN_bank_reviews.csv (English + Amharic).")
# Preview the first five rows
print(df.head())




✅ DASHEN reviews saved successfully as DASHEN_bank_reviews.csv (English + Amharic).
                                              review  rating  \
0  I like this mobile banking app very much. Over...       2   
1                                               love       3   
2                                               መቸሸጠ       5   
3                                                wow       5   
4                                              gadaa       5   

                 date    bank       source  
0 2025-06-07 10:40:29  DASHEN  Google Play  
1 2025-06-06 00:15:44  DASHEN  Google Play  
2 2025-06-03 19:40:31  DASHEN  Google Play  
3 2025-06-03 17:30:11  DASHEN  Google Play  
4 2025-06-01 17:10:53  DASHEN  Google Play  


In [3]:
# Keep the first row for each distinct review text, then discard any row
# whose review text is missing.
df = df.drop_duplicates(subset=["review"]).dropna(subset=["review"])


In [4]:
# Parse the date column as datetimes, then store it back as YYYY-MM-DD
# strings (the time of day is discarded).
review_timestamps = pd.to_datetime(df["date"])
df["date"] = review_timestamps.dt.strftime("%Y-%m-%d")


In [5]:
# Final column set and order for the cleaned export
columns = ["review", "rating", "date", "bank", "source"]
df = df.loc[:, columns]

# Write the cleaned frame to disk without the index column
df.to_csv("DASHEN_bank_reviews_clean.csv", index=False)
print("✅ DASHEN cleaned reviews saved as DASHEN_bank_reviews_clean.csv")
# Preview the first five cleaned rows
print(df.head())



✅ DASHEN cleaned reviews saved as DASHEN_bank_reviews_clean.csv
                                              review  rating        date  \
0  I like this mobile banking app very much. Over...       2  2025-06-07   
1                                               love       3  2025-06-06   
2                                               መቸሸጠ       5  2025-06-03   
3                                                wow       5  2025-06-03   
4                                              gadaa       5  2025-06-01   

     bank       source  
0  DASHEN  Google Play  
1  DASHEN  Google Play  
2  DASHEN  Google Play  
3  DASHEN  Google Play  
4  DASHEN  Google Play  
