Import libraries and set up the module search path

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import schedule
import time
import os
from pathlib import Path
import sys
# Make the parent of the current working directory importable; presumably
# that is the project root holding the `scripts` package — confirm.
sys.path.append(str(Path().resolve().parent))

from scripts.scraper import scrape_reviews, save_raw_data
from scripts.play_store_scraper import scrape_play_store_reviews


# Visualization style: apply seaborn's "whitegrid" style to subsequent plots.
# NOTE(review): `sns.set` is an alias of `sns.set_theme` in seaborn >= 0.11;
# consider switching once the minimum seaborn version is confirmed.
sns.set(style="whitegrid")


In [None]:
# App information: each entry pairs an app's package id with its display name.
# Keeping the pairs together guarantees the two derived lists stay aligned.
_APPS = (
    ('com.ethiopian.cbe.mobile', 'CBE'),
    ('com.boa.boamobile', 'BOA'),
    ('com.teklogix.amole', 'Dashen'),
)
app_ids = [package_id for package_id, _ in _APPS]
app_names = [display_name for _, display_name in _APPS]

In [None]:
# Scrape up to 400 reviews per app and report how many rows came back in total.
print("📥 Scraping reviews from Google Play Store...")
df_raw = scrape_reviews(app_ids, app_names, review_count=400)
scraped_rows = df_raw.shape[0]
print(f"✅ Scraped {scraped_rows} total reviews")

In [None]:
# Persist the scraped reviews through the project helper.
save_raw_data(df_raw)

# Show the first five rows as this cell's output.
df_raw.head(n=5)

Schedule a recurring fetch of Play Store reviews

In [None]:

# Register the Play Store scrape as a daily job.
schedule.every().day.at("01:00").do(scrape_play_store_reviews)  # Daily at 1 AM

# A bare `while True` loop never returns, so it would block the kernel and keep
# every later cell from running. The polling loop therefore runs on a daemon
# thread: the notebook stays usable and the thread dies with the kernel.
import threading

# Call `scheduler_stop.set()` from any later cell to end the polling loop.
scheduler_stop = threading.Event()


def _run_scheduler(poll_seconds=1):
    """Run due jobs every `poll_seconds` seconds until `scheduler_stop` is set."""
    while not scheduler_stop.is_set():
        schedule.run_pending()
        # Waiting on the event (instead of sleeping) lets a stop request
        # interrupt the pause immediately.
        scheduler_stop.wait(poll_seconds)


scheduler_thread = threading.Thread(
    target=_run_scheduler,
    name="review-scrape-scheduler",
    daemon=True,
)
scheduler_thread.start()

Preprocess the raw reviews

In [None]:
from scripts.preprocessing import (
    load_raw_reviews,
    preprocess_reviews,
    save_clean_reviews,
)

# Reload the raw reviews through the project loader (rebinds `df_raw`).
df_raw = load_raw_reviews()

# Run the project's preprocessing step on the raw reviews.
df_clean = preprocess_reviews(df_raw)

# Persist the preprocessed reviews through the project helper.
save_clean_reviews(df_clean)

# Show the first five cleaned rows as this cell's output.
df_clean.head(n=5)


In [None]:
# Report the row count and, per column, the percentage of missing values.
total = df_clean.shape[0]
null_counts = df_clean.isna().sum()
missing_pct = null_counts.div(total).mul(100)
print(f"Total reviews: {total}")
print("Missing data percentage per column:\n", missing_pct)
