In [None]:
from google_play_scraper import reviews, Sort          # use reviews api to fetch reviews 
import pandas as pd
from datetime import datetime, timedelta, date
import time

# App ids handed to google_play_scraper, one fetch per entry.
app_ids = [
    'com.smartspends',
    'com.nextbillion.groww',
    'in.indwealth',
]

# Length of the review window in days, counted back from today.
lookback_days = 5

# Use date-only boundaries to avoid timezone/seconds edge cases.
# The clock is read exactly once and the start is derived from the end, so the
# window is always exactly `lookback_days` long even if the script happens to
# run across midnight.
end_date_date = date.today()
start_date_date = end_date_date - timedelta(days=lookback_days)

print(f"Fetching reviews from {start_date_date} to {end_date_date}")

# Accumulates one DataFrame per app; concatenated and exported at the end.
all_apps_reviews = []

def _review_date(review):
    """Return the calendar date of a review's ``'at'`` value, or ``None`` if it is unusable."""
    posted_at = review.get('at')
    if isinstance(posted_at, datetime):
        return posted_at.date()
    return None


def _fetch_page_with_retry(fetch_page, token, app_id, max_retries):
    """Call ``fetch_page(token)`` up to ``max_retries`` times.

    Returns the ``(result, next_token)`` pair on success, or ``None`` when every
    attempt raised (or when ``max_retries`` is 0).
    """
    for attempt in range(max_retries):
        try:
            result, next_token = fetch_page(token)
            return result, next_token
        except Exception as e:  # fetch boundary: report any failure and retry
            if attempt + 1 >= max_retries:
                # Final attempt: report it, but do not sleep before giving up.
                print(f"Fetch error for {app_id}: {e}")
                break
            wait = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
            print(f"Fetch error for {app_id}: {e} — retrying in {wait}s...")
            time.sleep(wait)
    return None


def fetch_reviews_for_app(app_id, start_d, end_d, page_count=200, max_retries=3,
                          *, page_delay=1.2, fetch_page=None):
    """Collect the reviews of one app whose date falls within ``[start_d, end_d]``.

    Pages are requested newest-first and pagination stops as soon as a page
    reaches reviews older than ``start_d``, the fetcher returns an empty page,
    no further page is available, or a page cannot be fetched.

    Args:
        app_id: App id passed to ``reviews``.
        start_d: First ``datetime.date`` of the window (inclusive).
        end_d: Last ``datetime.date`` of the window (inclusive).
        page_count: Number of reviews requested per page.
        max_retries: Attempts per page before giving up on the app.
        page_delay: Seconds to sleep between successive page requests.
        fetch_page: Optional callable ``fetch_page(token) -> (result, next_token)``
            used instead of the default ``reviews`` call (``lang="en"``,
            ``country="in"``, ``sort=Sort.NEWEST``). ``token`` is ``None`` for
            the first page.

    Returns:
        A list of the review dicts that fall inside the window, de-duplicated,
        in the order they were received. Reviews without a usable ``'at'``
        datetime are skipped.
    """
    if fetch_page is None:
        def page_fetcher(token):
            return reviews(
                app_id,
                lang="en",                 # language
                country="in",              # country
                sort=Sort.NEWEST,          # newest first, so pagination can stop early
                count=page_count,          # reviews per request
                continuation_token=token,  # pagination cursor
            )
    else:
        page_fetcher = fetch_page

    continuation_token = None   # cursor for the next page
    collected = []              # reviews that fall in the date range
    seen_ids = set()            # ids already processed, to avoid duplicates

    while True:
        page = _fetch_page_with_retry(page_fetcher, continuation_token, app_id, max_retries)
        if page is None:
            # exhausted retries
            print(f"Failed to fetch page after {max_retries} tries for {app_id}. Stopping.")
            break
        result, continuation_token = page

        if not result:
            break

        # process current page (dedupe + date filter)
        new_in_page = 0
        for r in result:
            # stable id for dedupe (use reviewId if present)
            rid = r.get('reviewId') or f"{r.get('userName')}|{r.get('at')}|{(r.get('content') or '')[:50]}"
            if rid in seen_ids:
                continue
            seen_ids.add(rid)
            new_in_page += 1

            # Compare calendar dates only; a review without a usable timestamp
            # cannot be placed in the window, so it is skipped instead of crashing.
            r_date = _review_date(r)
            if r_date is not None and start_d <= r_date <= end_d:
                collected.append(r)

        if new_in_page == 0:
            # A page made up entirely of already-seen reviews means pagination is
            # not advancing; stop rather than loop forever.
            print(f"No new reviews in the latest page for {app_id}. Stopping.")
            break

        # With newest-first ordering the last entry is the oldest on the page.
        # If its date is unavailable, stop to be safe (as before).
        oldest_in_page_date = _review_date(result[-1])
        if oldest_in_page_date is None or oldest_in_page_date < start_d:
            # we've reached reviews older than our window; no need to fetch further pages
            break

        # No continuation token -> no more pages.
        # NOTE(review): google_play_scraper appears to return a wrapper object whose
        # `.token` attribute is None once the listing is exhausted — confirm against
        # the installed version. Objects without a `.token` attribute fall back to
        # plain truthiness.
        if not continuation_token or getattr(continuation_token, "token", True) is None:
            break

        # polite delay between requests
        if page_delay > 0:
            time.sleep(page_delay)

    return collected

# Fetch each app's reviews, normalise the columns, and queue the frame for export.
for app_id in app_ids:
    print(f"\nFetching reviews for {app_id} ...")
    app_reviews = fetch_reviews_for_app(app_id, start_date_date, end_date_date, page_count=200)
    if not app_reviews:
        print(f"No reviews found for {app_id} in the date range.")
        continue

    df = pd.DataFrame(app_reviews)
    df = df[['userName', 'score', 'content', 'at']].rename(columns={
        'userName': 'user',
        'score': 'rating',
        'content': 'review',
        'at': 'time',
    })

    # final safety filter (by date); the boundaries are already plain dates,
    # so they are compared directly.
    review_dates = df['time'].dt.date
    in_window = (review_dates >= start_date_date) & (review_dates <= end_date_date)

    # .assign returns a new frame, so the column is never written onto a
    # filtered slice (which can raise SettingWithCopyWarning).
    df = df[in_window].assign(app_id=app_id)
    all_apps_reviews.append(df)
    print(f"{app_id}: Collected {len(df)} reviews in the date range")

# Merge the per-app frames and write a single CSV, if anything was collected.
if all_apps_reviews:
    df_final = pd.concat(all_apps_reviews, ignore_index=True)
    file_name = "dailyreview__.csv"
    df_final.to_csv(file_name, index=False)
    print(f"\n✅ Saved {len(df_final)} reviews into {file_name}")
else:
    print("\n⚠️ No reviews collected for any app.")
