In [5]:
# scrape_playstore_bulk.py

from google_play_scraper import reviews, Sort, app  # install via: pip install google-play-scraper
import pandas as pd
import os
import time

# Policy domain -> list of (display name, Play Store package ID) pairs.
# The display name is only used for logging and as a CSV column; the package
# ID is what is passed to the scraper. Entries tagged "sample placeholder"
# are stand-in IDs and should be replaced with real package names.
policy_apps = {
    "Education": [("Diksha", "in.gov.diksha.app")],
    "Health": [
        ("Aarogya Setu", "nic.goi.aarogyasetu"),
        ("ABHA Health ID", "in.gov.abdm.abha"),
    ],
    "Urban_Infrastructure": [
        # sample placeholder
        ("Smart Cities App", "com.smartcity.app.example"),
    ],
    # Agriculture / farmer support
    "Agriculture": [("Kisan Suvidha", "nic.kisansuvidha")],
    # Environment / clean mobility / utility
    "Environment": [("MyGov India", "in.mygov")],
    # Digital governance & law / financial inclusion / welfare
    "Digital_Governance": [
        ("DigiLocker", "com.digilocker.android"),
        ("UMANG", "in.gov.umang.negd.g2c"),
        # sample placeholder
        ("PMJanDhan", "in.gov.pmjdy.app"),
    ],
    # Economy & welfare / social security
    "Economy_Welfare": [
        # sample placeholder
        ("PM-KISAN", "in.gov.pmkisan"),
    ],
}

# Accumulates one dict per scraped review across every app and domain.
output_rows = []

print("Starting Play Store bulk scraping...")

for domain, apps in policy_apps.items():
    for app_name, pkg in apps:
        print(f"\nScraping reviews for app: {app_name} — domain: {domain}")
        try:
            # Fetch up to 200 of the newest English reviews from the Indian
            # store. The second return value is discarded.
            result, _ = reviews(
                pkg,
                lang="en",
                country="in",
                sort=Sort.NEWEST,
                count=200
            )
        except Exception as e:
            # Best-effort scrape: report the failure and move on to the next
            # app instead of aborting the whole run.
            print(f"⚠ Error fetching {app_name} ({pkg}): {e}")
            result = []
        else:
            if not result:
                # An empty result raises no error, so without this message a
                # wrong or placeholder package ID silently contributes nothing.
                print(f"⚠ No reviews returned for {app_name} ({pkg}) — check the package ID")

        output_rows.extend(
            {
                "policy_domain": domain,
                "app_name": app_name,
                "package_name": pkg,
                "review_text": r.get("content", ""),
                "rating": r.get("score", None),
            }
            for r in result
        )

        # polite delay to avoid rate limiting; applied after failed requests
        # too, since those also hit the server
        time.sleep(1)

# Save to CSV
# Pin the column set so the CSV always has a header row, even when no reviews
# were collected (an empty list would otherwise yield a column-less frame).
df = pd.DataFrame(
    output_rows,
    columns=["policy_domain", "app_name", "package_name", "review_text", "rating"],
)
out_path = r"C:\Users\Ayush Ahlawat\OneDrive\Documents\Public Comment Analysis\public-comment-analysis\dataset\playstore_policy_reviews.csv"
# Ensure the directory the file is actually written to exists. Creating a
# relative "dataset" folder in the current working directory does not help
# when out_path points somewhere else.
out_dir = os.path.dirname(out_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
# utf-8-sig writes a BOM at the start of the file.
df.to_csv(out_path, index=False, encoding="utf-8-sig")

print("\n✅ Done scraping. Reviews saved at:", out_path)
print("Total reviews collected:", len(df))


Starting Play Store bulk scraping...

Scraping reviews for app: Diksha — domain: Education

Scraping reviews for app: Aarogya Setu — domain: Health

Scraping reviews for app: ABHA Health ID — domain: Health

Scraping reviews for app: Smart Cities App — domain: Urban_Infrastructure

Scraping reviews for app: Kisan Suvidha — domain: Agriculture

Scraping reviews for app: MyGov India — domain: Environment

Scraping reviews for app: DigiLocker — domain: Digital_Governance

Scraping reviews for app: UMANG — domain: Digital_Governance

Scraping reviews for app: PMJanDhan — domain: Digital_Governance

Scraping reviews for app: PM-KISAN — domain: Economy_Welfare

✅ Done scraping. Reviews saved at: C:\Users\Ayush Ahlawat\OneDrive\Documents\Public Comment Analysis\public-comment-analysis\dataset\playstore_policy_reviews.csv
Total reviews collected: 800
