In [1]:
import time
from pathlib import Path

import pandas as pd
from google_play_scraper import reviews, Sort

In [2]:
def scrape_google_play_reviews(app_package_name, max_reviews=5000, sleep_time=1.5,
                               lang='en', country='kh'):
    """Collect the newest Google Play reviews for one app into a DataFrame.

    Pages through ``google_play_scraper.reviews`` (sorted newest first, 200
    reviews per request) until ``max_reviews`` have been gathered or the store
    stops returning results.

    Parameters
    ----------
    app_package_name : str
        Package id of the app, passed straight to ``reviews``.
    max_reviews : int, default 5000
        Upper bound on the number of rows returned; surplus rows from the last
        page are discarded.
    sleep_time : float, default 1.5
        Seconds to pause between consecutive page requests.
    lang : str, default 'en'
        Language code forwarded to ``reviews``.
    country : str, default 'kh'
        Country code forwarded to ``reviews``.

    Returns
    -------
    pandas.DataFrame
        Columns ``review_id``, ``username``, ``content``, ``rating`` and
        ``review_date``. An empty frame is returned when nothing was fetched.
        If any expected source field is absent, a diagnostic is printed and
        the raw, un-renamed frame is returned instead.
    """
    page_size = 200
    # Source field -> output column. Dict order fixes the output column order.
    column_map = {
        'reviewId': 'review_id',
        'userName': 'username',
        'content': 'content',
        'score': 'rating',
        'at': 'review_date',
    }

    all_reviews = []
    next_token = None

    print(f"Starting to scrape reviews for: {app_package_name}")
    while len(all_reviews) < max_reviews:
        batch, next_token = reviews(
            app_package_name,
            lang=lang,
            country=country,
            sort=Sort.NEWEST,
            count=page_size,
            continuation_token=next_token,
        )
        if not batch:
            print("No more reviews found.")
            break

        all_reviews.extend(batch)
        print(f"Fetched {len(all_reviews)} / {max_reviews} reviews...")

        # NOTE(review): google_play_scraper appears to return a token *object*
        # that is always truthy, with the real cursor on its `.token` attribute
        # (None on the last page) -- confirm against the installed version.
        # Checking both forms avoids a wasted sleep plus an empty request.
        if not next_token or getattr(next_token, "token", next_token) is None:
            break

        # Nothing left to fetch, so do not pause before returning.
        if len(all_reviews) >= max_reviews:
            break

        time.sleep(sleep_time)

    df = pd.DataFrame(all_reviews[:max_reviews])

    if df.empty:
        print("No reviews found.")
        return df

    missing_columns = [col for col in column_map if col not in df.columns]
    if missing_columns:
        print(f"Missing columns in DataFrame: {missing_columns}")
        print(f"Available columns: {list(df.columns)}")
        return df

    return df[list(column_map)].rename(columns=column_map)

In [4]:
# Package id of the app to scrape, and a cap on how many reviews to collect.
app_package = "net.omobio.smartsc"
reviews_df = scrape_google_play_reviews(app_package, max_reviews=60_000)

Starting to scrape reviews for: net.omobio.smartsc
Fetched 200 / 60000 reviews...
Fetched 400 / 60000 reviews...
Fetched 600 / 60000 reviews...
Fetched 800 / 60000 reviews...
Fetched 1000 / 60000 reviews...
Fetched 1200 / 60000 reviews...
Fetched 1400 / 60000 reviews...
Fetched 1600 / 60000 reviews...
Fetched 1800 / 60000 reviews...
Fetched 2000 / 60000 reviews...
Fetched 2200 / 60000 reviews...
Fetched 2400 / 60000 reviews...
Fetched 2600 / 60000 reviews...
Fetched 2800 / 60000 reviews...
Fetched 3000 / 60000 reviews...
Fetched 3200 / 60000 reviews...
Fetched 3400 / 60000 reviews...
Fetched 3600 / 60000 reviews...
Fetched 3800 / 60000 reviews...
Fetched 4000 / 60000 reviews...
Fetched 4200 / 60000 reviews...
Fetched 4400 / 60000 reviews...
Fetched 4600 / 60000 reviews...
Fetched 4800 / 60000 reviews...
Fetched 5000 / 60000 reviews...
Fetched 5200 / 60000 reviews...
Fetched 5400 / 60000 reviews...
Fetched 5600 / 60000 reviews...
Fetched 5800 / 60000 reviews...
Fetched 6000 / 60000 revi

In [5]:
# Show the first five rows as a quick sanity check of the scraped data.
reviews_df.head(n=5)

Unnamed: 0,review_id,username,content,rating,review_date
0,0ca3b1b1-8512-4333-8da7-78ba86888b86,Phal Dara,👍👍,4,2025-07-25 12:45:15
1,99d1d5ff-6297-4a84-a79e-9ffdecb21959,Phanna A,poor internet 🤡,1,2025-07-24 07:11:41
2,ba1b5ceb-e7e2-4f12-b4f6-deae12129498,Razy U,ដូចអាចម៍,1,2025-07-24 00:01:59
3,765f97ba-945b-43ee-ac23-0341fe8712c1,Sopheak Sok,ដូចអាចម៍,5,2025-07-23 07:57:03
4,71eec25e-cd5a-46ab-9a4b-107bb57bfc26,RW Media,"very slow, take long time to start the app",2,2025-07-23 07:17:02


In [6]:
# Persist the scraped reviews as CSV.
output_file = '../data/smartnas_customer_reviews.csv'
# to_csv does not create missing folders; make sure the target directory exists
# so a long scrape is not lost to an OSError at the very last step.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
# 'utf-8-sig' prepends a BOM -- presumably so spreadsheet tools detect the
# encoding of the non-ASCII review text.
reviews_df.to_csv(output_file, index=False, encoding='utf-8-sig')
print(f"✅ Exported {len(reviews_df)} reviews to {output_file}")

✅ Exported 6924 reviews to ../data/smartnas_customer_reviews.csv
