Install & Import

In [None]:
pip install -q google-play-scraper tqdm

import csv, json, time, os
from datetime import datetime

import pandas as pd
from google_play_scraper import app, reviews, Sort
from tqdm import tqdm

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h

Parameters

In [None]:
# Scrape configuration; every cell below reads these constants.
APP_ID = "com.openai.chatgpt"  # app identifier passed to app() and reviews()
LANG = "en"                    # passed as lang= to app() and reviews()
COUNTRY = "us"                 # passed as country= to app() and reviews()
SORT = Sort.NEWEST             # passed as sort= to the first reviews() call
PAGE_SIZE = 200                # passed as count= to the first reviews() call
SLEEP_MS = 250                 # pause after each fetched page, in milliseconds (divided by 1000 for time.sleep)
CHUNK = 200                    # buffered rows are appended to the output files once at least this many are held

In [None]:
def to_serializable_row(r):
    """Return a shallow copy of ``r`` with datetime values rendered as strings.

    Only top-level values are inspected; nested containers are left as-is.
    The input mapping is not modified.

    Args:
        r: A mapping (anything ``dict()`` accepts) describing one row.

    Returns:
        dict: Copy of ``r`` in which every ``datetime`` value (including
        subclasses such as ``pandas.Timestamp``) is replaced by its
        ``"%Y-%m-%d %H:%M:%S"`` representation, or by ``str(value)`` when
        formatting fails.
    """
    d = dict(r)
    for k, v in d.items():
        # pandas.Timestamp is a datetime subclass, so this single check covers
        # it and the function no longer needs pandas to be in scope.
        if isinstance(v, datetime):
            try:
                d[k] = v.strftime("%Y-%m-%d %H:%M:%S")
            except Exception:
                # Deliberate best-effort: keep the row and fall back to the
                # value's plain string form rather than abort the export.
                d[k] = str(v)
    return d

In [None]:
# Output locations: review rows go to CSV and JSONL, app metadata to JSON.
csv_path = "ChatGPT_Review.csv"
jsonl_path = "ChatGPT_Review.jsonl"
meta_path = "App_Metadata.json"

# Clean previous outputs
for stale_path in (csv_path, jsonl_path):
    if os.path.exists(stale_path):
        os.remove(stale_path)

App Information

In [None]:
raw = app(APP_ID, lang=LANG, country=COUNTRY)

# Display label -> key looked up in the scraper result, in display order.
label_to_key = {
    "Title": "title",
    "App ID": "appId",
    "Developer": "developer",
    "Genre": "genre",
    "Score (avg)": "score",
    "Ratings count": "ratings",
    "Reviews count": "reviews",
    "Installs (display)": "installs",
    "Real installs (estimated)": "realInstalls",
    "Free": "free",
    "Price": "price",
    "Currency": "currency",
    "Sale": "sale",
    "In-app purchases": "offersIAP",
    "In-app product price range": "inAppProductPrice",
    "URL": "url",
}
# Missing keys come back as None via .get().
info = {label: raw.get(key) for label, key in label_to_key.items()}

print("\nApp Information:\n")
for label, value in info.items():
    print(f"{label:30}: {value}")

# Persist the selected metadata as a JSON file.
with open(meta_path, "w", encoding="utf-8") as meta_file:
    json.dump(info, meta_file, ensure_ascii=False, indent=2)
print(f"\nApp metadata saved to: {meta_path}")




App Information:

Title                         : ChatGPT
App ID                        : com.openai.chatgpt
Developer                     : OpenAI
Genre                         : Productivity
Score (avg)                   : 4.7461176
Ratings count                 : 29262840
Reviews count                 : 116332
Installs (display)            : 500,000,000+
Real installs (estimated)     : 796675358
Free                          : True
Price                         : 0
Currency                      : USD
Sale                          : False
In-app purchases              : True
In-app product price range    : $19.99 - $200.00 per item
URL                           : https://play.google.com/store/apps/details?id=com.openai.chatgpt&hl=en&gl=us

App metadata saved to: App_Metadata.json


Scraping

In [None]:
# Review scraping
def _append_rows(rows, fieldnames):
    """Append ``rows`` to the JSONL and CSV outputs.

    Writes to the module-level ``jsonl_path`` and ``csv_path``. Both files are
    opened in append mode and closed again before returning.

    Args:
        rows: List of row dicts that are already JSON-serializable.
        fieldnames: Column order for the CSV writer.

    Returns:
        int: Number of rows written.
    """
    with open(jsonl_path, "a", encoding="utf-8") as jf:
        for row in rows:
            jf.write(json.dumps(row, ensure_ascii=False) + "\n")
    with open(csv_path, "a", encoding="utf-8-sig", newline="") as cf:
        csv.DictWriter(cf, fieldnames=fieldnames).writerows(rows)
    return len(rows)


total = 0          # rows written to disk so far
buf = []           # rows fetched but not yet written
token = None       # continuation token returned by the previous reviews() call
first = True       # True until the initial page has been requested
fieldnames = None  # CSV columns, taken from the first row received

print(f"\n[info] Starting full review scrape: app_id={APP_ID}, lang={LANG}, country={COUNTRY}, sort={SORT.name}")
pbar = tqdm(desc="Fetched", unit="reviews")

try:
    while True:
        if first:
            # Initial page: pass the full query parameters.
            res, token = reviews(
                APP_ID, lang=LANG, country=COUNTRY, sort=SORT, count=PAGE_SIZE
            )
            first = False
        else:
            # Later pages: continue from the token of the previous call.
            if token is None:
                break
            res, token = reviews(APP_ID, continuation_token=token)

        # An empty page ends the scrape.
        if not res:
            break

        rows = [to_serializable_row(r) for r in res]

        # Initialize header once
        if fieldnames is None:
            fieldnames = list(rows[0].keys())
            with open(csv_path, "w", encoding="utf-8-sig", newline="") as cf:
                writer = csv.DictWriter(cf, fieldnames=fieldnames)
                writer.writeheader()

        buf.extend(rows)

        # Write out once enough rows have accumulated; progress and the total
        # only advance for rows that are actually on disk.
        if len(buf) >= CHUNK:
            written = _append_rows(buf, fieldnames)
            total += written
            pbar.update(written)
            buf.clear()

        # Pause between page requests.
        time.sleep(SLEEP_MS / 1000.0)

    # Flush remaining data
    if buf:
        written = _append_rows(buf, fieldnames)
        total += written
        pbar.update(written)
        buf.clear()

finally:
    # Close the progress bar even if the scrape raised.
    pbar.close()

print(f"\nFull scraping completed, total {total:,} reviews collected.")
print(f"CSV  : {csv_path}")
print(f"JSONL: {jsonl_path}")




[info] Starting full review scrape: app_id=com.openai.chatgpt, lang=en, country=us, sort=NEWEST


Fetched: 1477847reviews [2:04:33, 197.76reviews/s]


Full scraping completed, total 1,477,847 reviews collected.
CSV  : ChatGPT_Review.csv
JSONL: ChatGPT_Review.jsonl





In [None]:
# Preview: load only the first five data rows of the CSV and show them as plain text.
df_preview = pd.read_csv(csv_path, low_memory=False, nrows=5)
preview_text = df_preview.to_string(index=False)
print("\nPreview of first 5 rows:", preview_text, sep="\n")




Preview of first 5 rows:
      3-13 16:09:18  Unnamed: 1  Unnamed: 2 1.2025.063
2025-03-13 16:09:04         NaN         NaN 1.2025.063
2025-03-13 16:08:38         NaN         NaN 1.2025.063
2025-03-13 16:08:34         NaN         NaN 1.2025.063
2025-03-13 16:08:27         NaN         NaN 1.2025.063
2025-03-13 16:07:37         NaN         NaN 1.2025.056
