In [1]:
# Install packages
!pip install google-play-scraper pandas tqdm

Collecting google-play-scraper
  Downloading google_play_scraper-1.2.7-py3-none-any.whl.metadata (50 kB)
Downloading google_play_scraper-1.2.7-py3-none-any.whl (28 kB)
Installing collected packages: google-play-scraper
Successfully installed google-play-scraper-1.2.7


In [16]:
# Import packages
from google_play_scraper import reviews, Sort
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import time
import random

In [2]:
# Brand name -> Google Play package ID
apps = dict(
    nike="com.nike.omega",
    adidas="com.adidas.app",
    puma="com.puma.ecom.app",
    gymshark="com.gymshark.store",
)

In [22]:
def fetch_reviews(app_id, brand, lang='en', country='us', max_reviews=2000, sleep=3, max_pages=20):
    """Download the newest Google Play reviews for one app into a DataFrame.

    Pages through ``reviews(...)`` (sorted by ``Sort.NEWEST``), requesting at most
    200 reviews per call, until one of the stop conditions below is met.

    Parameters
    ----------
    app_id : str
        Package ID passed to ``reviews``.
    brand : str
        Label stored in the ``brand`` column of every returned row.
    lang, country : str
        Forwarded unchanged to ``reviews``.
    max_reviews : int
        Upper bound on the number of rows returned; the result never exceeds it.
    sleep : float
        Base pause in seconds between page requests; a random 0-0.75 s jitter is added.
    max_pages : int
        Maximum number of page requests after the first loop iteration check.

    Returns
    -------
    pandas.DataFrame
        One row per unique review with columns ``brand``, ``review_id``, ``score``,
        ``at``, ``content``, ``reply_content``, ``thumbs_up`` and
        ``review_created_version``. Empty (no columns) when nothing was fetched.

    Stop conditions: empty batch, exhausted continuation token, a page that adds no
    new rows, ``max_pages`` reached, or ``max_reviews`` collected.
    """
    all_rows = []
    seen_ids = set()  # review IDs already stored, so a repeated page cannot add duplicates
    token = None
    pages = 0

    while len(all_rows) < max_reviews:
        prev_len = len(all_rows)

        batch, token = reviews(
            app_id,
            lang=lang,
            country=country,
            sort=Sort.NEWEST,
            count=min(200, max_reviews - len(all_rows)),
            continuation_token=token
        )

        # Safety: nothing came back, so there is nothing more to page through
        if not batch:
            print("Empty batch, stopping.")
            break

        for r in batch:
            # Enforce the cap here as well: a page may contain more rows than requested.
            # NOTE(review): the scraper appears to reuse the first call's page size once
            # a continuation token is supplied - confirm against the installed version.
            if len(all_rows) >= max_reviews:
                break

            review_id = r.get('reviewId') or r.get('reviewID')
            if review_id is not None:
                if review_id in seen_ids:
                    continue  # already stored from an earlier page
                seen_ids.add(review_id)

            all_rows.append({
                'brand': brand,
                'review_id': review_id,
                'score': r.get('score'),
                'at': r.get('at'),
                'content': r.get('content'),
                'reply_content': r.get('replyContent'),
                'thumbs_up': r.get('thumbsUpCount'),
                'review_created_version': r.get('reviewCreatedVersion')
            })

        pages += 1
        print(f'Check (page {pages}, total {len(all_rows)})')

        # Stop if the token is exhausted. Accept both a bare None and a token object
        # whose `.token` attribute is None.
        # NOTE(review): the scraper seems to signal exhaustion via `.token is None`
        # on the returned object rather than returning None itself - confirm.
        if getattr(token, 'token', token) is None:
            break

        # Stop if this page contributed only reviews we already had (token loop)
        if len(all_rows) == prev_len:
            print("No growth in rows, stopping.")
            break

        # Page cap so we don't loop forever on weird tokens
        if pages >= max_pages:
            print(f"Hit max_pages={max_pages}, stopping.")
            break

        # Done: skip the pause, since no further request will be made
        if len(all_rows) >= max_reviews:
            break

        # Small jitter helps avoid rate limits
        time.sleep(sleep + random.uniform(0, 0.75))

    return pd.DataFrame(all_rows)

In [23]:
# Fetch up to 2000 reviews per brand, then stack the per-brand frames into one table
frames = []
for brand, pkg in apps.items():
    print(f'Fetching: {brand} ({pkg})')
    df = fetch_reviews(app_id=pkg, brand=brand, max_reviews=2000)
    frames += [df]

# ignore_index=True gives the combined frame a fresh 0..n-1 index
reviews_df = pd.concat(objs=frames, axis=0, ignore_index=True)

print(f'Collection finished, {len(reviews_df)} reviews collected')

Fetching: nike (com.nike.omega)
Check (page 1, total 200)
Check (page 2, total 400)
Check (page 3, total 600)
Check (page 4, total 800)
Check (page 5, total 1000)
Check (page 6, total 1200)
Check (page 7, total 1400)
Check (page 8, total 1600)
Check (page 9, total 1800)
Check (page 10, total 2000)
Fetching: adidas (com.adidas.app)
Check (page 1, total 200)
Check (page 2, total 400)
Check (page 3, total 600)
Check (page 4, total 800)
Check (page 5, total 1000)
Check (page 6, total 1200)
Check (page 7, total 1400)
Check (page 8, total 1600)
Check (page 9, total 1800)
Check (page 10, total 2000)
Fetching: puma (com.puma.ecom.app)
Check (page 1, total 200)
Check (page 2, total 400)
Check (page 3, total 600)
Check (page 4, total 800)
Check (page 5, total 1000)
Check (page 6, total 1200)
Check (page 7, total 1400)
Check (page 8, total 1600)
Check (page 9, total 1800)
Check (page 10, total 2000)
Fetching: gymshark (com.gymshark.store)
Check (page 1, total 200)
Check (page 2, total 400)
Check 

In [24]:
# Preview the combined reviews table (bare last expression -> rich DataFrame display)
reviews_df

Unnamed: 0,brand,review_id,score,at,content,reply_content,thumbs_up,review_created_version
0,nike,aa01de31-4d30-48e6-98ea-3b74661675c4,5,2025-10-08 11:30:54,I love this app,,0,25.44.2
1,nike,d26e4203-1b5f-4bdb-8400-8f0a273dfde3,2,2025-10-08 10:16:27,Return Process in the App was repetative. When...,,0,25.45.1
2,nike,f6c096ed-1a26-4a7a-b831-48b20d6c9075,5,2025-10-08 09:25:10,Stunning😉!!,,0,
3,nike,b4e79189-0854-4f1b-8119-5eed69104879,5,2025-10-08 08:18:32,good,,0,25.45.1
4,nike,b25e4e53-2fe4-46f6-b457-af0cd8f21c9f,5,2025-10-08 07:51:26,nice apps,,0,25.45.1
...,...,...,...,...,...,...,...,...
6441,gymshark,37d027eb-dcf2-4049-ac82-36f680c1f80b,1,2022-12-16 17:13:58,I never use again buying anything from gym sha...,"Hey, we’re sorry to you feel that way. Let’s g...",1,1.13.1
6442,gymshark,ab7f807f-9ec3-4991-911c-a2bae1f9df7c,2,2022-11-23 22:00:42,For some reason the app will just not load it'...,"Hey, we’re sorry to hear you’re having issues ...",3,1.12.1
6443,gymshark,56c53e16-316d-4574-9454-7dee9d9f717a,2,2022-11-22 16:36:49,Uninstalled so I don't have to see the things ...,"Hey, we’re sorry to hear you're not happy with...",3,1.12.1
6444,gymshark,c7c3e771-1376-46c8-acdc-2356504bdff5,1,2022-11-15 17:06:52,Horrible experience with this app. After being...,"Hey, we’re sorry to hear you’re having issues ...",24,1.12.1


In [30]:
# Write the combined reviews to reviews.csv, leaving out the row index
reviews_df.to_csv(path_or_buf='reviews.csv', index=False)

