In [3]:
import google_play_scraper

app_id = 'com.lemonde.androidapp'

In [None]:
from google_play_scraper import Sort
from google_play_scraper.constants.element import ElementSpecs
from google_play_scraper.constants.regex import Regex
from google_play_scraper.constants.request import Formats
from google_play_scraper.utils.request import post

import pandas as pd
from datetime import datetime
from tqdm import tqdm # type: ignore
import time
import json
from time import sleep
from typing import List, Optional, Tuple

In [5]:
MAX_COUNT_EACH_FETCH = 199


class _ContinuationToken:
    __slots__ = (
        "token",
        "lang",
        "country",
        "sort",
        "count",
        "filter_score_with",
        "filter_device_with",
    )

    def __init__(
        self, token, lang, country, sort, count, filter_score_with, filter_device_with
    ):
        self.token = token
        self.lang = lang
        self.country = country
        self.sort = sort
        self.count = count
        self.filter_score_with = filter_score_with
        self.filter_device_with = filter_device_with


def _fetch_review_items(
    url: str,
    app_id: str,
    sort: int,
    count: int,
    filter_score_with: Optional[int],
    filter_device_with: Optional[int],
    pagination_token: Optional[str],
):
    dom = post(
        url,
        Formats.Reviews.build_body(
            app_id,
            sort,
            count,
            "null" if filter_score_with is None else filter_score_with,
            "null" if filter_device_with is None else filter_device_with,
            pagination_token,
        ),
        {"content-type": "application/x-www-form-urlencoded"},
    )
    match = json.loads(Regex.REVIEWS.findall(dom)[0])

    return json.loads(match[0][2])[0], json.loads(match[0][2])[-2][-1]


def reviews(
    app_id: str,
    lang: str = "en",
    country: str = "us",
    sort: Sort = Sort.MOST_RELEVANT,
    count: int = 100,
    filter_score_with: int = None,
    filter_device_with: int = None,
    continuation_token: _ContinuationToken = None,
) -> Tuple[List[dict], _ContinuationToken]:
    sort = sort.value

    if continuation_token is not None:
        token = continuation_token.token

        if token is None:
            return (
                [],
                continuation_token,
            )

        lang = continuation_token.lang
        country = continuation_token.country
        sort = continuation_token.sort
        count = continuation_token.count
        filter_score_with = continuation_token.filter_score_with
        filter_device_with = continuation_token.filter_device_with
    else:
        token = None

    url = Formats.Reviews.build(lang=lang, country=country)

    _fetch_count = count

    result = []

    while True:
        if _fetch_count == 0:
            break

        if _fetch_count > MAX_COUNT_EACH_FETCH:
            _fetch_count = MAX_COUNT_EACH_FETCH

        try:
            review_items, token = _fetch_review_items(
                url,
                app_id,
                sort,
                _fetch_count,
                filter_score_with,
                filter_device_with,
                token,
            )
        except (TypeError, IndexError):
            #funnan MOD start
            token = continuation_token.token
            continue
            #MOD end

        for review in review_items:
            result.append(
                {
                    k: spec.extract_content(review)
                    for k, spec in ElementSpecs.Review.items()
                }
            )

        _fetch_count = count - len(result)

        if isinstance(token, list):
            token = None
            break

    return (
        result,
        _ContinuationToken(
            token, lang, country, sort, count, filter_score_with, filter_device_with
        ),
    )


def reviews_all(app_id: str, sleep_milliseconds: int = 0, **kwargs) -> list:
    kwargs.pop("count", None)
    kwargs.pop("continuation_token", None)

    continuation_token = None

    result = []

    while True:
        _result, continuation_token = reviews(
            app_id,
            count=MAX_COUNT_EACH_FETCH,
            continuation_token=continuation_token,
            **kwargs
        )

        result += _result

        if continuation_token.token is None:
            break

        if sleep_milliseconds:
            sleep(sleep_milliseconds / 1000)

    return result

In [8]:
reviews_count = 20000

In [9]:
result = []
continuation_token = None


with tqdm(total=reviews_count, position=0, leave=True) as pbar:
    while len(result) < reviews_count:
        new_result, continuation_token = reviews(
            app_id,
            continuation_token=continuation_token,
            lang='fr', #The language of review
            country='fr', #Country for which you want to scrape 
            sort=Sort.MOST_RELEVANT,
            filter_score_with=None,
            count=199 #No need to change this
        )
        if not new_result:
            break
        result.extend(new_result)
        pbar.update(len(new_result))

20099it [00:36, 547.76it/s]                           


In [10]:
df = pd.DataFrame(result)

df.head(5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,322c3171-6194-403f-b37d-b74f97a4b7c0,Un utilisateur de Google,https://play-lh.googleusercontent.com/EGemoI2N...,Problème avec le transfert du journal papier- ...,3,3,9.14.1,2025-05-15 17:14:32,,NaT,9.14.1
1,7132f235-4ad1-4cfe-8033-46bf08fe9be9,André Mornard,https://play-lh.googleusercontent.com/a/ACg8oc...,Application largement perfectible au niveau de...,3,34,9.12.3,2025-01-25 10:25:28,,NaT,9.12.3
2,928398a3-485b-4a27-bc3d-a6a39543eb77,Quentin Marlier,https://play-lh.googleusercontent.com/a-/ALV-U...,L'appli est vraiment pas ouf : - pas d'histori...,2,18,9.14.1,2025-05-24 10:14:48,,NaT,9.14.1
3,d8db1c90-e1d0-4438-b3c8-e5dc2f763960,Auguste Harlé,https://play-lh.googleusercontent.com/a-/ALV-U...,"Les articles sont souvent passionnants, mais l...",1,22,9.12.3,2025-02-28 06:07:32,,NaT,9.12.3
4,2fec8b88-0046-42c8-852e-a5b5ffd5ee93,Claude Salzman,https://play-lh.googleusercontent.com/a-/ALV-U...,L'application est très bien faite et fonctionn...,5,72,9.5.2,2022-11-26 18:17:20,,NaT,9.5.2


In [11]:
df.columns

Index(['reviewId', 'userName', 'userImage', 'content', 'score',
       'thumbsUpCount', 'reviewCreatedVersion', 'at', 'replyContent',
       'repliedAt', 'appVersion'],
      dtype='object')

In [12]:
df = df[['reviewId', 'userName', 'content', 'score',
       'thumbsUpCount', 'reviewCreatedVersion', 'at', 'appVersion']]

In [13]:
import datetime 

today = datetime.date.today()

yesterday = today - datetime.timedelta(days=1)

print(yesterday)

2025-06-01


In [14]:
df.head()

Unnamed: 0,reviewId,userName,content,score,thumbsUpCount,reviewCreatedVersion,at,appVersion
0,322c3171-6194-403f-b37d-b74f97a4b7c0,Un utilisateur de Google,Problème avec le transfert du journal papier- ...,3,3,9.14.1,2025-05-15 17:14:32,9.14.1
1,7132f235-4ad1-4cfe-8033-46bf08fe9be9,André Mornard,Application largement perfectible au niveau de...,3,34,9.12.3,2025-01-25 10:25:28,9.12.3
2,928398a3-485b-4a27-bc3d-a6a39543eb77,Quentin Marlier,L'appli est vraiment pas ouf : - pas d'histori...,2,18,9.14.1,2025-05-24 10:14:48,9.14.1
3,d8db1c90-e1d0-4438-b3c8-e5dc2f763960,Auguste Harlé,"Les articles sont souvent passionnants, mais l...",1,22,9.12.3,2025-02-28 06:07:32,9.12.3
4,2fec8b88-0046-42c8-852e-a5b5ffd5ee93,Claude Salzman,L'application est très bien faite et fonctionn...,5,72,9.5.2,2022-11-26 18:17:20,9.5.2


In [15]:
df['at'].iloc[0].date()

datetime.date(2025, 5, 15)

In [16]:
df['at'].iloc[-1].date()

datetime.date(2010, 9, 8)

In [17]:
df

Unnamed: 0,reviewId,userName,content,score,thumbsUpCount,reviewCreatedVersion,at,appVersion
0,322c3171-6194-403f-b37d-b74f97a4b7c0,Un utilisateur de Google,Problème avec le transfert du journal papier- ...,3,3,9.14.1,2025-05-15 17:14:32,9.14.1
1,7132f235-4ad1-4cfe-8033-46bf08fe9be9,André Mornard,Application largement perfectible au niveau de...,3,34,9.12.3,2025-01-25 10:25:28,9.12.3
2,928398a3-485b-4a27-bc3d-a6a39543eb77,Quentin Marlier,L'appli est vraiment pas ouf : - pas d'histori...,2,18,9.14.1,2025-05-24 10:14:48,9.14.1
3,d8db1c90-e1d0-4438-b3c8-e5dc2f763960,Auguste Harlé,"Les articles sont souvent passionnants, mais l...",1,22,9.12.3,2025-02-28 06:07:32,9.12.3
4,2fec8b88-0046-42c8-852e-a5b5ffd5ee93,Claude Salzman,L'application est très bien faite et fonctionn...,5,72,9.5.2,2022-11-26 18:17:20,9.5.2
...,...,...,...,...,...,...,...,...
20094,05251020-9697-4b3f-b8df-90e749932009,Un utilisateur de Google,La dernière version n arrête pas de planter Do...,3,1,1.0.3,2011-10-02 18:08:56,1.0.3
20095,738403f3-f0e2-42c0-ba27-bfb0226077ae,Un utilisateur de Google,"À l'image du journal. Precis, sérieux et fluide",4,0,1.0.3,2010-11-27 16:57:44,1.0.3
20096,a4bf02c8-1b5c-4038-868a-f85cedfebf75,Un utilisateur de Google,Bonne application d'informations malgré quelqu...,4,0,1.0.3,2010-07-27 10:49:20,1.0.3
20097,d4241a05-0663-42e1-a58a-c4a41bd3dcb9,Un utilisateur de Google,"Trop lourde, préfère lui LeMonde.fr (non offic...",3,0,1.0.3,2011-05-31 08:31:10,1.0.3


In [20]:
df.to_csv("Webscraping_lemonde_playstore.csv")