# Google-Play-Scraper provides Python APIs to easily crawl the Google Play Store without any external dependencies!

In [1]:
# Mount Google Drive into the Colab filesystem; the working directory used by
# the rest of this notebook lives under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
pip install google-play-scraper

Collecting google-play-scraper
  Downloading google_play_scraper-1.2.6-py3-none-any.whl (28 kB)
Installing collected packages: google-play-scraper
Successfully installed google-play-scraper-1.2.6


In [3]:
# Switch to the project folder on the mounted Drive; later cells save files
# using paths relative to this directory (e.g. './Crawl_Dataset').
%cd /content/drive/MyDrive/AI_project/NLP

/content/drive/MyDrive/AI_project/NLP


### API to get app information on Google Play

In [7]:
from google_play_scraper import app

# Fetch the Play Store listing details for the Binance app.
# lang / country are passed explicitly here ('en' and 'us').
result = app('com.binance.dev', lang='en', country='us')

In [9]:
# Dump the raw app-detail dictionary held in `result` (the output is long).
print('App detail : \n' , result)

App detail : 
 {'title': 'Binance: Buy Bitcoin & Crypto', 'description': 'Securely buy, sell, and hold cryptocurrencies like Bitcoin (BTC), Ethereum (ETH), and Cardano (ADA) with low trading fees. Used by over 170 million users worldwide, Binance is the largest cryptocurrency exchange* in the world.\r\nHere is why:\r\n\r\nTRADE YOUR FAVORITE TOKENS AND MORE\r\n\r\nTrade over 350 listed cryptocurrencies, including Bitcoin (BTC), Ethereum (ETH) and Worldcoin (WLD).\r\nTrack the market using price alerts and trade with advanced trading tools. \r\nSet up recurring orders (DCA) to buy crypto every hour, day, week or month.\r\nEnjoy best-in-class liquidity on each crypto trade. \r\nBuy cryptocurrency and fund your wallet in minutes with flexible payment options, including credit/debit, bank transfers, and peer-to-peer (P2P) trading.\r\nFind leading traders and replicate their trading strategies with one tap.\r\n\r\nEARN DAILY REWARDS ON YOUR IDLE ASSETS\r\n\r\nEarn rewards from staking, dual

In [10]:
import pandas as pd
import os
from google_play_scraper import Sort, reviews_all, reviews

### The `reviews` function lets you crawl reviews from Google Play in batches, handling pagination automatically. By passing the continuation token returned by a previous call, you can resume crawling from the last fetched review. This approach retrieves reviews efficiently while adhering to Google Play's limitations.

In [13]:

# First page: the 3 newest 5-star reviews, plus a token marking where the page ended.
result, continuation_token = reviews(
    'com.binance.dev',
    lang='en', # defaults to 'en'
    country='us', # defaults to 'us'
    sort=Sort.NEWEST, # defaults to Sort.NEWEST
    count=3, # defaults to 100
    filter_score_with=5 # defaults to None(means all score)
)

# If you pass `continuation_token` as an argument to the reviews function at this point,
# it will crawl the items after the 3 review items fetched above.
# The second page is stored under its own name so that `result` keeps holding
# the 3 latest reviews, which is what the next cell prints. The returned token
# is also given a real name instead of `_`, which IPython uses for the last output.
next_result, next_continuation_token = reviews(
    'com.binance.dev',
    continuation_token=continuation_token # defaults to None(load from the beginning)
)

In [14]:
# Print the `result` list produced by the reviews() cell above.
print('Results of 3 latest reviews : \n', result)

Results of 3 latest reviews : 
 [{'reviewId': '2ca649aa-6f2d-4e21-ad12-7c501138d43d', 'userName': 'Ariyan Shuvo', 'userImage': 'https://play-lh.googleusercontent.com/a/ACg8ocKtlQoaSLcLhRvM7aL0cd3MlsYzscvkfP9yd3czprT6=mo', 'content': 'good app', 'score': 5, 'thumbsUpCount': 0, 'reviewCreatedVersion': '2.80.4', 'at': datetime.datetime(2024, 3, 24, 9, 29, 4), 'replyContent': None, 'repliedAt': None, 'appVersion': '2.80.4'}, {'reviewId': '18aa2fc3-d2cb-400b-8469-c53a1671e3fa', 'userName': 'Zack Byama', 'userImage': 'https://play-lh.googleusercontent.com/a-/ALV-UjWUpmAXDDDmldHvzAANoe-DujNx_O2iThJADQZmGY1UIw', 'content': 'All in on, ... the best one!', 'score': 5, 'thumbsUpCount': 0, 'reviewCreatedVersion': '2.79.5', 'at': datetime.datetime(2024, 3, 24, 9, 23, 37), 'replyContent': 'Hello Zack! Thank you for using our app and taking the time to rate us. These 5 stars motivate us to improve more and more! If you need anything, you can talk to us in our chat: http://binance.com/en/chat. Thanks!

### The `reviews_all` function returns all reviews of an app. Use it instead of `reviews` when you want an unlimited count.

In [15]:
# Get review data from Google Play Store.
# NOTE: reviews_all() pages through every available review and discards any
# `count` argument, so no `count` is passed here (it would have no effect).
results = reviews_all(
    'com.binance.dev',
    sleep_milliseconds=2, # pause between page requests
    lang='en',
    country='us',
    sort=Sort.NEWEST
)

# Create DataFrame from collected data
df = pd.DataFrame(results)

# Directory (relative to the current working directory) where the CSVs are saved.
# Create it up front so to_csv() does not fail when the folder does not exist yet.
path = './Crawl_Dataset'
os.makedirs(path, exist_ok=True)

# Save DataFrame to Google Drive as CSV
csv_name = 'binance_reviews.csv'
df.to_csv(os.path.join(path, csv_name), index=False)

# Show notification when saved successfully, naming the file that was actually written
print(f"Dữ liệu đã được lưu vào file '{csv_name}' thành công.")

Dữ liệu đã được lưu vào file 'google_play_reviews.csv' thành công.


### Collect reviews for multiple apps in multiple languages

In [18]:
# Map of application name -> application id on the Google Play Store.
# The id is the `id` query parameter of the store URL, e.g. for
# https://play.google.com/store/apps/details?id=com.garena.game.kgvn&pcampaignid=web_share
# the id is com.garena.game.kgvn.
app_id = {'telegram': 'org.telegram.messenger', 'lienquan': 'com.garena.game.kgvn'}

# List of languages to fetch reviews for
languages = ['en', 'vi']

# Review fields that are not needed in the saved dataset
unwanted_fields = (
    "userImage",
    "thumbsUpCount",
    "reviewCreatedVersion",
    "at",
    "replyContent",
    "repliedAt",
)

# Make sure the output directory exists before any CSV is written
os.makedirs(path, exist_ok=True)

for app_name, app_id_value in app_id.items():
    print("Downloading the reviews related to " + app_name)
    for lang in languages:
        # Fetch reviews for the current application and language.
        # Stored as `app_reviews` so the imported `reviews` function is not shadowed
        # (shadowing it would break any later call to reviews()).
        # No `count` is passed: reviews_all() discards it and pages on its own.
        app_reviews = reviews_all(
            app_id_value,              # Application ID
            sleep_milliseconds=0,      # Sleep time between requests
            lang=lang                  # Language of reviews
        )

        # Remove unnecessary information from each review.
        # pop(..., None) tolerates a review that lacks one of the fields.
        for review in app_reviews:
            for field in unwanted_fields:
                review.pop(field, None)

        print("Dataset " + app_name + "_" + lang + ": " + str(len(app_reviews)) + " occurrences.")

        if not app_reviews:  # If there are no reviews in the desired language, skip to the next language
            continue

        # Convert the list of dictionaries into a DataFrame
        df = pd.DataFrame(app_reviews)

        # Save DataFrame to Google Drive as CSV
        df.to_csv(os.path.join(path, app_name + "_" + lang + ".csv"), index=False)


Downloading the reviews related to telegram
Dataset telegram_en: 199 occurrences.
Dataset telegram_vi: 199 occurrences.
Downloading the reviews related to lienquan
Dataset lienquan_en: 597 occurrences.
Dataset lienquan_vi: 995 occurrences.
