In [1]:
#Install Google play scraper: https://github.com/JoMingyu/google-play-scraper
!pip install google_play_scraper

Collecting google_play_scraper
  Downloading google-play-scraper-1.1.0.tar.gz (52 kB)
     -------------------------------------- 53.0/53.0 kB 678.0 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: google_play_scraper
  Building wheel for google_play_scraper (setup.py): started
  Building wheel for google_play_scraper (setup.py): finished with status 'done'
  Created wheel for google_play_scraper: filename=google_play_scraper-1.1.0-py3-none-any.whl size=24569 sha256=72a527517c8bbcfcf440c04815bc77ca0e431329ca23864b1b9a335c5f28ec18
  Stored in directory: c:\users\guilherme martins\appdata\local\pip\cache\wheels\d2\da\6c\feea3a8b8e10bb2669ef8d02c7575ac1df5a0413b9229d97f4
Successfully built google_play_scraper
Installing collected packages: google_play_scraper
Successfully installed google_play_scraper-1.1.0


In [2]:
import json
import pandas as pd
from tqdm import tqdm

import seaborn as sns
import matplotlib.pyplot as plt

from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

from google_play_scraper import Sort, reviews, app

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format='retina'

# Notebook-wide seaborn defaults: white grid background, muted colour palette,
# and fonts scaled up by 1.2x.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

#### Top 10 Food and Drinks Apps in Portugal

1. Burger King - Portugal ⇒ com.bk.pt

2. Uber Eats: Entrega de comida ⇒ com.ubercab.eats

3. McDonald's ⇒ com.mcdonalds.mobileapp

4. Too Good To Go ⇒ com.app.tgtg

5. TheFork - Restaurantes ⇒ com.lafourchette.lafourchette

6. Bolt Food ⇒ com.bolt.deliveryclient

7. Zomato Portugal ⇒ com.outdarelab.zomato

8. Moulinex, receitas e mais... ⇒ com.groupeseb.moulinex.food

9. H3 ⇒ pt.yunit.mobile.android.h3

10. Telepizza Refeições ao Domicílio ⇒ com.telepizza

Source: https://www.mobileaction.co/

In [3]:
# Google Play package IDs of the apps to scrape, in the same order as the
# "Top 10" list above.
apps_ids = [
    'com.bk.pt',                      # Burger King - Portugal
    'com.ubercab.eats',               # Uber Eats
    'com.mcdonalds.mobileapp',        # McDonald's
    'com.app.tgtg',                   # Too Good To Go
    'com.lafourchette.lafourchette',  # TheFork
    'com.bolt.deliveryclient',        # Bolt Food
    'com.outdarelab.zomato',          # Zomato Portugal
    'com.groupeseb.moulinex.food',    # Moulinex
    'pt.yunit.mobile.android.h3',     # H3
    'com.telepizza',                  # Telepizza
]

Scraping data for each app

In [4]:
# Collect one metadata dict per app from Google Play.
app_infos = []

# NOTE(review): metadata is requested in English from the US storefront even
# though the apps are listed as the top apps in Portugal -- confirm this is
# intended.
for app_id in tqdm(apps_ids):
    info = app(app_id, lang='en', country='us')
    # Drop the 'comments' field if the scraper returned one. Using pop() with a
    # default instead of `del` avoids a KeyError when the installed
    # google_play_scraper release does not include that key.
    info.pop('comments', None)
    app_infos.append(info)

100%|██████████| 10/10 [00:06<00:00,  1.54it/s]


In [5]:
# One row per app, one column per metadata field returned by the scraper.
app_infos_df = pd.DataFrame(app_infos)
# Preview the first rows to check the scrape.
app_infos_df.head()

Unnamed: 0,title,description,descriptionHTML,summary,installs,minInstalls,score,ratings,reviews,histogram,...,contentRatingDescription,adSupported,containsAds,released,updated,version,recentChanges,recentChangesHTML,appId,url
0,Burger King - Portugal,Join the exclusive savings with the official B...,Join the exclusive savings with the official B...,Coupons for in-store use only,"500,000+",500000,0.0,0,0,"[0, 0, 0, 0, 0]",...,,False,False,"Mar 20, 2018",1653920927,4.4.9,We have implemented improvements in the Paymen...,We have implemented improvements in the Paymen...,com.bk.pt,https://play.google.com/store/apps/details?id=...
1,Uber Eats: Food Delivery,Get food delivery to your doorstep from thousa...,Get food delivery to your doorstep from thousa...,"Food & Grocery Delivery App. Order Pizza, Sush...","100,000,000+",100000000,4.440442,4608237,256148,"[385078, 57465, 139834, 586162, 3439676]",...,,True,True,"Feb 29, 2016",1654531465,6.115.10000,We update the Uber Eats app as often as possib...,We update the Uber Eats app as often as possib...,com.ubercab.eats,https://play.google.com/store/apps/details?id=...
2,McDonald's,Download the McDonald's™ App for unique offers...,Download the McDonald&#39;s™ App for unique of...,Download the McDonald’s App to have all the of...,"50,000,000+",50000000,3.661938,468754,1602,"[124046, 17657, 20988, 35425, 270302]",...,,True,True,"Feb 26, 2018",1654678119,2.42.0,,,com.mcdonalds.mobileapp,https://play.google.com/store/apps/details?id=...
3,Too Good To Go: End Food Waste,Join millions of food waste warriors by downlo...,Join millions of food waste warriors by downlo...,Eat delicious food for next to nothing and fig...,"10,000,000+",10000000,4.782464,874466,1705,"[30052, 1865, 11588, 40757, 790004]",...,,False,False,"Jan 14, 2016",1653322017,22.5.10,Thanks for helping fight food waste with us! T...,Thanks for helping fight food waste with us! T...,com.app.tgtg,https://play.google.com/store/apps/details?id=...
4,TheFork - Restaurant bookings,<b>10€ discount on your next meal!</b>\r\nMake...,<b>10€ discount on your next meal!</b><br>Make...,Download TheFork to book your next table with ...,"10,000,000+",10000000,4.883495,210281,208,"[0, 0, 3934, 16326, 189823]",...,,False,False,"Nov 3, 2011",1653408138,20.14.1,We constantly update TheFork app to provide yo...,We constantly update TheFork app to provide yo...,com.lafourchette.lafourchette,https://play.google.com/store/apps/details?id=...


#### Scraping App Reviews

We want:
* Balanced dataset — roughly the same number of reviews for each score (1–5)
* A representative sample of the reviews for each app


We can satisfy the first requirement by using the scraping package's option to filter reviews by score. For the second, we'll collect reviews under two sort orders: most relevant (the reviews Google Play considers most helpful) and newest.

In [6]:
# Gather reviews for every app, for every star rating (1-5), under two sort
# orders. Each review dict is tagged with the sort order it was fetched under
# and with the app it belongs to.
app_reviews = []

# NOTE(review): reviews are requested with country='br' although the app list
# is described as the top apps in Portugal -- confirm this is intended.
for app_id in tqdm(apps_ids):
    for score in range(1, 6):
        # 3-star reviews are requested at twice the volume of the other scores.
        wanted = 200 if score == 3 else 100
        for order in (Sort.MOST_RELEVANT, Sort.NEWEST):
            if order == Sort.MOST_RELEVANT:
                order_label = 'most_relevant'
            else:
                order_label = 'newest'

            # reviews() returns a pair; only the first element is used here.
            batch, _ = reviews(
                app_id,
                lang='pt',
                country='br',
                sort=order,
                count=wanted,
                filter_score_with=score,
            )
            for review in batch:
                review.update(sortOrder=order_label, appId=app_id)
            app_reviews.extend(batch)

100%|██████████| 10/10 [00:44<00:00,  4.43s/it]


In [None]:
# Sanity check: total number of review records collected by the loop above.
len(app_reviews)

7092

Saving reviews to a CSV file

In [7]:
# One row per scraped review, including the 'sortOrder' and 'appId' tags added
# during scraping.
app_reviews_df = pd.DataFrame(app_reviews)
# Preview the first rows to check the scrape.
app_reviews_df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,gp:AOqpTOH4R6ibJGyIgalQ8TmRepFkD5Uc7VEmSlzIqgK...,Vitor Ferraz,https://play-lh.googleusercontent.com/a-/AOh14...,Após bastante tempo passado decidi voltar a pe...,1,33,4.4.7,2022-05-06 13:21:44,Lamentamos o incidente. Iremos reportá-lo e co...,2019-10-10 15:20:20,most_relevant,com.bk.pt
1,gp:AOqpTOF9leM2mcXtbajknpJf-pNJLM7_wbGUOHJ1q2w...,Eduardo Oliveira,https://play-lh.googleusercontent.com/a/AATXAJ...,Esta aplicação é uma miseria. De todas as veze...,1,28,4.4.4,2022-04-03 21:50:26,,NaT,most_relevant,com.bk.pt
2,gp:AOqpTOFW2-55_1RbTThklXm3uewFd8OT9Cu2r1xKGRP...,Diogo Mendes DM116,https://play-lh.googleusercontent.com/a-/AOh14...,Ja teve 5estrelas. Agora esta mesmo fraca.daqu...,1,1,4.4.9,2022-06-06 12:31:04,,NaT,most_relevant,com.bk.pt
3,gp:AOqpTOGRf4dy38flRss1lhoggSy6qTaSHaHK2wOhz_R...,Ricardo Lemos,https://play-lh.googleusercontent.com/a/AATXAJ...,O burguer King mais proximo de minha casa fica...,1,24,4.4.5,2022-04-07 15:50:14,,NaT,most_relevant,com.bk.pt
4,gp:AOqpTOHAD66dRmx7EAvkJ8RAOflfospnxO3hBaQ5G26...,Rui Moreira,https://play-lh.googleusercontent.com/a/AATXAJ...,Não funciona. Diz que a ligação à internet não...,1,1,4.4.8,2022-05-28 17:03:29,,NaT,most_relevant,com.bk.pt


In [8]:
# Write the reviews to disk with a header row and without the row index.
app_reviews_df.to_csv(
    'reviews.csv',
    header=True,
    index=False,
)