In [2]:
import json

In [3]:
import pandas as pd

In [4]:
from tqdm import tqdm

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from pygments import highlight
from pygments.lexers import JsonLexer
from pygments.formatters import TerminalFormatter

In [7]:
from google_play_scraper import Sort, reviews, app

In [8]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

In [9]:
def print_json(json_object):
  json_str = json.dumps(
    json_object,
    indent=2,
    sort_keys=True,
    default=str
  )
  print(highlight(json_str, JsonLexer(), TerminalFormatter()))

In [10]:
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [11]:
app_packages = ['com.tencent.ig']

In [12]:
app_reviews = []
for ap in tqdm(app_packages):
        for score in list(range(1,6)):
            for sort_order in [Sort.MOST_RELEVANT, Sort.NEWEST]:
                rvs, _ = reviews(
                    ap,
                    lang='en',
                    country='us',
                    sort=sort_order,
                    count= 200 if score == 3 else 100,
                    filter_score_with=score
                  )
                
                for r in rvs:
                    r['sortOrder'] = 'most_relevant' if sort_order == Sort.MOST_RELEVANT else 'newest'
                    r['appId'] = ap
                    app_reviews.extend(rvs)

100%|██████████| 1/1 [00:07<00:00,  7.05s/it]


In [12]:
print_json(app_reviews[32013])

{
  [94m"appId"[39;49;00m: [33m"com.tencent.ig"[39;49;00m,
  [94m"at"[39;49;00m: [33m"2020-06-16 22:35:18"[39;49;00m,
  [94m"content"[39;49;00m: [33m"I would give it a 5 start but recently i got 80 merit when my genade accidentally hit my knock teamate and now i cant play squad....hope u fix this issue because the game never know u purposely kill your teamate or accidentally....."[39;49;00m,
  [94m"repliedAt"[39;49;00m: [34mnull[39;49;00m,
  [94m"replyContent"[39;49;00m: [34mnull[39;49;00m,
  [94m"reviewCreatedVersion"[39;49;00m: [34mnull[39;49;00m,
  [94m"reviewId"[39;49;00m: [33m"gp:AOqpTOE7FYOvBkdM4587jsDB1hyjf510u23rmNNER7D3p8MnTbeANiP6prNVsL-zHeCZlCOxe8LG1_q5zPbdmBE"[39;49;00m,
  [94m"score"[39;49;00m: [34m2[39;49;00m,
  [94m"sortOrder"[39;49;00m: [33m"newest"[39;49;00m,
  [94m"thumbsUpCount"[39;49;00m: [34m0[39;49;00m,
  [94m"userImage"[39;49;00m: [33m"https://lh3.googleusercontent.com/a-/AOh14GinhDH5wEhvyxDHUx_Sl-GXOxSPWWL2AX6mlr9I"[39

In [13]:
len(app_reviews)

160000

In [14]:
app_reviews_df = pd.DataFrame(app_reviews)
app_reviews_df.head()

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
0,gp:AOqpTOGNqcPbJeMUSVQpJevJDkIRAcQ0InIwJWN5s4m...,vicky adok,https://lh3.googleusercontent.com/a-/AOh14Gh3d...,I turned the 5 star to a 1 star. The game is a...,1,3,0.18.0,2020-06-15 17:03:40,,NaT,most_relevant,com.tencent.ig
1,gp:AOqpTOFebQZzqnwUBQHyrOBIowglRpoBbk-SsDVVWGR...,Adhir Maraj,https://lh3.googleusercontent.com/--wyzTELlt2I...,Don't even bother downloading the game. It's l...,1,312,0.18.0,2020-06-13 22:42:52,"Hi Adhir, We have a zero-tolerance stance on c...",2020-06-15 17:02:38,most_relevant,com.tencent.ig
2,gp:AOqpTOEjnEdM9asngodMC3hPr0LA5MNYk2NJGEpbo-z...,jayasoorya sukumaran,https://lh3.googleusercontent.com/a-/AOh14Gg13...,The game has become too full of glitches...and...,1,50,0.18.0,2020-06-15 11:38:21,"Hello jayasoorya, we understand your concern t...",2020-06-16 12:09:45,most_relevant,com.tencent.ig
3,gp:AOqpTOGYYo7sqhQ2pQfq4IdfrwhH8w1Llbi5_bm400g...,Cayne Game,https://lh3.googleusercontent.com/a-/AOh14Ghkz...,"It's a decent game, just really bad implementa...",1,1234,0.18.0,2020-05-21 18:18:38,"Hello, thanks for your valuable feedback! We a...",2020-05-26 16:34:16,most_relevant,com.tencent.ig
4,gp:AOqpTOFD_kawfhbfYiPh-cH8jfH6k0sBIKQNXoFyJkd...,danny alonso,https://lh3.googleusercontent.com/-jh89VLezdAw...,The game servers are glitched now. Even with l...,1,0,0.18.0,2020-06-15 23:04:50,,NaT,most_relevant,com.tencent.ig


In [15]:
app_reviews_df.drop(['reviewId', 'userName', 'userImage','at','replyContent','repliedAt','appId','reviewCreatedVersion','thumbsUpCount','sortOrder'], axis =1, inplace = True)
app_reviews_df.drop_duplicates(keep='first', inplace = True)

In [18]:
app_reviews_df.head()

Unnamed: 0,content,score
0,I turned the 5 star to a 1 star. The game is a...,1
1,Don't even bother downloading the game. It's l...,1
2,The game has become too full of glitches...and...,1
3,"It's a decent game, just really bad implementa...",1
4,The game servers are glitched now. Even with l...,1


In [19]:
app_reviews_df.to_csv(r'/Users/abhijeet/Documents/Review Sentiment Analysis/PUBG/reviews_pubg.csv', index=None, header=True)