# Ragnarok Mobile Review Scraping

In [1]:
# Will be using this to get information on each app in the Google Play Store based on appid
# Information can be found here https://github.com/JoMingyu/google-play-scraper

!pip install 'google-play-scraper'



In [2]:
# Will be using this to get information on each app in the Apple App Store based on appid
# Information can be found here https://github.com/cowboy-bebug/app-store-scraper

!pip install 'app-store-scraper'

Collecting app-store-scraper
  Using cached https://files.pythonhosted.org/packages/ba/2e/35eb1628b550d5e936f0a9171c1e3ea978288849693d3c6daab9e74cbab9/app_store_scraper-0.3.5-py3-none-any.whl
Collecting requests==2.23.0 (from app-store-scraper)
  Using cached https://files.pythonhosted.org/packages/1a/70/1935c770cb3be6e3a8b78ced23d7e0f3b187f5cbfab4749523ed65d7c9b1/requests-2.23.0-py2.py3-none-any.whl
Installing collected packages: requests, app-store-scraper
  Found existing installation: requests 2.10.0
    Uninstalling requests-2.10.0:
      Successfully uninstalled requests-2.10.0
Successfully installed app-store-scraper-0.3.5 requests-2.23.0


In [3]:
import requests
import datetime
from datetime import datetime
import time
import pandas as pd

==================================================================================================================

# Getting general information about the game from the Google Play Store

- Global ROM = "https://play.google.com/store/apps/details?id=com.gravity.romNAg"
- SEA ROM = "https://play.google.com/store/apps/details?id=com.gravity.romg"
- EU ROM = "https://play.google.com/store/apps/details?id=com.gravity.romEUg"

In [4]:
# This will do most of the work for us.

from google_play_scraper import app

# Smoke test: fetch the store listing of the Global client once.
result = app('com.gravity.romNAg', lang='en', country='us')

In [5]:
# Checking what I get in the dictionary.

result

{'title': 'Ragnarok M: Eternal Love(ROM)',
 'description': 'Dear Adventurers,\r\n\r\n"Ragnarok M: Eternal Love" has been with you for more than a thousand days. Together we\'ve experienced and witnessed many stories about gathering and parting. Now, a new era is coming!\r\n\r\nTo welcome the coming new era, various teasers about this new release can be seen. A brand new map "Moon Lake" and new Main Plot Stories will lead you to a secret about the moon and rainbow. The new multi-player gameplay mode will brings you a more exciting team confrontation. And the new Free Angle mode will improve your overall adventure experience!\r\n\r\nGet ready and bring friends along. Let\'s enter the new era of greater freedom and beauty!\r\n\r\nEP 7.5 [Moon\'s Echo] official release! Follow Fate\'s new guidance. Look for Traces of Rainbow Light!\r\n\r\n-New map [Moon Lake] unlocks a new chapter of destiny\r\nIn a faraway galaxy, the moon lingered alone outside Yggdrasill. New map [Moon Lake] and server 

In [6]:
# Checking which keys I want to keep to use as features.

result.keys()

dict_keys(['title', 'description', 'descriptionHTML', 'summary', 'summaryHTML', 'installs', 'minInstalls', 'score', 'ratings', 'reviews', 'histogram', 'price', 'free', 'currency', 'sale', 'saleTime', 'originalPrice', 'saleText', 'offersIAP', 'inAppProductPrice', 'size', 'androidVersion', 'androidVersionText', 'developer', 'developerId', 'developerEmail', 'developerWebsite', 'developerAddress', 'privacyPolicy', 'developerInternalID', 'genre', 'genreId', 'icon', 'headerImage', 'screenshots', 'video', 'videoImage', 'contentRating', 'contentRatingDescription', 'adSupported', 'containsAds', 'released', 'updated', 'version', 'recentChanges', 'recentChangesHTML', 'comments', 'appId', 'url'])

In [7]:
# Listing fields kept as features.
featurekeys = [
    'title',
    'installs',
    'minInstalls',
    'score',
    'ratings',
    'reviews',
    'histogram',
    'price',
    'free',
    'offersIAP',
    'size',
    'genre',
    'genreId',
    'released',
    'contentRating',
    'appId',
]

In [8]:
# Keep only the feature keys that are actually present in the scraped listing.
filteredresult = {key: result[key] for key in featurekeys if key in result}
filteredresult

{'title': 'Ragnarok M: Eternal Love(ROM)',
 'installs': '1,000,000+',
 'minInstalls': 1000000,
 'score': 3.9042878,
 'ratings': 78024,
 'reviews': 38289,
 'histogram': [14657, 3096, 4849, 7876, 47546],
 'price': 0,
 'free': True,
 'offersIAP': True,
 'size': '71M',
 'genre': 'Role Playing',
 'genreId': 'GAME_ROLE_PLAYING',
 'released': 'Jan 8, 2019',
 'contentRating': 'Teen',
 'appId': 'com.gravity.romNAg'}

In [9]:
# Now to make a function that gives me the filtered results.

def givemeinfo(appid, lang='en', country='us', featurekeys=None):
    """Fetch a Google Play listing and keep only the fields used as features.

    Parameters
    ----------
    appid : str
        Google Play package id, e.g. ``'com.gravity.romNAg'``.
    lang : str, optional
        Language code forwarded to ``app``. Defaults to ``'en'``.
    country : str, optional
        Country code forwarded to ``app``. Defaults to ``'us'``.
    featurekeys : iterable of str, optional
        Keys to keep from the listing. When omitted, the notebook's standard
        feature set is used.

    Returns
    -------
    dict
        One entry per requested key that is present in the listing, in the
        order the keys were requested. Keys missing from the listing are
        skipped rather than raising.
    """
    if featurekeys is None:
        featurekeys = (
            'title', 'installs', 'minInstalls', 'score', 'ratings', 'reviews',
            'histogram', 'price', 'free', 'offersIAP', 'size', 'genre',
            'genreId', 'released', 'contentRating', 'appId',
        )

    result = app(appid, lang=lang, country=country)

    return {key: result[key] for key in featurekeys if key in result}

In [10]:
# Print the filtered listing for the Global, SEA and EU clients, with a rule between them.
for position, appid in enumerate(("com.gravity.romNAg", "com.gravity.romg", "com.gravity.romEUg")):
    if position:
        print('--------------------------------')
    print(givemeinfo(appid))

{'title': 'Ragnarok M: Eternal Love(ROM)', 'installs': '1,000,000+', 'minInstalls': 1000000, 'score': 3.9042878, 'ratings': 78024, 'reviews': 38289, 'histogram': [14657, 3096, 4849, 7876, 47546], 'price': 0, 'free': True, 'offersIAP': True, 'size': '71M', 'genre': 'Role Playing', 'genreId': 'GAME_ROLE_PLAYING', 'released': 'Jan 8, 2019', 'contentRating': 'Teen', 'appId': 'com.gravity.romNAg'}
--------------------------------
{'title': 'Ragnarok M: Eternal Love', 'installs': '5,000,000+', 'minInstalls': 5000000, 'score': 3.9291751, 'ratings': 305542, 'reviews': 135549, 'histogram': [58148, 8935, 18568, 30645, 189246], 'price': 0, 'free': True, 'offersIAP': True, 'size': '69M', 'genre': 'Role Playing', 'genreId': 'GAME_ROLE_PLAYING', 'released': None, 'contentRating': 'Teen', 'appId': 'com.gravity.romg'}
--------------------------------
{'title': 'Ragnarok M: Eternal Love EU', 'installs': '100,000+', 'minInstalls': 100000, 'score': 3.9766083, 'ratings': 15310, 'reviews': 5697, 'histogram

## For the date [1/7/2021] at 10:26 PM EST.
#### - We see that there are 38289 Global reviews
#### - We see that there are 135549 SEA reviews
#### - We see that there are 5697 EU reviews

------------
It seems more favorable to not include EU reviews for two reasons:
- There is a lack of reviews to work with (5697 EU reviews, compared with 38289 Global and 135549 SEA)
- It is not on the same version of the game as Global and SEA. It is behind by one episode (one major update)


In [11]:
# Now to get a list of all reviews for this game.

from google_play_scraper import Sort, reviews_all

In [12]:
# filter only the results that we want.

def givemereviews(appids, lang='en', country='us'):
    """Download all Google Play reviews for one app as a list of flat dicts.

    Parameters
    ----------
    appids : str
        A single Google Play package id (the name is plural, but the value is
        passed straight to ``reviews_all`` and stored as each review's
        ``'appId'``).
    lang : str, optional
        Language code forwarded to ``reviews_all``. Defaults to ``'en'``.
    country : str, optional
        Country code forwarded to ``reviews_all``. Defaults to ``'us'``.

    Returns
    -------
    list of dict
        One dict per review holding the fields of interest plus ``'appId'``.
        Every review field is converted with ``str`` (so a missing value
        becomes the string ``'None'``); ``'appId'`` is stored as given.
    """
    # Parts of the review that we are interested in
    keys = {'content', 'score', 'thumbsUpCount', 'reviewCreatedVersion', 'replyContent', 'at'}

    res = reviews_all(
        appids,
        sleep_milliseconds=0,
        lang=lang,
        country=country,
        sort=Sort.NEWEST,
        filter_score_with=None,
    )

    # Filter each review down to the wanted keys and tag it with its app id.
    return [
        {**{k: str(v) for k, v in review.items() if k in keys}, "appId": appids}
        for review in res
    ]

In [13]:
# Download every Global review, then report the elapsed time and the review count.
starttime = datetime.now()
reviewlistglobal = givemereviews('com.gravity.romNAg')
print(datetime.now() - starttime, '------------', len(reviewlistglobal), sep='\n')

0:00:23.927722
------------
6533


In [14]:
# Download every SEA review, then report the elapsed time and the review count.
starttime = datetime.now()
reviewlistsea = givemereviews('com.gravity.romg')
print(datetime.now() - starttime, '------------', len(reviewlistsea), sep='\n')

0:03:17.462466
------------
72231


In [15]:
# Download every EU review, then report the elapsed time and the review count.
starttime = datetime.now()
reviewlisteu = givemereviews('com.gravity.romEUg')
print(datetime.now() - starttime, '------------', len(reviewlisteu), sep='\n')

0:00:03.048293
------------
577


==================================================================================================================

# Getting general information about the game from the Apple App Store

In [16]:
from app_store_scraper import AppStore
from pprint import pprint

In [17]:
# Storefronts to scrape. NOTE(review): the original note explaining why these
# 3 countries were selected is cut off mid-sentence ("...a notable proportion
# of"); restore the rationale.
countries = ['us', 'ca', 'au']
applereviews = []

for country in countries:
    test = AppStore(
        country=country,
        app_name="ragnarok-m-eternal-love",
        app_id=1444739251,
    )
    test.review(sleep=8)
    # Pool the reviews from every storefront into one list.
    applereviews += test.reviews

len(applereviews)

2021-01-07 22:33:45,967 [INFO] Base - Initialised: AppStore('us', 'ragnarok-m-eternal-love', 1444739251)
2021-01-07 22:33:45,969 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/ragnarok-m-eternal-love/id1444739251
2021-01-07 22:33:54,146 [INFO] Base - [id:1444739251] Fetched 20 reviews (20 fetched in total)
2021-01-07 22:34:10,477 [INFO] Base - [id:1444739251] Fetched 60 reviews (60 fetched in total)
2021-01-07 22:34:32,962 [INFO] Base - [id:1444739251] Fetched 100 reviews (100 fetched in total)
2021-01-07 22:34:49,273 [INFO] Base - [id:1444739251] Fetched 140 reviews (140 fetched in total)
2021-01-07 22:35:23,870 [INFO] Base - [id:1444739251] Fetched 180 reviews (180 fetched in total)
2021-01-07 22:35:40,263 [INFO] Base - [id:1444739251] Fetched 220 reviews (220 fetched in total)
2021-01-07 22:35:56,576 [INFO] Base - [id:1444739251] Fetched 260 reviews (260 fetched in total)
2021-01-07 22:36:13,079 [INFO] Base - [id:1444739251] Fetched 300 reviews (300 fetched

1318

## For the date [1/7/2021] at 10:26 PM EST.

- It appears that we got 1318 reviews. 1056 from the US, 120 from Canada, and 142 from Australia.

==================================================================================================================

## Compressing files for use in other notebooks
We will only compress the Global and SEA Google Play review lists (the EU list is left out), along with the Apple App Store reviews.

In [None]:
# We will use bz2 to compress pickle files to 1/4th the size.

import pickle
import bz2

# This is to compress it.
def compressed_pickle(title, data):
    """Pickle ``data`` into a bz2-compressed file named ``title + '.pbz2'``."""
    target = title + '.pbz2'
    with bz2.BZ2File(target, 'w') as sink:
        pickle.dump(data, sink)
        
# This is to bring it back.        
def decompress_pickle(file):
    """Load and return the object stored in a bz2-compressed pickle file.

    Parameters
    ----------
    file : str or path-like
        Path to the compressed pickle, e.g. one written by ``compressed_pickle``.

    Returns
    -------
    object
        The unpickled object.
    """
    # SECURITY: pickle.load can execute arbitrary code while loading; only use
    # this on files you created yourself.
    # The context manager closes the file handle even if unpickling fails.
    with bz2.BZ2File(file, 'rb') as source:
        return pickle.load(source)

In [None]:
# Write the Global and SEA Google Play review lists to compressed pickles.
for filename, reviewlist in (
    ('compressedlistreview', reviewlistglobal),
    ('compressedlistreview2', reviewlistsea),
):
    compressed_pickle(filename, reviewlist)

In [None]:
# Write the pooled US/CA/AU App Store reviews to 'compressedlistreviewapple.pbz2'.
compressed_pickle('compressedlistreviewapple', applereviews)

==================================================================================================================