# Ragnarok Mobile Review Scraping

In [1]:
# Will be using this to get information on each app based on appid
# Information can be found here https://github.com/JoMingyu/google-play-scraper

!pip install 'google-play-scraper'



In [2]:
import requests
import datetime
from datetime import datetime
import time
import pandas as pd

==================================================================================================================

# Getting general information about the game

- Global ROM = "https://play.google.com/store/apps/details?id=com.gravity.romNAg"
- SEA ROM = "https://play.google.com/store/apps/details?id=com.gravity.romg"
- EU ROM = "https://play.google.com/store/apps/details?id=com.gravity.romEUg"

In [3]:
# This will do most of the work for us.

from google_play_scraper import app

# Doing a test run.
result = app(
    'com.gravity.romNAg',
    lang='en',
    country='us'
)

In [4]:
# Checking what I get in the dictionary.

result

{'title': 'Ragnarok M: Eternal Love(ROM)',
 'description': 'Dear Adventurers,\r\n\r\n"Ragnarok M: Eternal Love" has been with you for more than a thousand days. Together we\'ve experienced and witnessed many stories about gathering and parting. Now, a new era is coming!\r\n\r\nTo welcome the coming new era, various teasers about this new release can be seen. A brand new map "Moon Lake" and new Main Plot Stories will lead you to a secret about the moon and rainbow. The new multi-player gameplay mode will brings you a more exciting team confrontation. And the new Free Angle mode will improve your overall adventure experience!\r\n\r\nGet ready and bring friends along. Let\'s enter the new era of greater freedom and beauty!\r\n\r\nEP 7.5 [Moon\'s Echo] official release! Follow Fate\'s new guidance. Look for Traces of Rainbow Light!\r\n\r\n-New map [Moon Lake] unlocks a new chapter of destiny\r\nIn a faraway galaxy, the moon lingered alone outside Yggdrasill. New map [Moon Lake] and server 

In [5]:
# Checking which keys I want to keep to use as features.

result.keys()

dict_keys(['title', 'description', 'descriptionHTML', 'summary', 'summaryHTML', 'installs', 'minInstalls', 'score', 'ratings', 'reviews', 'histogram', 'price', 'free', 'currency', 'sale', 'saleTime', 'originalPrice', 'saleText', 'offersIAP', 'inAppProductPrice', 'size', 'androidVersion', 'androidVersionText', 'developer', 'developerId', 'developerEmail', 'developerWebsite', 'developerAddress', 'privacyPolicy', 'developerInternalID', 'genre', 'genreId', 'icon', 'headerImage', 'screenshots', 'video', 'videoImage', 'contentRating', 'contentRatingDescription', 'adSupported', 'containsAds', 'released', 'updated', 'version', 'recentChanges', 'recentChangesHTML', 'comments', 'appId', 'url'])

In [6]:
featurekeys = ['title', 'installs', 'minInstalls', 'score', 'ratings', 'reviews', 'histogram', 'price', 'free', 'offersIAP', 'size', 'genre', 'genreId', 'released', 'contentRating', 'appId']

In [7]:
# Filtering dictionary so I only get results I want.

filteredresult = dict((k, result[k]) for k in featurekeys if k in result)
filteredresult

{'title': 'Ragnarok M: Eternal Love(ROM)',
 'installs': '1,000,000+',
 'minInstalls': 1000000,
 'score': 3.850341,
 'ratings': 77414,
 'reviews': 38071,
 'histogram': [15451, 3277, 4931, 7501, 46254],
 'price': 0,
 'free': True,
 'offersIAP': True,
 'size': '71M',
 'genre': 'Role Playing',
 'genreId': 'GAME_ROLE_PLAYING',
 'released': 'Jan 8, 2019',
 'contentRating': 'Teen',
 'appId': 'com.gravity.romNAg'}

In [8]:
# Now to make a function that gives me the filtered results.

def givemeinfo(appid):
    result = app(
    appid,
    lang='en',
    country='us')
    
    featurekeys = ['title', 'installs', 'minInstalls', 'score', 'ratings', 'reviews', 'histogram', 'price', 'free', 'offersIAP', 'size', 'genre', 'genreId', 'released', 'contentRating', 'appId']
    
    filteredresult = dict((k, result[k]) for k in featurekeys if k in result)
    
    return filteredresult

In [12]:
print(givemeinfo("com.gravity.romNAg"))
print('--------------------------------')
print(givemeinfo("com.gravity.romg"))
print('--------------------------------')
print(givemeinfo("com.gravity.romEUg"))

{'title': 'Ragnarok M: Eternal Love(ROM)', 'installs': '1,000,000+', 'minInstalls': 1000000, 'score': 3.850341, 'ratings': 77414, 'reviews': 38071, 'histogram': [15451, 3277, 4931, 7501, 46254], 'price': 0, 'free': True, 'offersIAP': True, 'size': '71M', 'genre': 'Role Playing', 'genreId': 'GAME_ROLE_PLAYING', 'released': 'Jan 8, 2019', 'contentRating': 'Teen', 'appId': 'com.gravity.romNAg'}
--------------------------------
{'title': 'Ragnarok M: Eternal Love', 'installs': '5,000,000+', 'minInstalls': 5000000, 'score': 3.9376836, 'ratings': 305117, 'reviews': 135395, 'histogram': [57460, 8844, 18535, 30687, 189591], 'price': 0, 'free': True, 'offersIAP': True, 'size': '69M', 'genre': 'Role Playing', 'genreId': 'GAME_ROLE_PLAYING', 'released': None, 'contentRating': 'Teen', 'appId': 'com.gravity.romg'}
--------------------------------
{'title': 'Ragnarok M: Eternal Love EU', 'installs': '100,000+', 'minInstalls': 100000, 'score': 4.034524, 'ratings': 15255, 'reviews': 5672, 'histogram':

## For the date [12/16/2020] at 12:16 AM EST.
#### - We see that there are 38062 Global reviews
#### - We see that there are 135395 SEA reviews
#### - We see that there are 5672 EU reviews

------------
It seems more favorable to not include EU reviews for two reasons:
- There are a lack of reviews to work with
- It is not on the same version of the game as Global and SEA. It is behind by one episode (one major update)


In [13]:
# Now to get a list of all reviews for this game.

from google_play_scraper import Sort, reviews

In [14]:
# filter only the results that we want.

def givemereviews(appids):
    
    # Parts of the review that we are interested in
    keys = {'content', 'score', 'thumbsUpCount', 'reviewCreatedVersion', 'replyContent', 'at'}
    
    # The function itself
    result, continuation_token = reviews(
    appids,
    lang='en',
    country='us',
    sort=Sort.RATING,
    count=10000,
)
    # Filtering the results
    filteredresults = [{k:str(v) for k, v in i.items() if k in keys} for i in result]
    for item in filteredresults:
        item.update( {"appId":appids})
    return filteredresults

In [18]:
starttime = datetime.now()

reviewlistglobal = givemereviews('com.gravity.romNAg')

print(datetime.now() - starttime)
print('------------')
print(reviewlist[:5])

0:00:15.243346
------------
[{'content': 'meow', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-04-18 10:49:25', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good game', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2019-12-10 14:42:27', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good old days of playing Ragnarok has been re-played again. 😊😊😊', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-07-05 00:46:44', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'ОРПАВОРПП ОРИ', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2020-01-30 14:23:15', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'I like it', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-05-16 08:44:32', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}]


In [19]:
starttime = datetime.now()

reviewlistsea = givemereviews('com.gravity.romg')

print(datetime.now() - starttime)
print('------------')
print(reviewlist[:5])

0:00:34.945052
------------
[{'content': 'meow', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-04-18 10:49:25', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good game', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2019-12-10 14:42:27', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good old days of playing Ragnarok has been re-played again. 😊😊😊', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-07-05 00:46:44', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'ОРПАВОРПП ОРИ', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2020-01-30 14:23:15', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'I like it', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-05-16 08:44:32', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}]


In [20]:
starttime = datetime.now()

reviewlisteu = givemereviews('com.gravity.romEUg')

print(datetime.now() - starttime)
print('------------')
print(reviewlist[:5])

0:00:02.117926
------------
[{'content': 'meow', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-04-18 10:49:25', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good game', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2019-12-10 14:42:27', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'Good old days of playing Ragnarok has been re-played again. 😊😊😊', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-07-05 00:46:44', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'ОРПАВОРПП ОРИ', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.4', 'at': '2020-01-30 14:23:15', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}, {'content': 'I like it', 'score': '5', 'thumbsUpCount': '0', 'reviewCreatedVersion': '1.0.8', 'at': '2020-05-16 08:44:32', 'replyContent': 'None', 'appId': 'com.gravity.romEUg'}]


In [21]:
# It seems like these are the number of reviews we get per app regardless of sorting method.

print(len(reviewlistglobal))
print(len(reviewlistsea))
print(len(reviewlisteu))

6469
9950
571


==================================================================================================================

## Compressing files for use in other notebooks
We will only compress the global and sea files.

In [22]:
# We will use bz2 to compress pickle files to 1/4th the size.

import pickle
import bz2

# This is to compress it.
def compressed_pickle(title, data):
    with bz2.BZ2File(title + '.pbz2', 'w') as f:
        pickle.dump(data, f)
        
# This is to bring it back.        
def decompress_pickle(file):
    data = bz2.BZ2File(file, 'rb')
    data = pickle.load(data)
    return data

In [23]:
compressed_pickle('compressedlistreview', reviewlistglobal)
compressed_pickle('compressedlistreview2', reviewlistsea)

==================================================================================================================