# Importing Libraries

In [1]:
# Import Libraries
import pandas as pd
import numpy as np

# Google Play Store Scraper
Scraping reviews for the following banks:
1. GXS Bank
2. MariBank

Data preprocessing Tasks:
1. Adding Row Number
2. Reordering and renaming columns
3. Converting Emojis to text

Lastly, downloading cleaned dataframe as csv

In [2]:
# Google Play Store Scraper Library
!pip install google-play-scraper
# Repository: https://github.com/JoMingyu/google-play-scraper

Collecting google-play-scraper
  Downloading google_play_scraper-1.2.6-py3-none-any.whl (28 kB)
Installing collected packages: google-play-scraper
Successfully installed google-play-scraper-1.2.6


In [3]:
# An app's ID is the value of the `id=` query parameter in its Google Play
# Store URL, e.g. `id=sg.com.gxs.app`.

# GXS Bank: https://play.google.com/store/apps/details?id=sg.com.gxs.app&hl=en&gl=US
App_ID = "sg.com.gxs.app"

# MariBank: https://play.google.com/store/apps/details?id=sg.com.maribankmobile.digitalbank&hl=en_US
App_ID2 = "sg.com.maribankmobile.digitalbank"

In [4]:
# Fetch the store listing of each app; the next cell prints the titles to
# confirm that both App IDs are correct.
from google_play_scraper import app

_listing_options = {
    'lang': 'en',     # library default: 'en'
    'country': 'sg',  # library default: 'us'
}
result = app(App_ID, **_listing_options)
result2 = app(App_ID2, **_listing_options)

In [5]:
# First print both titles to confirm each App_ID points at the intended app,
# then print the approximate number of reviews per app (the figure is not
# exact and the actual number of reviews may be higher).
for _field in ('title', 'reviews'):
    for _details in (result, result2):
        print(_details[_field])

# Choose the scraping function from the total number of reviews the app has:
#   more than 199  -> use reviews_all
#   fewer than 199 -> use reviews

GXS Bank
MariBank
286
64


For GXS:\
566 Reviews on Google Play Store as of 6/3/2024

For Maribank:\
129 Reviews on Google Play Store as of 6/3/2024

Reviews are split into 2 categories\
Ratings - only star ratings and no comments\
Reviews - star rating and comments

Ratings cannot be scraped.

In [6]:
# The current version of the scraper has issues scraping all of the reviews on Google Play
# GitHub issue: https://github.com/JoMingyu/google-play-scraper/issues/209
# The next cell applies the monkey-patch fix from the above link, which updates the library's functions (just run it and move on to the next code block)
# It does not completely fix the issues below; please read the comments in each code block

In [7]:
import google_play_scraper
from google_play_scraper.constants.regex import Regex
from google_play_scraper.constants.request import Formats
from google_play_scraper.utils.request import post
import typing
from typing import Optional
import json

def _fetch_review_items(
    url: str,
    app_id: str,
    sort: int,
    count: int,
    filter_score_with: typing.Optional[int],
    pagination_token: typing.Optional[str],
    max_retries: int = 20,
):
    """Request one page of reviews, retrying when the response is a data error.

    Replacement for the library's private function of the same name
    (workaround from google-play-scraper issue #209).

    Args:
        url: Endpoint the request is POSTed to.
        app_id: Forwarded to ``Formats.Reviews.build_body``.
        sort: Forwarded to ``Formats.Reviews.build_body``.
        count: Forwarded to ``Formats.Reviews.build_body``.
        filter_score_with: Forwarded to ``Formats.Reviews.build_body``;
            ``None`` is sent as the string ``"null"``.
        pagination_token: Forwarded to ``Formats.Reviews.build_body``.
        max_retries: Number of additional requests made after the first one
            when the response contains ``"error.PlayDataError"``.

    Returns:
        A 2-tuple taken from the decoded response payload: its first element
        and the last field of its last element (presumably the review items
        and the next pagination token -- verify against the library caller).

    Raises:
        RuntimeError: Every attempt returned ``"error.PlayDataError"``.
        IndexError: The response does not match ``Regex.REVIEWS``.
    """
    headers = {"content-type": "application/x-www-form-urlencoded"}

    # Bounded retry loop. The previous version retried by calling itself, so a
    # persistent error only stopped once Python's recursion limit was hit.
    for _attempt in range(max_retries + 1):
        dom = post(
            url,
            Formats.Reviews.build_body(
                app_id,
                sort,
                count,
                "null" if filter_score_with is None else filter_score_with,
                pagination_token,
            ),
            headers,
        )
        if "error.PlayDataError" not in dom:
            break
    else:
        # RecursionError (raised by the old unbounded retry) is a subclass of
        # RuntimeError, so existing handlers keep working.
        raise RuntimeError(
            "Google Play kept returning error.PlayDataError after "
            f"{max_retries + 1} attempts for app {app_id!r}"
        )

    match = json.loads(Regex.REVIEWS.findall(dom)[0])

    # Decode the embedded JSON payload once instead of once per returned field.
    payload = json.loads(match[0][2])
    return payload[0], payload[-1][-1]

# Install the patched function in place of the library's private helper.
# NOTE(review): the package also exports a callable named `reviews` (imported and
# called in a later cell), so `google_play_scraper.reviews` may resolve to that
# function rather than to the module that defines `_fetch_review_items`.
# TODO confirm that this assignment actually takes effect.
google_play_scraper.reviews._fetch_review_items = _fetch_review_items

In [8]:
from google_play_scraper import Sort, reviews, reviews_all

# Use reviews_all for apps that have more than 199 reviews in total.
# Known caveats (GitHub issue: https://github.com/JoMingyu/google-play-scraper/issues/208):
#   - the number of reviews returned is always a multiple of 199;
#   - the function returns a different amount of reviews at each execution.
# Run this cell several times and keep the run that yields the highest
# multiple of 199.
_scrape_options = dict(
    sleep_milliseconds=0,  # library default: 0
    lang='en',             # library default: 'en'
    country='sg',          # library default: 'us'
    sort=Sort.NEWEST,      # library default: Sort.MOST_RELEVANT
)
sg_reviews = reviews_all(App_ID, **_scrape_options)

# Total number of reviews scraped
print(len(sg_reviews))

199


In [9]:
# Use reviews for apps that have fewer than 199 reviews in total.
# Take the review count reported for the app (result2['reviews'] here) as the
# starting point, then raise `count` to the highest integer that works before
# the function returns 0.
_page_options = dict(
    lang='en',         # library default: 'en'
    country='sg',      # library default: 'us'
    sort=Sort.NEWEST,  # library default: Sort.NEWEST
    count=69,          # library default: 100
)
sg_reviews2, continuation_token = reviews(App_ID2, **_page_options)
print(len(sg_reviews2))

69


In [10]:
# Create one DataFrame per app straight from the scraped review records.
# The records are expanded into one column per key; this yields the same frame
# as the previous NumPy object-array -> pop -> join round-trip, without the
# intermediate single-column frame.
df_reviews = pd.DataFrame(sg_reviews)
df_reviews2 = pd.DataFrame(sg_reviews2)

In [11]:
# Next steps for this frame: keep a standardised set of columns, renamed and
# re-ordered as UserName, Review, Score, Date, Bank, where Bank is a new
# column labelling which bank the reviews are for.
df_reviews.tail(5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
194,3a9185b2-ee6b-46a1-9217-1c5d01dfde3b,Faith Tan,https://play-lh.googleusercontent.com/a/ACg8oc...,Unable to open the app once I've installed,1,0,2.5.0,2023-06-25 10:18:20,"Hi Faith, this is not the experience we would ...",2023-06-26 02:32:33,2.5.0
195,5d5dcd84-d6e5-4cf5-b35f-9db20d95877f,Andrei Chan,https://play-lh.googleusercontent.com/a/ACg8oc...,"Download the app, and tried to apply. Got an e...",1,0,,2023-06-25 04:12:10,"Hi Andrei , we are sorry for this experience. ...",2023-06-26 02:29:24,
196,e063c772-a883-4ae7-9c6a-267156fbb5bd,Norman Lim,https://play-lh.googleusercontent.com/a-/ALV-U...,What is the purpose of your app? Why would I u...,1,2,2.5.0,2023-06-22 13:21:57,"Hi Norman. Thanks for your feedback. At GXS, w...",2023-06-22 14:14:46,2.5.0
197,93e38461-afae-4b97-8491-d91e5ebbe8c1,David Chan,https://play-lh.googleusercontent.com/a/ACg8oc...,Great..,5,0,2.5.0,2023-06-22 04:55:21,Hey David! Thank you for your 5 stars review. ...,2023-06-22 05:38:42,2.5.0
198,3a22536e-47fc-40c1-ae58-6559c0149b2e,Nor Eliya Khan Ali,https://play-lh.googleusercontent.com/a-/ALV-U...,First time checking out on dis and didnt know ...,5,6,2.5.0,2023-06-16 10:17:39,Hi there Eliya! We are thrilled to know that y...,2023-06-16 11:18:48,2.5.0


In [12]:
# Last five MariBank reviews, before any column standardisation
df_reviews2.tail(5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
64,51c9ee8d-5eb5-4dad-b9f5-3f6682840316,C,https://play-lh.googleusercontent.com/a/ACg8oc...,App permissions that are unnecessary for this ...,4,4,2.10.0,2023-03-20 02:10:09,"Dear C, thank you for taking the time to leave...",2023-03-20 02:17:15,2.10.0
65,37233b44-9612-4785-83f9-81c2ceb9784c,Adrian Tan,https://play-lh.googleusercontent.com/a/ACg8oc...,App is very intuitive! Love the features,5,1,2.9.2,2023-03-08 06:59:34,"Dear Adrian, thank you for giving MariBank a 5...",2023-03-08 07:02:36,2.9.2
66,4cf619bd-7d37-4196-ba6b-6118f97a3c59,Denise,https://play-lh.googleusercontent.com/a/ACg8oc...,Excited with this digital bank in Singapore! L...,5,0,,2023-03-08 06:28:11,"Dear Denise, thank you for giving MariBank a 5...",2023-03-08 06:53:20,
67,15a4ce01-49c6-4543-8dce-266b3e8a5848,Zihao Zhu,https://play-lh.googleusercontent.com/a-/ALV-U...,"Super simple to use, gets things done and that...",5,3,2.9.2,2023-03-08 05:59:21,"Dear Zihao, thank you for giving MariBank a 5 ...",2023-03-08 06:53:34,2.9.2
68,1071665d-1955-420f-8478-461cf213b935,Sam Chua,https://play-lh.googleusercontent.com/a/ACg8oc...,First!,5,1,2.9.2,2023-03-08 05:41:22,"Dear Sam, thank you for giving MariBank a 5 st...",2023-03-08 06:53:47,2.9.2


In [13]:
# Label every review with the bank it was scraped for
for _frame, _bank in ((df_reviews, 'GXS Bank'), (df_reviews2, 'Maribank')):
    _frame['Bank'] = _bank

In [14]:
# Keep only the standardised columns, in the standardised order
_kept_columns = ['userName', 'content', 'score', 'at', 'Bank']
GXS_df = df_reviews.reindex(columns=_kept_columns)
Maribank_df = df_reviews2.reindex(columns=_kept_columns)

In [15]:
# Give both frames the standardised column names
for _frame in (GXS_df, Maribank_df):
    _frame.columns = ['UserName', 'Review', 'Score', 'Date', 'Bank']

In [16]:
# First five GXS Bank reviews in the standardised format
GXS_df.head(5)

Unnamed: 0,UserName,Review,Score,Date,Bank
0,Anna Y,Love the function of having multiple pockets a...,4,2024-03-06 04:14:17,GXS Bank
1,Jia Hong,Quick transfers to bank acc and fast repayment...,5,2024-03-06 01:39:11,GXS Bank
2,E,A feature they should implement is to save you...,3,2024-03-04 04:23:26,GXS Bank
3,Baggio Pan,Ok Good system.,5,2024-03-01 09:52:59,GXS Bank
4,VELAN Ram,good 👍,3,2024-02-24 05:22:26,GXS Bank


In [17]:
# First five MariBank reviews in the standardised format
Maribank_df.head(5)

Unnamed: 0,UserName,Review,Score,Date,Bank
0,Liu,"Easy to sign up, but what use is a bank withou...",2,2024-03-06 05:04:06,Maribank
1,Kevin Tan,Adding the ability to add payees before actual...,4,2024-03-02 17:39:59,Maribank
2,Xavier,"After using over 10 digital banking apps, this...",5,2024-02-29 14:49:40,Maribank
3,Joseph Goh,where the hell is my password manager... isnt ...,1,2024-02-29 12:43:13,Maribank
4,YJ,"Very simple and easy to use, I prefer it over ...",5,2024-02-24 16:41:35,Maribank


# Convert Emojis to Text

In [18]:
#Install emoji library
!pip install emoji

Collecting emoji
  Downloading emoji-2.10.1-py2.py3-none-any.whl (421 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m421.5/421.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.10.1


In [19]:
import emoji
import re

# Function to convert emojis to words using emoji library mapping
def convert_emojis_to_words(text):
    """Replace the emojis in ``text`` with their ``emoji.demojize`` text form.

    Args:
        text: A review string. Values that are not ``str`` (for example
            ``None`` or ``NaN`` standing in for a missing review) are returned
            unchanged.

    Returns:
        The demojized string, or ``text`` itself when it is not a ``str``.
    """
    # A missing review is not a string; hand it back untouched so that a
    # column-wide ``apply`` does not abort on it.
    if not isinstance(text, str):
        return text
    return emoji.demojize(text)

# Demojize the 'Review' column of both Google Play DataFrames
for _frame in (GXS_df, Maribank_df):
    _frame['Review'] = _frame['Review'].apply(convert_emojis_to_words)

# Download Locally

In [20]:
# Write each DataFrame to a CSV file, then download that file through Colab
from google.colab import files

_exports = (
    (GXS_df, 'GXS_Bank_Google.csv'),
    (Maribank_df, 'Maribank_Google.csv'),
)
for _frame, _csv_name in _exports:
    _frame.to_csv(_csv_name)
    files.download(_csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# App Store Review Scraper

In [21]:
# Apple App Store Review Scraper
!pip install app_store_scraper
# Repository: https://github.com/cowboy-bebug/app-store-scraper

Collecting app_store_scraper
  Downloading app_store_scraper-0.3.5-py3-none-any.whl (8.3 kB)
Collecting requests==2.23.0 (from app_store_scraper)
  Downloading requests-2.23.0-py2.py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.4/58.4 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chardet<4,>=3.0.2 (from requests==2.23.0->app_store_scraper)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna<3,>=2.5 (from requests==2.23.0->app_store_scraper)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 (from requests==2.23.0->app_store_scraper)
  Downloading urllib3-1.25.11-py2.py3-none-any.whl (127 kB)
[2K     [

In [22]:
from app_store_scraper import AppStore

# Apple App Store scraper for GXS Bank
# Listing: https://apps.apple.com/sg/app/gxs-bank/id1632183616
# 520 Ratings on Apple App Store as of 4/3/2024
# Note: an error may occur if the function tries to scrape too many reviews
GXS_Bank = AppStore(
    country='sg',
    app_name='GXS Bank',
    app_id='1632183616',
)

520 Ratings on Apple App Store as of 4/3/2024

Reviews are split into 2 categories\
Ratings - only star ratings and no comments\
Reviews - star rating and comments

Ratings are not able to be scraped by this library

In [23]:
# Scrape the reviews; a later cell reads them from GXS_Bank.reviews.
# If how_many is not provided, review() will terminate after all reviews are fetched.
GXS_Bank.review()

In [24]:
# Check how many reviews were scraped
GXS_Bank.reviews_count

128

In [25]:
# Build a DataFrame from the scraped App Store reviews:
# hold the raw records in a single 'review' column first, then expand that
# column into one column per field and join the result back on the index.
_review_records = np.array(GXS_Bank.reviews)
data1 = pd.DataFrame(_review_records, columns=['review'])
_expanded_reviews = pd.DataFrame(data1.pop('review').tolist())
data2 = data1.join(_expanded_reviews)

In [26]:
# Newest reviews first.
# Next steps: keep the standardised columns (renaming and re-ordering them if
# needed) as UserName, Review, Score, Date, Bank, where Bank is a new column
# labelling which bank the reviews are for.
data2 = data2.sort_values(by='date', ascending=False)
data2.head(5)

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName
41,2024-02-25 07:24:03,"{'id': 42216759, 'body': 'Hi there! Thank you ...",The app is easy to navigate and optimise for p...,5,False,Easy to use with friendly UIUX,Tiny Particles
19,2024-02-24 06:12:02,"{'id': 42187814, 'body': 'Hey there! We're sor...",Extremely disappointed with the GXS app. Despi...,1,False,Bad experience of this GXS app. Think again if...,HetTienDownGameBanQuyen
66,2024-02-21 14:03:59,"{'id': 42155008, 'body': 'Hi there. We're sorr...",Show allow to increase amount not reduce and s...,1,False,Cheater!,Nrhdxtan
31,2024-02-17 11:21:01,"{'id': 42034671, 'body': 'We're thrilled to he...",User friendly,5,False,Reminder n easy to use,YSL Alan
30,2024-02-10 03:22:39,"{'id': 41887361, 'body': 'Aw, thanks for the k...",This app is easy to use.,5,False,Smart & easy to use online bank,OnTheRoad9988


In [27]:
# Label every review with its bank
data2['Bank'] = 'GXS Bank'

# Select the standardised columns in order, then give them the standardised names
GXS_apple_df = data2.reindex(
    columns=['userName', 'review', 'rating', 'date', 'Bank']
).rename(
    columns={
        'userName': 'UserName',
        'review': 'Review',
        'rating': 'Score',
        'date': 'Date',
    }
)

In [28]:
# Display the App Store reviews in the standardised format
GXS_apple_df

Unnamed: 0,UserName,Review,Score,Date,Bank
41,Tiny Particles,The app is easy to navigate and optimise for p...,5,2024-02-25 07:24:03,GXS Bank
19,HetTienDownGameBanQuyen,Extremely disappointed with the GXS app. Despi...,1,2024-02-24 06:12:02,GXS Bank
66,Nrhdxtan,Show allow to increase amount not reduce and s...,1,2024-02-21 14:03:59,GXS Bank
31,YSL Alan,User friendly,5,2024-02-17 11:21:01,GXS Bank
30,OnTheRoad9988,This app is easy to use.,5,2024-02-10 03:22:39,GXS Bank
...,...,...,...,...,...
38,adayinthelifeofbulaylay,Love the interface and saving pocket is so uni...,5,2022-09-12 05:46:10,GXS Bank
9,gOoiyc,What a joke! Nothing happens after you input y...,1,2022-09-08 07:14:48,GXS Bank
103,Kidaddictedtofacebook,Best get back to the drawing board. Design thi...,1,2022-09-06 23:39:37,GXS Bank
27,VivianARPC,Downloaded the app but doesnt seem to be able ...,2,2022-09-06 17:57:25,GXS Bank


In [29]:
# Function to convert emojis to words using emoji library mapping
# NOTE: this repeats the definition from the Google Play section and replaces
# it when this cell runs; keep the two copies in sync (or delete this one).
def convert_emojis_to_words(text):
    """Replace the emojis in ``text`` with their ``emoji.demojize`` text form.

    Args:
        text: A review string. Values that are not ``str`` (for example
            ``None`` or ``NaN`` standing in for a missing review) are returned
            unchanged.

    Returns:
        The demojized string, or ``text`` itself when it is not a ``str``.
    """
    # A missing review is not a string; hand it back untouched so that a
    # column-wide ``apply`` does not abort on it.
    if not isinstance(text, str):
        return text
    return emoji.demojize(text)

# Demojize the 'Review' column of the App Store DataFrame
_reviews_demojized = GXS_apple_df['Review'].apply(convert_emojis_to_words)
GXS_apple_df['Review'] = _reviews_demojized

In [30]:
# Install Langdetect
!pip install langdetect

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993225 sha256=dc7c8ef48e1d490991e831b2673d80ec0e21c753f3075f53a55745d9cde6c75e
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


In [31]:
# UserName & Review may contain non-English characters.
# Keep only the rows whose Review text is detected as English
# (only the Review column is checked).
from langdetect import DetectorFactory, detect
from langdetect.lang_detect_exception import LangDetectException

# langdetect is non-deterministic unless a seed is set; fix it so that the same
# rows are kept on every run.
DetectorFactory.seed = 0


def _detect_language(text):
    """Return the language code detected for ``text``, or None if it cannot be detected."""
    if not isinstance(text, str):
        return None
    try:
        return detect(text)
    except LangDetectException:
        # Raised when the text gives the detector nothing to work with.
        # Such rows are treated as non-English and dropped by the filter below.
        return None


GXS_apple_df = GXS_apple_df[GXS_apple_df.Review.apply(_detect_language).eq('en')]

In [32]:
# Display the App Store reviews that remain after the English-only filter
GXS_apple_df

Unnamed: 0,UserName,Review,Score,Date,Bank
41,Tiny Particles,The app is easy to navigate and optimise for p...,5,2024-02-25 07:24:03,GXS Bank
19,HetTienDownGameBanQuyen,Extremely disappointed with the GXS app. Despi...,1,2024-02-24 06:12:02,GXS Bank
66,Nrhdxtan,Show allow to increase amount not reduce and s...,1,2024-02-21 14:03:59,GXS Bank
30,OnTheRoad9988,This app is easy to use.,5,2024-02-10 03:22:39,GXS Bank
81,princesstazsdff,When you asked your staff to write a 5* review...,1,2024-01-18 08:45:46,GXS Bank
...,...,...,...,...,...
36,The Average Saver,Why put an app on the store when you are not r...,1,2022-09-16 07:19:33,GXS Bank
38,adayinthelifeofbulaylay,Love the interface and saving pocket is so uni...,5,2022-09-12 05:46:10,GXS Bank
9,gOoiyc,What a joke! Nothing happens after you input y...,1,2022-09-08 07:14:48,GXS Bank
103,Kidaddictedtofacebook,Best get back to the drawing board. Design thi...,1,2022-09-06 23:39:37,GXS Bank


In [33]:
# Write the App Store reviews to a CSV file, then download it through Colab
_apple_csv = 'GXS_Bank_Apple.csv'
GXS_apple_df.to_csv(_apple_csv)
files.download(_apple_csv)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# TO-DO: Comparing with other digital banks
Scrape Google Play Store & App Store for the following banks
1. GXS Bank
2. Revolut
3. Wise
4. Mari Bank
5. ANEXT Bank
6. Trust Bank

# Combine all scraped datasets with standardised format
UserName, Review, Score, Date, Bank