# Google Play Game Reviews Sentiment Analysis

## Import Library

In [4]:
# Common python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os

# for cleaner output
from tqdm import tqdm
from IPython.display import clear_output

# Data Preprocessing Libraries
from sklearn.model_selection import train_test_split

# Deep Learning Libraries
import torch

# Web Scraping Libraries
from google_play_scraper import reviews

# Text preprocessing/cleaning Libraries
import nltk
import re # regex
import string 
import contractions
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer # or LancasterStemmer, RegexpStemmer, SnowballStemmer

# Text Embedding
## BOW

## TF-IDF

## Word2Vec

## Glove




## Device Selection

In [3]:
# Detect available device.
# Sets `device` to the GPU when PyTorch reports one, otherwise to the CPU,
# and prints which one (and which GPU backend) is in use.
if torch.cuda.is_available():
    # A non-None torch.version.hip means the ROCm (HIP) backend is active;
    # otherwise the GPU is driven through CUDA.
    backend = "ROCm" if torch.version.hip is not None else "CUDA"

    # The "cuda" device string is used for both backends.
    device = torch.device("cuda")
    print(f"PyTorch is using GPU: {torch.cuda.get_device_name(0)}")
    print(f"Backend: {backend}")
else:
    device = torch.device("cpu")
    print("PyTorch is not using GPU — running on CPU")



PyTorch is using GPU: AMD Radeon Graphics
Backend: ROCm


## Web Scraping

We will collect the reviews of the following games:
- Mobile Legends: Bang Bang https://play.google.com/store/apps/details?id=com.mobile.legends&hl=en
    - appid: com.mobile.legends
- LINE Let's Get Rich https://play.google.com/store/apps/details?id=com.linecorp.LGGRTHN&hl=en
    - appid: com.linecorp.LGGRTHN
- Subway Surfers https://play.google.com/store/apps/details?id=com.kiloo.subwaysurf
    - appid: com.kiloo.subwaysurf

The appid is the value of the `id` query parameter in the link; a trailing `&hl=en` is a separate language parameter and is not part of the appid.

In [5]:
# Directory where the scraped review data is cached.
playstore_review_data_path = "./cache/dataset"
os.makedirs(playstore_review_data_path, exist_ok=True)  # no error if the directory already exists

# Google Play application ids: the value of the `id` query parameter of each
# store URL. The `&hl` suffix that used to follow two of these ids belonged to
# the URL's separate `hl=en` language parameter, not to the id, so it is dropped.
app_id_list = [
    "com.mobile.legends",
    "com.linecorp.LGGRTHN",
    "com.kiloo.subwaysurf",
]

# For each game we pull 1000 reviews for each star rating (5, 4, 3, 2, 1) to keep the classes balanced.
# Since every game has more than 1M reviews, it is safe to gather 5000 reviews per game.

# extract reviews function
def get_game_reviews(app_id_list: list, n_reviews: int = 1000, language: str = "en", country: str = "us", score_filter: list = None):  # -> list[dict]
    """Collect Google Play reviews for several apps, one request per star rating.

    For every app id in ``app_id_list`` and every score in ``score_filter``,
    ``reviews`` is called once and the returned reviews are appended to a
    single flat list.

    Args:
        app_id_list: Google Play application ids to fetch reviews for.
        n_reviews: Number of reviews requested per (app, score) pair
            (forwarded as ``count``).
        language: Review language (forwarded as ``lang``).
        country: Store country (forwarded as ``country``).
        score_filter: Star ratings to request, one call per rating.
            ``None`` (the default) means ``[5, 4, 3, 2, 1]``.

    Returns:
        All fetched reviews in request order (apps in the given order,
        scores in ``score_filter`` order).
    """
    # Resolve the default here rather than in the signature so that no list
    # object is shared between calls.
    if score_filter is None:
        score_filter = [5, 4, 3, 2, 1]

    # Accumulates the reviews of every app and every score.
    all_reviews = []

    for app_id in tqdm(app_id_list, desc="Extracting Reviews ...", unit="app"):
        # Clear the previous notebook output so the cell stays compact.
        clear_output(wait=True)

        for score in score_filter:
            # "reviews" returns "result" (a list of reviews) and a
            # "continuation_token" for fetching the next batch (not used here).
            # The caller's n_reviews / language / country are forwarded so
            # they actually take effect.
            result, _ = reviews(
                app_id,
                lang=language,
                country=country,
                count=n_reviews,
                filter_score_with=score,
            )
            all_reviews.extend(result)

    print(f"Done Extracting Reviews... \n data_length   :{len(all_reviews)}")
    return all_reviews