In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime, timedelta
from IPython.display import clear_output

from io import BytesIO, StringIO
import os
import boto3

import nltk
import demoji
import google_play_scraper
from google_play_scraper import app
from app_store_scraper import AppStore
from pprint import pprint
from wordcloud import WordCloud
from nltk.corpus import stopwords
from pymystem3 import Mystem
from string import punctuation
from os import listdir
from os.path import isfile, join
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize
import pickle

from creds import (
    AWS_SECRET_ACCESS_KEY,
    AWS_SECRET_KEY_ID,
    S3_BUCKET_RESULT,
    S3_ENDPOINT_URL
)

# Kept for reference only (not executed): commands that fetch the NLTK data
# into ../data, and an older stop-word load from an absolute path.
# nltk.download("stopwords", download_dir='../data')
# nltk.download("punkt", download_dir='../data')
# with open('/home/jovyan/nltk_data/corpora/stopwords/russian', 'r') as file:
#     stopwords = [line.rstrip() for line in file]

# Blanket-ignore every warning for the rest of the session.
warnings.simplefilter("ignore")

# Wipe anything this cell has printed so far.
clear_output()

In [None]:
# Load the Russian stop-word list, one word per line, with trailing
# whitespace/newlines stripped.
# The encoding is passed explicitly: without it open() falls back to the
# platform locale, which garbles Cyrillic text on non-UTF-8 systems.
# NOTE(review): the file is assumed to be UTF-8 (it sits in the NLTK corpus
# layout) — confirm if it was produced some other way.
# NOTE: this rebinds `stopwords`, shadowing the `nltk.corpus.stopwords`
# module imported in the first cell.
with open('../data/corpora/stopwords/russian', 'r', encoding='utf-8') as file:
    stopwords = [line.rstrip() for line in file]

### Scrape reviews from app stores

#### AppStore

In [None]:
def parse_apple_data(app_name="Вкусно — и точка", app_id=896111038, how_many=3,
                     sleep=2, after=None) -> pd.DataFrame:
    """
    Fetch App Store reviews for one app and return them as a DataFrame.

    The scraper is always created for the ``"ru"`` country. The number of
    reviews collected is pretty-printed as a side effect. Nothing is written
    to disk.

    Parameters
    ----------
    app_name : str
        App name handed to ``AppStore`` (as UTF-8 bytes, see note below).
    app_id : int
        App identifier handed to ``AppStore``.
    how_many : int
        Forwarded to ``AppStore.review`` as ``how_many``.
    sleep : int
        Forwarded to ``AppStore.review`` as ``sleep``.
    after : datetime, optional
        Forwarded to ``AppStore.review`` as ``after``. When omitted, it is
        set to 31 days before the moment of the call.

    Returns
    -------
    pd.DataFrame
        Built from the scraper's ``reviews`` attribute.
    """
    if after is None:
        # Resolved per call: a default written in the signature is evaluated
        # once, when the cell defining the function runs, and silently goes
        # stale in a long-lived kernel.
        after = datetime.today() - timedelta(days=31)

    # NOTE(review): the name is passed as bytes exactly as before; this looks
    # like a deliberate workaround for the non-ASCII app name — confirm
    # before changing it to a plain str.
    revs = AppStore(country="ru", app_name=app_name.encode('utf-8'), app_id=app_id)
    revs.review(how_many=how_many, sleep=sleep, after=after)
    pprint(revs.reviews_count)
    return pd.DataFrame(revs.reviews)
    
def preproc_apple(_df):
    """
    Tidy a raw App Store reviews frame.

    Removes the ``userName`` column, renames ``date`` / ``review`` / ``title``
    to ``rev_date`` / ``rev_text`` / ``rev_title``, and casts ``rev_date`` to
    ``datetime64[ns]``. The tidied frame is returned.
    """
    renamed_columns = {'date': 'rev_date', 'review': 'rev_text', 'title': 'rev_title'}
    cleaned = _df.drop(columns=['userName']).rename(columns=renamed_columns)
    # Cast the timestamp column on the renamed frame.
    cleaned.rev_date = cleaned.rev_date.astype('datetime64[ns]')
    return cleaned

In [None]:
# App to scrape in this run. Exactly one pair is active; the others are kept
# commented out so the target can be switched quickly.
# app_name, app_id = "Вкусно — и точка", 896111038
# app_name, app_id = "БУРГЕР КИНГ - акции, доставка", 1257821028
# app_name, app_id = "KFC: Доставка еды, купоны", 1074266177
app_name = "Додо Пицца: доставка, ресторан"
app_id = 894649641

# Cut-off handed to the scraper: 31 days before now.
reviews_since = datetime.today() - timedelta(days=31)

apple_reviews = parse_apple_data(
    app_name=app_name,
    app_id=app_id,
    how_many=1,
    sleep=6,
    after=reviews_since,
)
apple_reviews

2024-03-21 14:54:20,426 [INFO] Base - Initialised: AppStore('ru', 'b-xd0-x94-xd0-xbe-xd0-xb4-xd0-xbe-xd0-x9f-xd0-xb8-xd1-x86-xd1-x86-xd0-xb0-xd0-xb4-xd0-xbe-xd1-x81-xd1-x82-xd0-xb0-xd0-xb2-xd0-xba-xd0-xb0-xd1-x80-xd0-xb5-xd1-x81-xd1-x82-xd0-xbe-xd1-x80-xd0-xb0-xd0-xbd-', 894649641)
2024-03-21 14:54:20,427 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/ru/app/b-xd0-x94-xd0-xbe-xd0-xb4-xd0-xbe-xd0-x9f-xd0-xb8-xd1-x86-xd1-x86-xd0-xb0-xd0-xb4-xd0-xbe-xd1-x81-xd1-x82-xd0-xb0-xd0-xb2-xd0-xba-xd0-xb0-xd1-x80-xd0-xb5-xd1-x81-xd1-x82-xd0-xbe-xd1-x80-xd0-xb0-xd0-xbd-/id894649641
2024-03-21 14:54:26,562 [INFO] Base - [id:894649641] Fetched 0 reviews (0 fetched in total)
2024-03-21 14:54:26,649 [INFO] Base - [id:894649641] Fetched 1 reviews (1 fetched in total)


1


Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName
0,2024-03-13 10:26:52,"{'id': 42611801, 'body': 'Минимальная сумма на...","Решил скачать это приложение, потому что был о...",1,False,GovnoPizza,Энцо Горломи


#### Google Play

In [None]:

# Request one batch of Google Play reviews for the given package.
# `continuation_token` is the second value returned alongside the reviews.
# Options left at their defaults (kept here as a reminder):
#   sleep_milliseconds=40
#   sort='Sort.NEWEST'  (defaults to Sort.NEWEST)
result, continuation_token = google_play_scraper.reviews(
    "com.apegroup.mcdonaldsrussia",
    lang="ru",              # library default is 'en'
    country="ru",           # library default is 'us'
    count=100,              # same as the library default
    filter_score_with=2,    # only score 2; None would mean all scores
)


In [None]:
# Preview the first five Google Play reviews as a table.
pd.DataFrame(data=result).head(n=5)

Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,appVersion
0,7297c2a8-ea47-43b9-b54b-a4753f7c4b21,Татьяна Парамонова,https://play-lh.googleusercontent.com/a-/ALV-U...,"Не могу войти в приложение, выскакивает окошко...",2,0,,2024-03-18 20:43:25,"Татьяна, здравствуйте!\nПроверьте настройки ва...",2024-03-20 02:15:02,
1,288eeded-b68c-4948-8966-4b62c4d5a13b,Елена Гусева,https://play-lh.googleusercontent.com/a/ACg8oc...,"Нет доставки по нашему адресу, уже около месяц...",2,0,,2024-03-18 15:50:38,"Елена, действительно, пока не возим в каждый д...",2024-03-18 21:39:02,
2,26a2408b-e0fd-49ce-b8ac-3baa413223d0,Максим Шутов,https://play-lh.googleusercontent.com/a/ACg8oc...,Сделайте оповещение о предстоящем обновлении и...,2,1,10.9.0,2024-03-18 08:42:15,"Максим, здравствуйте!\nСпасибо, что поделились...",2024-03-20 02:47:05,10.9.0
3,1da74d20-20f3-4633-b512-808cdbba1665,Иван Овчинников,https://play-lh.googleusercontent.com/a-/ALV-U...,"Я бы назвал это заведение ""жрите и точка"", суд...",2,0,10.9.0,2024-03-17 12:36:58,"Иван, здравствуйте!\nЖаль, что вы так считаете...",2024-03-20 08:10:01,10.9.0
4,93fdf088-76b6-47fc-b5ef-c859e7e925d7,Николай Помещиков,https://play-lh.googleusercontent.com/a/ACg8oc...,Часто тормозит,2,0,10.9.0,2024-03-17 11:30:45,"Николай, добрый день!\nНапишите, пожалуйста, н...",2024-03-20 07:46:31,10.9.0
