In [176]:
from app_store_scraper import AppStore

from google_play_scraper import Sort, reviews_all

import pandas as pd

import numpy as np

import json

import datetime
    

# Android Scraping

In [177]:
# Scrape every Google Play review of the EasyPark app for one storefront.
fi_reviews = reviews_all(
    'net.easypark.android',  # package name of the app on Google Play
    sleep_milliseconds=0,
    lang='en',     # one of en, fi, fr, es, no; use English only once so the data is not duplicated
    country='fi',  # change between runs: fi, no, fr, es
)

# Each scraped review is a dict, so pandas can expand the keys into columns
# directly; the detour through a NumPy object array, a single 'review' column,
# pop() and join() produced exactly the same frame.
dSet = pd.DataFrame(fi_reviews)

# Displayed as the cell output: (number of reviews, number of fields).
dSet.shape


(1932, 10)

In [178]:
# Keep only the five columns that the iOS frame also ends up with, under the
# same names. Selecting the wanted columns explicitly makes a separate drop of
# the unwanted ones unnecessary, and nothing is mutated in place, so the cell
# can be re-run without raising a KeyError.
dSet = dSet.rename(columns={'score': 'rating', 'reviewCreatedVersion': 'reviewVersion', 'at': 'date'})
dSet = dSet[["userName", "content", "rating", "date", "reviewVersion"]].copy()

# Remove the time of day from the review timestamps in one vectorised step
# instead of writing each row back through .at; the column then holds plain
# datetime.date objects.
dSet['date'] = pd.to_datetime(dSet['date']).dt.date

In [179]:
# Append the Android reviews to the easypark CSV file (';'-separated, no header row).
# The path is relative, exactly like the iOS export further down, so both
# platforms append to the same file in the working directory and the notebook
# does not depend on one machine's folder layout.
# NOTE: mode='a' appends, so re-running this cell writes the same rows again.
dSet.to_csv('easypark.csv', mode='a', header=False, sep=';')

# iOS Scraping

In [180]:
# Point the App Store scraper at the EasyPark listing of a single storefront.
# Re-run with a different country each time: fi, no, dk, se, fr, es, de.
my_app = AppStore(country='de', app_name='easypark', app_id=449594317)

# Fetch the reviews; the next cell reads them back from my_app.reviews.
my_app.review()

2021-10-12 04:14:39,427 [INFO] Base - Initialised: AppStore('de', 'easypark', 449594317)
2021-10-12 04:14:39,429 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/de/app/easypark/id449594317
2021-10-12 04:14:44,620 [INFO] Base - [id:449594317] Fetched 660 reviews (660 fetched in total)
2021-10-12 04:14:45,214 [INFO] Base - [id:449594317] Fetched 710 reviews (710 fetched in total)


In [181]:
# my_app.reviews holds one dict per fetched review, so pandas can expand the
# keys into columns directly; wrapping the list in a NumPy object array and
# then pop()/join()-ing the single 'review' column built the same frame.
df = pd.DataFrame(my_app.reviews)

In [182]:
# Drop the columns that are not needed for the analysis.
# errors='ignore' keeps the cell from raising a KeyError when one of them is
# absent (presumably 'developerResponse' only exists when at least one review
# of the chosen storefront has a developer reply -- TODO confirm) and when the
# cell is run a second time.
df = df.drop(columns=['isEdited', 'title', 'developerResponse'], errors='ignore')

# Use the same column name as the Android data for the review text.
df = df.rename(columns={'review': 'content'})

In [183]:
# iOS version history used to assign a version to each iOS review from its date.
# Every entry is [release date, version string]; entries are ordered newest
# first and each release appears exactly once.
myList = [
    [datetime.datetime(2021, 10, 4), '15.28.0'],
    [datetime.datetime(2021, 9, 20), '15.27.1'],
    [datetime.datetime(2021, 9, 14), '15.27.0'],
    [datetime.datetime(2021, 8, 24), '15.26.0'],
    [datetime.datetime(2021, 8, 10), '15.25.0'],
    [datetime.datetime(2021, 7, 12), '15.24.0'],
    [datetime.datetime(2021, 6, 23), '15.23.0'],
    [datetime.datetime(2021, 6, 6), '15.22.0'],
    [datetime.datetime(2021, 5, 17), '15.21.0'],
    [datetime.datetime(2021, 4, 24), '15.20.1'],
    [datetime.datetime(2021, 4, 20), '15.20.0'],
    [datetime.datetime(2021, 3, 29), '15.19.0'],
    [datetime.datetime(2021, 3, 20), '15.18.1'],
    [datetime.datetime(2021, 2, 23), '15.17.1'],
    [datetime.datetime(2021, 2, 17), '15.17.0'],
    [datetime.datetime(2021, 1, 25), '15.16.0'],
    [datetime.datetime(2021, 1, 11), '15.15.0'],
    [datetime.datetime(2020, 12, 8), '15.14.1'],
    [datetime.datetime(2020, 12, 3), '15.14.0'],
    [datetime.datetime(2020, 11, 27), '15.13.1'],
    [datetime.datetime(2020, 11, 12), '15.13'],
    [datetime.datetime(2020, 10, 7), '15.12.1'],
    [datetime.datetime(2020, 10, 5), '15.12.0'],
    [datetime.datetime(2020, 9, 28), '15.11.1'],
    [datetime.datetime(2020, 8, 26), '15.11.0'],
    [datetime.datetime(2020, 7, 27), '15.10.0'],
    [datetime.datetime(2020, 7, 9), '15.9'],
    [datetime.datetime(2020, 6, 15), '15.8.1'],
    [datetime.datetime(2020, 6, 4), '15.8'],
    [datetime.datetime(2020, 5, 30), '15.7'],
    [datetime.datetime(2020, 5, 7), '15.6'],
    [datetime.datetime(2020, 4, 23), '15.5'],
    [datetime.datetime(2020, 3, 31), '15.4'],
    [datetime.datetime(2020, 3, 12), '15.3'],
    [datetime.datetime(2020, 3, 5), '15.2'],
    [datetime.datetime(2020, 2, 27), '15.1.1'],
    [datetime.datetime(2020, 2, 20), '15.1'],
    [datetime.datetime(2020, 2, 7), '15.0'],
    [datetime.datetime(2020, 1, 10), '14.8'],
    [datetime.datetime(2019, 12, 11), '14.6.1'],
    [datetime.datetime(2019, 12, 5), '14.6'],
    [datetime.datetime(2019, 11, 21), '14.5.1'],
    [datetime.datetime(2019, 11, 20), '14.5'],
    [datetime.datetime(2019, 11, 12), '14.4.1'],
    [datetime.datetime(2019, 11, 7), '14.4'],
    [datetime.datetime(2019, 10, 24), '14.3'],
    [datetime.datetime(2019, 10, 7), '14.2'],
    [datetime.datetime(2019, 9, 30), '14.1'],
    [datetime.datetime(2019, 9, 11), '14.0'],
    [datetime.datetime(2019, 8, 28), '13.9'],
    [datetime.datetime(2019, 8, 14), '13.8'],
    [datetime.datetime(2019, 7, 29), '13.7'],
    [datetime.datetime(2019, 7, 17), '13.6.1'],
    [datetime.datetime(2019, 7, 11), '13.6'],
    [datetime.datetime(2019, 7, 4), '13.5.2'],
    [datetime.datetime(2019, 7, 3), '13.5.1'],
    [datetime.datetime(2019, 6, 27), '13.5'],
    [datetime.datetime(2019, 6, 13), '13.4.1'],
    [datetime.datetime(2019, 6, 12), '13.4'],
    [datetime.datetime(2019, 5, 23), '13.3'],
    [datetime.datetime(2019, 5, 14), '13.2'],
    [datetime.datetime(2019, 4, 29), '13.1'],
    [datetime.datetime(2019, 4, 15), '13.0'],
    [datetime.datetime(2019, 3, 28), '12.7'],
    [datetime.datetime(2019, 2, 27), '12.6'],
    [datetime.datetime(2019, 2, 12), '12.5'],
    [datetime.datetime(2019, 1, 22), '12.4'],
    [datetime.datetime(2019, 1, 15), '12.3'],
    [datetime.datetime(2019, 1, 8), '12.2'],
    [datetime.datetime(2019, 1, 5), '12.0.1'],
    [datetime.datetime(2018, 12, 18), '12.0'],
    [datetime.datetime(2018, 12, 2), '11.7.1'],
    [datetime.datetime(2018, 11, 29), '11.7'],
    [datetime.datetime(2018, 10, 31), '11.6'],
    [datetime.datetime(2018, 10, 16), '11.5'],
    [datetime.datetime(2018, 9, 27), '11.4'],
    [datetime.datetime(2018, 9, 13), '11.3'],
    [datetime.datetime(2018, 8, 27), '11.2.1'],
    [datetime.datetime(2018, 8, 13), '11.2'],
    [datetime.datetime(2018, 7, 24), '11.1'],
    [datetime.datetime(2018, 7, 10), '11.0.1'],
    [datetime.datetime(2018, 7, 4), '11.0'],
    [datetime.datetime(2018, 6, 19), '10.9'],
    [datetime.datetime(2018, 6, 12), '10.8'],
    [datetime.datetime(2018, 5, 28), '10.7'],
    [datetime.datetime(2018, 5, 21), '10.6.1'],
    [datetime.datetime(2018, 5, 9), '10.6'],
    [datetime.datetime(2018, 4, 12), '10.5.2'],
    [datetime.datetime(2018, 4, 9), '10.5.1'],
    [datetime.datetime(2018, 4, 5), '10.5'],
    [datetime.datetime(2018, 2, 26), '10.4.1'],
    [datetime.datetime(2018, 2, 21), '10.4'],
    [datetime.datetime(2018, 2, 12), '10.3.1'],
    [datetime.datetime(2018, 2, 6), '10.3'],
    [datetime.datetime(2018, 1, 17), '10.2.1'],
    [datetime.datetime(2017, 12, 14), '10.2'],
    [datetime.datetime(2017, 12, 6), '10.1'],
    [datetime.datetime(2017, 11, 20), '10.0.3'],
    [datetime.datetime(2017, 11, 15), '10.0.2'],
    [datetime.datetime(2017, 10, 26), '10.0.1'],
    [datetime.datetime(2017, 10, 23), '10.0'],
    [datetime.datetime(2017, 9, 23), '5.9.1'],
    [datetime.datetime(2017, 8, 4), '5.9'],
    [datetime.datetime(2017, 7, 4), '5.8.9'],
    [datetime.datetime(2017, 6, 26), '5.8.8'],
    [datetime.datetime(2017, 6, 15), '5.8.7'],
    [datetime.datetime(2017, 5, 9), '5.8.6'],
    [datetime.datetime(2017, 4, 4), '5.8.5'],
    [datetime.datetime(2017, 1, 17), '5.8.4'],
    [datetime.datetime(2016, 12, 14), '5.8.3'],
    [datetime.datetime(2016, 12, 4), '5.8.2'],
    [datetime.datetime(2016, 11, 17), '5.8.1'],
    [datetime.datetime(2016, 11, 12), '5.8'],
    [datetime.datetime(2016, 9, 20), '5.7.1'],
    [datetime.datetime(2016, 9, 17), '5.7'],
    [datetime.datetime(2016, 7, 12), '5.6'],
    [datetime.datetime(2016, 5, 19), '5.5'],
    [datetime.datetime(2016, 4, 9), '5.4.1'],
    [datetime.datetime(2016, 3, 23), '5.4'],
    [datetime.datetime(2016, 2, 15), '5.3'],
    [datetime.datetime(2015, 12, 14), '5.2'],
    [datetime.datetime(2015, 10, 21), '5.1'],
    [datetime.datetime(2015, 9, 2), '5.0'],
    [datetime.datetime(2015, 7, 8), '4.9.1'],
    [datetime.datetime(2015, 6, 22), '4.8'],
    [datetime.datetime(2015, 4, 20), '4.5'],
    [datetime.datetime(2015, 3, 20), '4.4'],
    [datetime.datetime(2015, 2, 24), '4.3'],
    [datetime.datetime(2015, 1, 22), '4.2'],
    [datetime.datetime(2014, 12, 16), '4.1'],
    [datetime.datetime(2014, 11, 12), '4.0'],
    [datetime.datetime(2014, 10, 19), '3.9.2'],
    [datetime.datetime(2014, 9, 22), '3.9'],
    [datetime.datetime(2014, 6, 23), '3.8'],
    [datetime.datetime(2014, 6, 2), '3.7'],
    [datetime.datetime(2014, 5, 16), '3.6.3'],
    [datetime.datetime(2014, 4, 14), '3.6'],
    [datetime.datetime(2014, 3, 12), '3.5.3'],
    [datetime.datetime(2014, 1, 19), '3.5.1'],
    [datetime.datetime(2014, 1, 3), '3.5'],
    [datetime.datetime(2013, 11, 21), '3.4'],
    [datetime.datetime(2013, 11, 8), '3.3.1'],
    [datetime.datetime(2013, 9, 24), '3.3'],
    [datetime.datetime(2013, 6, 27), '3.2'],
    [datetime.datetime(2013, 5, 21), '3.1'],
    [datetime.datetime(2013, 4, 26), '3.0.12'],
    [datetime.datetime(2013, 4, 3), '3.0.10'],
    [datetime.datetime(2013, 2, 15), '3.0.9'],
    [datetime.datetime(2013, 1, 9), '3.0'],
    [datetime.datetime(2012, 11, 18), '2.2'],
    [datetime.datetime(2012, 8, 24), '2.1'],
    [datetime.datetime(2012, 6, 14), '2.0'],
    [datetime.datetime(2011, 8, 12), '1.1'],
    [datetime.datetime(2011, 7, 20), '1.0'],
]

def _closest_prior_version(review_date, releases, default='0'):
    """Return the version whose release most closely precedes ``review_date``.

    ``releases`` is an iterable of ``[release datetime, version]`` pairs.
    A release only qualifies when the review is at least one whole day after
    the release timestamp (``(review_date - released_at).days > 0``), so a
    review written less than 24 hours after a release is credited to the
    previous version. ``default`` is returned when no release qualifies,
    e.g. for a review dated before the oldest listed release.
    """
    best_gap = None
    best_version = default
    for released_at, version in releases:
        gap = (review_date - released_at).days  # whole days, computed once per release
        if gap > 0 and (best_gap is None or gap < best_gap):
            best_gap, best_version = gap, version
    return best_version


# Assign every iOS review the version with the closest earlier release date.
# Building the whole column at once also creates 'reviewVersion' when df has
# no rows, instead of relying on the first row's write to add the column.
df['reviewVersion'] = [_closest_prior_version(review_date, myList) for review_date in df['date']]
        

In [184]:
# Reorder the columns to the layout shared with the Android data.
df = df[["userName", "content", "rating", "date", "reviewVersion"]].copy()

# Remove the time of day from the review timestamps in one vectorised step
# instead of writing each row back through .at; the column then holds plain
# datetime.date objects.
df['date'] = pd.to_datetime(df['date']).dt.date

In [185]:
# Sanity check: (rows, columns) of the cleaned iOS review frame before it is saved.
df.shape

(710, 5)

In [186]:
# Append the iOS reviews to the easypark CSV file (';'-separated, no header row).
# NOTE: mode='a' appends, so re-running this cell writes the same rows again.
df.to_csv('easypark.csv', sep=';', header=False, mode='a')