Skip to content

Commit b78fac2

Browse files
Merge pull request avinashkranjan#1146 from RohiniRG/RohiniRG-apps
Playstore scraper
2 parents d9b252e + 6813f5d commit b78fac2

File tree

4 files changed

+210
-0
lines changed

4 files changed

+210
-0
lines changed

PlaystoreScraper/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Google Playstore Scraper
2+
3+
- This script scrapes the Google Play Store for a given search query and collects the relevant data about each app in the results.
4+
5+
- In `fetch_apps.py`, we take the search query as user input, then fetch the information for all apps related to that query and store it in a database file.
6+
7+
- In `display_apps.py`, we display the desired results from the database to the user.
8+
9+
## Setup instructions
10+
11+
- The requirements can be installed as follows:
12+
13+
```shell
14+
$ pip install -r requirements.txt
15+
```
16+
17+
## Working screenshots
18+
19+
![Image](https://i.imgur.com/BYKNvFR.png)
20+
21+
## Author
22+
[Rohini Rao](https://www.github.com/RohiniRG)
23+

PlaystoreScraper/display_apps.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import sqlite3
2+
import os
3+
4+
5+
def sql_connection():
    """
    Opens the SQLite database file that holds the scraped app data.

    The file name is resolved to an absolute path (relative to the current
    working directory) before the connection is made.
    :return: sqlite3 connection object
    """
    database_file = os.path.abspath('PlaystoreDatabase.db')
    return sqlite3.connect(database_file)
13+
14+
15+
def sql_fetcher(con):
    """
    Prompts for a search query and prints every app stored under that query.

    Only the QUERY column is compared, so a row is shown when it was scraped
    for exactly the text entered (case-sensitive), not when some other column
    such as the app name or rating happens to equal it.
    :param con: open sqlite3 connection to a database containing the apps table
    :return: None, the matching rows and a summary line are printed
    """
    query = input("\nEnter query to search: ")
    cur = con.cursor()
    # Filter in SQL with a bound parameter instead of loading the whole table
    # and testing membership against every column in Python.
    cur.execute('SELECT * FROM apps WHERE QUERY = ?', (query,))
    rows = cur.fetchall()

    for r in rows:
        # Column 0 is the QUERY itself; the app details start at column 1.
        print(f'\nURL: {r[1]}\nNAME: {r[2]}\nRATING: {r[3]}\n'
              f'REVIEWS: {r[4]}\nINSTALLS: {r[5]}\nVERSION: {r[6]}'
              f'\nLASTUPDATE: {r[7]}\nCOMPANY: {r[8]}\nCONTACT: {r[9]}')

    if rows:
        print(f'{len(rows)} apps fetched from database\n')
    else:
        print('\nNo apps stored for this query\n')
38+
39+
40+
# Interactive driver: keep answering lookups until the user declines to continue.
con = sql_connection()

try:
    while True:
        sql_fetcher(con)

        ans = input('\nPress (y) to continue or any other key to exit: ').lower()
        if ans != 'y':
            print('\nExiting..\n')
            break
finally:
    # Release the database handle on every exit path, including Ctrl-C.
    con.close()
51+

PlaystoreScraper/fetch_apps.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
import time
2+
from selenium import webdriver
3+
import sqlite3
4+
5+
6+
def sql_connection():
    """
    Opens the SQLite database file used to store the scraped app data.

    The file name is relative, so it is resolved against the current
    working directory.
    :return: sqlite3 connection object
    """
    return sqlite3.connect('PlaystoreDatabase.db')
13+
14+
15+
def sql_table(con):
    """
    Makes sure the apps table is present in the database.

    The statement uses IF NOT EXISTS, so calling this on a database that
    already has the table leaves it untouched. Every column is declared as text.
    :param con: open sqlite3 connection
    :return: None
    """
    create_statement = (
        "CREATE TABLE IF NOT EXISTS apps(QUERY text, URL text, NAME text, RATING text, "
        " REVIEWS text, INSTALLS text, VERSION text, LASTUPDATE text, "
        " COMPANY text, CONTACT text)"
    )
    con.cursor().execute(create_statement)
    con.commit()
27+
28+
29+
def sql_insert_table(con, entities):
    """
    Inserts one app record into the apps table and commits it.

    :param con: open sqlite3 connection to a database containing the apps table
    :param entities: sequence of 10 values in column order
        (QUERY, URL, NAME, RATING, REVIEWS, INSTALLS, VERSION, LASTUPDATE,
        COMPANY, CONTACT)
    :return: None
    """
    cur = con.cursor()
    # The column list takes bare column names only; a type after the name
    # (as in a CREATE TABLE) is a syntax error in an INSERT.
    cur.execute('INSERT INTO apps(QUERY, URL, NAME, RATING, REVIEWS, '
                'INSTALLS, VERSION, LASTUPDATE, COMPANY, CONTACT) '
                'VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', entities)
    con.commit()
41+
42+
43+
def _scroll_to_bottom(driver):
    """Scrolls to the end of the page repeatedly until its height stops changing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    time.sleep(5)

    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to grow before measuring again.
        time.sleep(5)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _collect_app_urls(driver):
    """Returns the unique app-detail links on the current page, in page order."""
    store_urls = []
    for elem in driver.find_elements_by_xpath("//a[@href]"):
        href = elem.get_attribute("href")
        if href and "details?id" in href:
            store_urls.append(href)

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(store_urls))


def _scrape_app(driver, query, url):
    """Loads one app page and returns its row for the apps table.

    Fields that are not present on the page are returned as None (stored as
    NULL) rather than carrying over a value from a previously scraped app.
    """
    driver.get(url)
    time.sleep(3)

    name = driver.find_element_by_tag_name("h1").text
    rating = driver.find_element_by_class_name("BHMmbe").text
    reviews = driver.find_element_by_class_name("EymY4b").text.split()[0]

    # Only every second "htlgb" element is used as a value, paired by position
    # with the "BgcNfc" headers.
    # NOTE(review): presumably each value is wrapped in two elements sharing
    # the class - confirm against the live page markup.
    values = driver.find_elements_by_class_name("htlgb")[::2]
    headers = driver.find_elements_by_class_name("BgcNfc")
    details = {header.text: value.text for header, value in zip(headers, values)}

    # The contact is the first line of the "Developer" entry containing "@".
    contact = None
    for line in (details.get("Developer") or "").split("\n"):
        if "@" in line:
            contact = line
            break

    return (query, url, name, rating, reviews,
            details.get("Installs"), details.get("Current Version"),
            details.get("Updated"), details.get("Offered By"), contact)


driver = webdriver.Chrome()
con = sql_connection()

try:
    sql_table(con)

    while True:
        query = input("\nEnter search query: ")

        driver.get(f'https://play.google.com/store/search?q={query}&c=apps')

        print('\nGetting all the desired info...\n')
        time.sleep(5)

        _scroll_to_bottom(driver)

        # The links are collected up front because visiting an app page
        # navigates away from the search results.
        for every in _collect_app_urls(driver):
            try:
                entities = _scrape_app(driver, query, every)
                sql_insert_table(con, entities)
            except Exception as e:
                # Best effort: one broken page must not stop the whole run,
                # but say which app was skipped and why.
                print(f'Skipping {every}: {e}')
                continue

        print('\nAll info collected successfully!!\n')

        ans = input('Press (y) to continue or any other key to exit: ').lower()
        if ans != 'y':
            print('Exiting..')
            break
finally:
    # Release the database handle and shut the browser down on every exit path.
    con.close()
    driver.quit()
135+

PlaystoreScraper/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
selenium==3.141.0

0 commit comments

Comments
 (0)