-
Notifications
You must be signed in to change notification settings - Fork 0
/
spider.py
47 lines (40 loc) · 1.35 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
import time
from secret import siteUrl
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
def openFile(file_name):
    """Load and return the JSON document stored in *file_name*.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON interchange text is UTF-8; pin the encoding so decoding does not
    # depend on the locale of the machine running the script.
    with open(file_name, encoding="utf-8") as ip:
        return json.load(ip)
def headless():
    """Return Firefox ``Options`` configured to run without a visible window."""
    options = Options()
    # Pass the command-line flag directly: the ``options.headless`` setter is
    # deprecated and later removed in Selenium 4, where assigning to it no
    # longer has any effect. The flag itself works on every version.
    options.add_argument("-headless")
    return options
# Default location of the geckodriver binary; override per call when running
# on a machine where it is installed elsewhere.
_GECKODRIVER_PATH = '/home/skaarface/Apps/WebDrivers/geckodriver'


def startDriver(link, executable_path=_GECKODRIVER_PATH):
    """Start a headless Firefox session and navigate it to ``siteUrl + link``.

    Args:
        link: Path appended to ``siteUrl`` to form the page address.
        executable_path: Location of the geckodriver binary.

    Returns:
        The Firefox WebDriver with the page loaded. The caller owns the
        driver and is responsible for shutting it down.

    Raises:
        Whatever ``webdriver.Firefox`` or ``driver.get`` raises; if navigation
        fails the browser is shut down before the error propagates.
    """
    driver = webdriver.Firefox(options=headless(), executable_path=executable_path)
    try:
        driver.get(siteUrl + link)
    except BaseException:
        # The caller never receives the driver on failure, so it must be
        # released here or the browser process would be orphaned.
        driver.quit()
        raise
    return driver
def collectLabels(driver):
    """Return the ``<td>`` elements whose class contains ``spec_ttle``.

    Args:
        driver: WebDriver with the target page already loaded.

    Returns:
        List of matching elements in document order (empty if none match).
    """
    # Use the generic locator call ("xpath" is the value of By.XPATH): the
    # find_elements_by_xpath helper merely delegated to it and no longer
    # exists in current Selenium releases.
    return driver.find_elements("xpath", "//td[contains(@class,'spec_ttle')]")
def collectValues(driver):
    """Return the ``<td>`` elements whose class contains ``spec_des``.

    Args:
        driver: WebDriver with the target page already loaded.

    Returns:
        List of matching elements in document order (empty if none match).
    """
    # Use the generic locator call ("xpath" is the value of By.XPATH): the
    # find_elements_by_xpath helper merely delegated to it and no longer
    # exists in current Selenium releases.
    return driver.find_elements("xpath", "//td[contains(@class,'spec_des')]")
# Visit every page listed in the step-1 output, collect its name and
# label/value table into a dict, and write all dicts to outputs/step2.json.
info = openFile("outputs/step1_part_a.json")
fin = []   # one dict per successfully scraped page
done = 0   # number of pages scraped so far (printed as progress)
try:
    # `info` is indexed by the keys it yields, i.e. it is used as a mapping
    # whose values are the page links; iterate over those values directly.
    for link in info.values():
        mini_driver = startDriver(link)
        try:
            dic = {}
            dic["Name"] = mini_driver.find_element(
                "xpath", "//h1[contains(@class,'h1_pro_head')]"
            ).text
            # Labels and values are paired by position; zip stops at the
            # shorter of the two lists.
            for label, values in zip(collectLabels(mini_driver), collectValues(mini_driver)):
                dic[label.text] = values.text
        finally:
            # Release the browser even when scraping this page fails, and use
            # quit() rather than close() so the whole driver session ends
            # instead of only the current window.
            mini_driver.quit()
        fin.append(dic)
        done += 1
        print(done)
finally:
    # Persist whatever was collected, including partial results when the
    # loop above is interrupted by an error.
    with open("outputs/step2.json", "w") as op:
        json.dump(fin, op)