-
Notifications
You must be signed in to change notification settings - Fork 0
/
answer.py
48 lines (37 loc) · 1.57 KB
/
answer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sys
class Fetcher:
    """Load a URL in headless Chrome and pull a short answer out of the page.

    The page is rendered with Selenium, parsed with BeautifulSoup, and
    searched with a fixed, ordered list of CSS selectors. The selectors look
    like search-engine "answer box" markup -- NOTE(review): presumably Google
    results pages; confirm against the caller that builds ``url``.

    The instance owns a browser process. Release it with :meth:`close` or by
    using the instance as a context manager::

        with Fetcher(url) as fetcher:
            print(fetcher.lookup())
    """

    # Seconds to wait for the page marker element before giving up.
    _WAIT_SECONDS = 5

    # CSS selectors tried in order; the first one that matches wins.
    _ANSWER_SELECTORS = (
        "div[data-attrid='wa:/description'] > span",
        "div[data-attrid='kc:/common:synonyms'] > span",
        "span[jsname='W297wb']",
        "div.zCubwf",
    )

    def __init__(self, url):
        """Start a headless Chrome session for *url*.

        Args:
            url: Address of the page that :meth:`lookup` will load.
        """
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(options=chrome_options)
        # The wait helper is stored on the driver object itself; kept that
        # way so any existing code reading ``driver.wait`` keeps working.
        self.driver.wait = WebDriverWait(self.driver, self._WAIT_SECONDS)
        self.url = url
        print(self.url)

    def __enter__(self):
        """Return the fetcher itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down when the ``with`` block ends."""
        self.close()

    def close(self):
        """Quit the browser session. Safe to call more than once."""
        driver = getattr(self, "driver", None)
        if driver is not None:
            # Drop the reference first so a failing quit() is not retried.
            self.driver = None
            driver.quit()

    def lookup(self):
        """Load the page and return the text of the first matching answer.

        Side effects: prints ``failed`` if the marker element does not show
        up in time, and overwrites ``test.html`` in the current working
        directory with the parsed page for debugging.

        Returns:
            The text of the first element matched by the answer selectors,
            or ``"I dont know."`` when none of them match.

        Raises:
            RuntimeError: If the fetcher has already been closed.
        """
        if self.driver is None:
            raise RuntimeError("Fetcher is closed")

        self.driver.get(self.url)
        try:
            # Only used as a "page has rendered" signal; the elements
            # themselves are not needed.
            self.driver.wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "gsfi"))
            )
        except TimeoutException:
            # Best effort: still try to parse whatever has loaded so far.
            print("failed")

        soup = BeautifulSoup(self.driver.page_source, "html.parser")

        answer = None
        for selector in self._ANSWER_SELECTORS:
            answer = soup.select_one(selector)
            if answer is not None:
                break

        # Save the page source for debugging purposes
        with open("test.html", "w", encoding="utf-8") as f:
            f.write(str(soup))

        # Return the answer or "I dont know." if no answer was found
        return answer.get_text() if answer is not None else "I dont know."