Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix image search selectors for Bing and Google. #149

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
2 changes: 1 addition & 1 deletion GoogleScraper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,4 +456,4 @@ def main(return_results=False, parse_cmd_line=True, config_from_dict=None):
session.commit()

if return_results:
return scraper_search
return session
4 changes: 2 additions & 2 deletions GoogleScraper/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ class GoogleParser(Parser):
image_search_selectors = {
'results': {
'de_ip': {
'container': 'li#isr_mc',
'container': '#isr_mc',
'result_container': 'div.rg_di',
'link': 'a.rg_l::attr(href)'
},
Expand Down Expand Up @@ -626,7 +626,7 @@ class BingParser(Parser):
'ch_ip': {
'container': '#dg_c .imgres',
'result_container': '.dg_u',
'link': 'a.dv_i::attr(m)'
'link': 'a::attr(m)'
},
}
}
Expand Down
2 changes: 1 addition & 1 deletion GoogleScraper/search_engine_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@

"""
bing_search_params = {

'adlt': 'off'
}

"""
Expand Down
21 changes: 8 additions & 13 deletions GoogleScraper/selenium_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class SelScrape(SearchEngineScrape, threading.Thread):
'google': '#pnnext',
'yandex': '.pager__button_kind_next',
'bing': '.sb_pagN',
'yahoo': '#pg-next',
'yahoo': '.compPagination .next',
'baidu': '.n',
'ask': '#paging div a.txt3.l_nu',
'blekko': '',
Expand Down Expand Up @@ -455,13 +455,16 @@ def _find_next_page_element(self):
WebDriverWait(self.webdriver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
except (WebDriverException, TimeoutException) as e:
self._save_debug_screenshot()
raise Exception('{}: Cannot locate next page element: {}'.format(self.name, str(e)))
# raise Exception('{}: Cannot locate next page element: {}'.format(self.name, str(e)))

return self.webdriver.find_element_by_css_selector(selector)

elif self.search_type == 'image':
self.page_down()
return True
if self.search_engine_name == 'google':
return self.webdriver.find_element_by_css_selector('input._kvc')
else:
return True

def wait_until_serp_loaded(self):
"""
Expand Down Expand Up @@ -599,17 +602,9 @@ def page_down(self):
Used for next page in image search mode or when the
next results are obtained by scrolling down a page.
"""
js = '''
var w = window,
d = document,
e = d.documentElement,
g = d.getElementsByTagName('body')[0],
y = w.innerHeight|| e.clientHeight|| g.clientHeight;

window.scrollBy(0,y);
return y;
'''
js = 'window.scrollTo(0,document.body.scrollHeight);'

time.sleep(5)
self.webdriver.execute_script(js)

def run(self):
Expand Down