In [1]:
from splinter import Browser
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.remote.webelement import WebElement
from typing import Callable as function
import time, re, sys

In [2]:
# Upper bound on the number of polling rounds performed by
# persist_find_elements_async before it gives up.
MAX_PERSIST_ATTEMPTS = 10

def select_option_by_value(select:WebElement, val:str)->None:
    """Click the first <option> of `select` whose text ends with `val`.

    Despite the function name, the comparison is made against each option's
    visible text (``option.text``), not against its ``value`` attribute.

    Args:
        select: The <select> element. It is clicked first, then its <option>
            children are scanned in document order.
        val: Literal suffix to look for. It is escaped before being placed in
            the regular expression, so characters such as ``+``, ``.`` or
            ``(`` are matched as-is.

    Returns:
        None. When no option matches, only the initial click on `select`
        has happened.
    """
    # Escape `val`: without it a label like "C++" is an invalid pattern
    # (re.error) and "a.b" would also match "axb".
    pattern = re.compile(r"^.*{}$".format(re.escape(val)))
    select.click()
    for option in select.find_elements_by_tag_name('option'):
        if pattern.match(option.text):
            option.click()
            break

def persist_find_elements_async(
    browser:Browser, locator:tuple, visible:bool, predicate:function,
    timeout:int = 3, show_progress:bool = True
)->list:
    """Poll for elements until `predicate` accepts them or attempts run out.

    Each round calls find_elements_async (with its own exception printing
    disabled) and hands the result to `predicate`. At most
    MAX_PERSIST_ATTEMPTS rounds are made, one second apart.

    Args:
        browser: Browser whose driver is used for the lookup.
        locator: (strategy, value) tuple forwarded to find_elements_async.
        visible: Forwarded to find_elements_async to choose between waiting
            for visibility or mere presence.
        predicate: Callable receiving the list found in the current round and
            returning a truthy value once the result is acceptable. It always
            receives a list: a failed lookup is passed as an empty list.
        timeout: Per-round wait in seconds, forwarded to find_elements_async.
        show_progress: When True, print an "Attempt i of N" line per round.

    Returns:
        The list from the last round performed (empty if that lookup failed
        or no round was performed), whether or not `predicate` accepted it.
    """
    elements = []
    for attempt in range(1, MAX_PERSIST_ATTEMPTS + 1):
        if show_progress:
            print(
                "Attempt {} of {} [locator: {}]..."\
                .format(attempt, MAX_PERSIST_ATTEMPTS, locator)
            )
        found = find_elements_async(browser, locator, visible, timeout, False)
        # find_elements_async returns None on timeout/error; normalise it so
        # predicates such as `len(elements) == 3` do not raise TypeError.
        elements = found if found is not None else []
        if predicate(elements):
            break
        if attempt < MAX_PERSIST_ATTEMPTS:
            time.sleep(1) # waits 1 second before the next check; skipped after the final one
    return elements
            
def find_elements_async(
    browser:Browser, locator:tuple, visible:bool,
    timeout:int = 3, show_exceptions:bool = True
)->list:
    """Wait for all elements matching `locator` and return them.

    Args:
        browser: Object exposing the underlying driver as ``browser.driver``.
        locator: (strategy, value) tuple handed to the expected condition.
        visible: When True wait with ``visibility_of_all_elements_located``,
            otherwise with ``presence_of_all_elements_located``.
        timeout: Seconds given to WebDriverWait.
        show_exceptions: When True, print a short message describing why the
            lookup failed.

    Returns:
        Whatever ``WebDriverWait(...).until(...)`` returns on success, or
        None when any exception was raised during the wait. Exceptions are
        never propagated to the caller.
    """
    try:
        if visible:
            condition = EC.visibility_of_all_elements_located(locator)
        else:
            condition = EC.presence_of_all_elements_located(locator)
        return WebDriverWait(browser.driver, timeout).until(condition)
    except NoSuchElementException:
        if show_exceptions:
            print(
                "No such element found! Locator: '{}'."\
                .format(locator)
            )
    except TimeoutException:
        if show_exceptions:
            print(
                "Async timeout exceeded {}s! Locator: '{}'."\
                .format(timeout, locator)
            )
    except Exception as err:
        # The exception must be bound to `err`; formatting an unbound name
        # here would raise NameError instead of reporting the real failure.
        if show_exceptions:
            print(
                "Unhandled exception! Locator: '{}'.\n\nException:\n{}"\
                .format(locator, err)
            )
    return None

def handle_single(browser:Browser, label:str, elements:list)->type(None):
    """Click the first element of `elements`, or abort when there is none.

    Args:
        browser: Browser to shut down if nothing can be clicked.
        label: Human-readable name of the element, used in the exit message.
        elements: Lookup result; may be None or empty.

    Raises:
        SystemExit: after quitting the browser, when `elements` is None or
            empty.
    """
    nothing_found = elements is None or len(elements) <= 0
    if nothing_found:
        browser.quit()
        message = "Element '{}' not found! Cannot proceed.".format(label)
        sys.exit(message)
    elements[0].click()

In [3]:
# Init Settings
# Start page, headless switch and the search parameters consumed by the
# later cells.
url = "https://ieeexplore.ieee.org/Xplore/home.jsp"
is_headless = False  # forwarded to Browser(...) below as `headless`
search_opts = {
    # 'item-N': 'query' is typed into a search input, 'option' is the option
    # text picked in a <select> via select_option_by_value.
    'item-1': {
        'query': "web scraping",
        'option': "Abstract"
    },
    'item-2': {
        'query': "python",
        'option': "Abstract"
    },
    # Year bounds typed into the from/to inputs of the year range.
    'from': 2010,
    'to': 2021
}

# Chrome Driver Settings
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
cdriver_path = { 'executable_path':'chromedriver' }  # unpacked as keyword arguments into Browser(...)

# Browser Settings
# Creates the Browser instance shared by every following cell; it is
# released by browser.quit() in the last cell.
browser = Browser("chrome", **cdriver_path, headless = is_headless, options = options)

In [4]:
# Access page
# Loads the address stored in `url` in the browser session.
browser.visit(url)

In [5]:
# Wait (default timeout) for the visible dismiss link matched by the CSS
# selector below -- presumably the cookie-consent banner; confirm -- and
# click it. handle_single quits the browser and exits if nothing was found.
banner = find_elements_async(
    browser = browser,
    locator = (By.CSS_SELECTOR, 'div[class="cc-compliance"] a[class="cc-btn cc-dismiss"]'),
    visible = True
)
handle_single(browser, 'Banner', banner)

In [6]:
# Click on Advanced Search
# The link is located by a partial match of 'advanced' against its href.
browser.links.find_by_partial_href('advanced').click()

In [7]:
# Poll until exactly three <form> elements are present on the page.
forms = persist_find_elements_async(
    browser = browser,
    locator = (By.TAG_NAME, 'form'),
    visible = False,
    # A lookup that timed out may be reported as None rather than a list,
    # so check for None before calling len().
    predicate = lambda elements : elements is not None and len(elements) == 3,
    timeout = 10,
    show_progress = True
)
# `forms` can still be None if every attempt failed; report 0 in that case
# instead of raising TypeError.
print("total forms -> {}".format(len(forms) if forms is not None else 0))

Attempt 1 of 10 [locator: ('tag name', 'form')]...
Attempt 2 of 10 [locator: ('tag name', 'form')]...
Attempt 3 of 10 [locator: ('tag name', 'form')]...
total forms -> 3


In [8]:
# Get valid inputs & selects
# The last <form> found is taken as the advanced-search form; its <input>
# and <select> descendants are collected for the following cells.
adv_form = forms[-1]
adv_form_inputs = adv_form.find_elements_by_tag_name('input')
adv_form_selects = adv_form.find_elements_by_tag_name('select')
print("total inputs -> {}".format(len(adv_form_inputs)))
print("total selects -> {}".format(len(adv_form_selects)))

total inputs -> 7
total selects -> 5


In [9]:
# Fill specific inputs
# Types the two configured queries into the first and second <input> of
# the form.
adv_form_inputs[0].send_keys(search_opts['item-1']['query']);
adv_form_inputs[1].send_keys(search_opts['item-2']['query']);

In [10]:
# Select specific options
# Picks the configured option text in the <select> elements at index 0 and
# index 2 of the form (index 1 is left untouched).
select_option_by_value(adv_form_selects[0], search_opts['item-1']['option']);
select_option_by_value(adv_form_selects[2], search_opts['item-2']['option']);

In [11]:
# Get specific elements to remove
# Collects the <i class="fa fa-times"> icons (the "X" buttons) located
# inside a <div> of the form.
adv_form_to_rmv = adv_form.find_elements_by_css_selector('div i[class="fa fa-times"]')
print("total X btns -> {}".format(len(adv_form_to_rmv)))

total X btns -> 2


In [12]:
# Remove specific element
# Clicks the last of the "X" buttons collected in adv_form_to_rmv.
adv_form_to_rmv[-1].click()

In [13]:
# Choose 'Specify Year Range' option
# The XPath starts with './/' so the search is relative to adv_form; a
# leading '//' is evaluated against the whole document even when the call
# is made on an element.
adv_form.find_element_by_xpath('.//span[text()="Specify Year Range"]').click()

In [14]:
# Get from-to input fields
# Text inputs nested as span.row > ... span > input inside the form; the
# next cell uses index 0 as "from" and index 1 as "to".
adv_syr_inputs = adv_form.find_elements_by_css_selector('span[class="row"] span input[type="text"]')
print("total from-to inputs -> {}".format(len(adv_syr_inputs)))

total from-to inputs -> 2


In [15]:
# Type the configured year bounds into the from/to inputs.
# NOTE(review): the values are ints (2010, 2021), not strings -- confirm
# the installed Selenium accepts non-string arguments to send_keys.
adv_syr_inputs[0].send_keys(search_opts['from'])
adv_syr_inputs[1].send_keys(search_opts['to'])

In [16]:
# Search
# Clicks the submit button found inside the form's "submit-box" <div>.
adv_form.find_element_by_css_selector('div[class="submit-box"] button[type="submit"]').click()

In [17]:
# Count results
# Wait up to 10 seconds for the visible result containers.
query_results = find_elements_async(
    browser = browser,
    locator = (By.CSS_SELECTOR, 'div[class="List-results-items"]'),
    visible = True,
    timeout = 10
)
# find_elements_async returns None when the wait fails (presumably also when
# the search produced no hits -- confirm); count that as zero displayed items
# so the summary line below is printed instead of raising TypeError.
displayed_results = len(query_results) if query_results is not None else 0
total_results = 0
query_spans = browser.find_by_css(
    'section div[class="Dashboard-section Dashboard-section-gray"] ' +
    'div[class="Dashboard-header col-12"] span span[class="strong"]'
)
# The second "strong" span is read as the total; thousands separators are
# stripped before converting to int.
if len(query_spans) > 1:
    total_results = int(query_spans[1].text.replace(',', ''))
print("{} Displaying {} of {}".format(
    "Found results!" if total_results > 0 else "No results found...",
    displayed_results,
    total_results
    )
)

Found results! Displaying 12 of 12


In [18]:
# Close browser
# Ends the browser session created in the settings cell.
browser.quit()