Imports

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.common import NoSuchElementException, ElementNotInteractableException, ElementClickInterceptedException, TimeoutException, StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.actions.wheel_input import ScrollOrigin

import time
import datetime
import pymongo

from secret import * 
from global_variables import get_collection

Function name: test_button
Purpose: Determines if a given element is in an active state based on its aria-disabled attribute
Inputs:
    button: A Selenium WebElement whose activity state depends on the value of the aria-disabled attribute
Outputs: A boolean indicating whether the button is enabled (aria-disabled = false)

In [2]:
def test_button(button):
    enabled = button.get_attribute('aria-disabled')
    if enabled == 'false':
        return True
    return False

Function name: button_click
Purpose: Continuously clicks an element with a small delay until the element is successfully clicked
Inputs:
    driver: An active webdriver object
    button: A Selenium WebElement that can be clicked
Outputs:
    Void

In [3]:
def button_click(driver, button, delay=1, max_attempts=None):
    """Click *button*, retrying while another element intercepts the click.

    Args:
        driver: Active webdriver object. Not used by this helper; kept so
            existing call sites keep working.
        button: Element exposing ``click()``.
        delay: Seconds to sleep after an intercepted click before retrying.
        max_attempts: Upper bound on click attempts. ``None`` (the default)
            retries indefinitely, which is the historical behavior.

    Raises:
        ElementClickInterceptedException: If ``max_attempts`` is set and every
            attempt was intercepted.
    """
    attempts = 0
    while True:
        try:
            button.click()
            return
        except ElementClickInterceptedException:
            attempts += 1
            # Give up only when the caller asked for a bound; otherwise keep
            # waiting for the overlay/animation to get out of the way.
            if max_attempts is not None and attempts >= max_attempts:
                raise
            time.sleep(delay)

Function name: open_site
Purpose: Opens a site at a given url and maximizes the window for standardization
Inputs:
    driver: A webdriver object to open the url with
    url: A string containing the address of the site to be opened
Outputs: 
    Void

In [4]:
def open_site(driver, url):
    """Load *url* in *driver*, then maximize the browser window.

    Args:
        driver: Webdriver object used to open the page.
        url: Address of the page to load.
    """
    # Navigate first, then resize, so the window is maximized on the loaded page.
    driver.get(url)
    driver.maximize_window()


Function name: login
Purpose: Logs in to the UA scholarship universe portal with the given username and password, getting the driver to the scholarship universe homepage
Inputs:
    driver: A webdriver object on the scholarship universe login page
    username: The UA netID of the user (String)
    password: The UA password of the user (String)
Outputs:
    Void

In [5]:
def login(driver, username, password):
    """Fill in the login form on the current page and submit it.

    Waits (up to 20 seconds, polling every second) for the element with id
    ``password`` to become visible, then types the credentials into the
    ``username`` and ``password`` fields and clicks the element matching
    ``[tabindex="3"]``.

    Args:
        driver: Webdriver object currently showing the login page.
        username: Text typed into the ``username`` field.
        password: Text typed into the ``password`` field.
    """
    form_ready = EC.visibility_of_element_located((By.ID, 'password'))
    WebDriverWait(driver, timeout=20, poll_frequency=1).until(form_ready)

    # Locate every control before typing, in the same order as the form.
    credentials = {'username': username, 'password': password}
    boxes = {field_id: driver.find_element(By.ID, field_id) for field_id in credentials}
    submit = driver.find_element(By.CSS_SELECTOR, '[tabindex="3"]')

    for field_id, text in credentials.items():
        boxes[field_id].send_keys(text)
    submit.click()

Function name: open_scholarship_page
Purpose: Navigates from the scholarship universe homepage to the scholarships page
Inputs:
    driver: A webdriver object on the scholarship universe homepage
Outputs:
    Void

In [6]:
def open_scholarship_page(driver):
    """Click the scholarships link once it becomes visible.

    Waits up to 20 seconds (polling every second) for the element with id
    ``student-menu-scholarship-link`` and clicks it via ``button_click``.

    Args:
        driver: Webdriver object expected to be on the page that contains
            the scholarships menu link.

    Raises:
        TimeoutException: If the link never becomes visible. The driver is
            quit before the exception propagates, so the caller must not
            keep using it.
    """
    wait = WebDriverWait(driver, timeout=20, poll_frequency=1)
    try:
        # The wait returns the located element, so no second lookup is needed.
        scholarship_button = wait.until(
            EC.visibility_of_element_located((By.ID, 'student-menu-scholarship-link'))
        )
    except TimeoutException:
        # Close the browser, then surface the real failure instead of
        # carrying on and failing on a dead session.
        driver.quit()
        raise
    button_click(driver=driver, button=scholarship_button)

Function name: change_page
Purpose: goes to the next page on the scholarships tab of scholarship universe
Input:
    next_button: the next page button on the site
Output:
    Void

In [7]:
def change_page(next_button, driver=None):
    """Move the pointer to *next_button* and click it, retrying if intercepted.

    Args:
        next_button: The "next page" element to click.
        driver: Webdriver that owns *next_button*. When omitted, the driver
            is taken from ``next_button.parent`` (the WebDriver instance the
            element was found from), so existing one-argument calls work.
    """
    if driver is None:
        driver = next_button.parent
    ActionChains(driver).move_to_element(next_button).perform()
    # button_click sleeps between attempts. implicitly_wait() only changes the
    # element-lookup timeout and does not pause, so it is not used here.
    button_click(driver, next_button)

Function name: analyze_scholarship
Purpose: collects data viewed by clicking the statistics button on a scholarship
Inputs:
    driver: A webdriver on the scholarships tab of scholarship universe
    scholarship: A Selenium WebElement that is an ancestor of only one statistics button
Output:
    stats: A dictionary containing keys for Applications and Difficulty

In [8]:
def analyze_scholarship(driver, scholarship):
    """Open a scholarship's statistics panel and read its figures.

    Clicks the ``Statistics`` button inside *scholarship*, polls the panel
    until the fourth bold value is all digits (taken as the sign that the
    panel has finished loading), then clicks the button again.

    Args:
        driver: Webdriver object showing the scholarships list.
        scholarship: Element containing a ``button[title="Statistics"]``.

    Returns:
        dict with keys ``'Applications'`` and ``'Difficulty'`` holding the
        text of the second and third bold values of the panel.
    """
    stats_button_css = 'button[title="Statistics"]'
    # NOTE(review): this attribute looks framework-generated and may change
    # when the site is rebuilt - confirm there is no stabler selector.
    panel_css = 'div[_ngcontent-ng-c4018585498]'
    values_css = panel_css + '>p>b'

    wait = WebDriverWait(driver, timeout=20, poll_frequency=1)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, stats_button_css)))
    stats_button = scholarship.find_element(By.CSS_SELECTOR, stats_button_css)
    button_click(driver, stats_button)

    while True:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, panel_css)))
        try:
            # Read all three values from one lookup so they describe the same
            # render of the panel; any of these reads can hit a stale element
            # if the panel re-renders in between.
            stats_boxes = driver.find_elements(By.CSS_SELECTOR, values_css)
            applications = stats_boxes[1].text
            difficulty = stats_boxes[2].text
            matches = stats_boxes[3].text
        except StaleElementReferenceException:
            time.sleep(1)
            continue
        if matches.isdigit():
            break
        # Panel is visible but not populated yet; give it time and re-read.
        time.sleep(1)

    stats = {
        'Applications': applications,
        'Difficulty': difficulty,
    }
    # Second click on the same button, as in the original flow.
    button_click(driver, stats_button)
    return stats
            

Function name: scan_scholarships
Purpose: Scans every matched scholarship on scholarship universe, collecting data on the name, award amount, due date, application number, and difficulty of each scholarship
Input:
    driver: A webdriver object on the scholarships tab of scholarship universe
Output:
    scholarship_data: A list containing a dictionary for each scholarship. The dictionary has keys for Name, Amount, Due date,
    Applications, and Difficulty

In [9]:
def scan_scholarships(driver):
    """Walk every results page and collect one record per scholarship row.

    For each page: waits for the results table body, reads the non-empty
    amount, name and application-window texts, then visits each table row to
    fetch its statistics via ``analyze_scholarship``. Paging stops when the
    "next page" button is reported disabled by ``test_button``.

    Args:
        driver: Webdriver object showing the scholarships list.

    Returns:
        list of dicts, one per row, with keys ``'Applications'``,
        ``'Difficulty'``, ``'Amount'``, ``'Name'`` and ``'Due date'`` (the
        last one a ``datetime.datetime`` parsed from the end date, which is
        read as month/day/year).
    """
    table_body_css = 'tbody[kendogridtablebody][role="rowgroup"][class="k-table-tbody"]'
    wait = WebDriverWait(driver, timeout=20, poll_frequency=1)
    scholarship_data = []

    while True:
        wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, table_body_css)))
        time.sleep(1)

        award_amounts = [
            x.text
            for x in driver.find_elements(By.CSS_SELECTOR, '#amount > p.detail-value.amount-value > strong')
            if x.text != ''
        ]
        names = [x.text for x in driver.find_elements(By.CLASS_NAME, 'big-name') if x.text != '']
        # The window reads "<start> - <end>"; only the end date is kept.
        end_dates = [
            x.text.split(' - ')[1]
            for x in driver.find_elements(By.CSS_SELECTOR, '#apply-between > p.detail-value')
            if x.text != ''
        ]
        apply_dates = []
        for end_date in end_dates:
            parts = end_date.split('/')
            apply_dates.append(datetime.datetime(int(parts[2]), int(parts[0]), int(parts[1])))

        scholarship_list = driver.find_elements(By.CSS_SELECTOR, table_body_css + '>tr')
        for i, row in enumerate(scholarship_list):
            ActionChains(driver).move_to_element(row).perform()
            stats = analyze_scholarship(driver, row)
            # The per-page lists are matched to rows purely by position.
            stats.update({
                'Amount': award_amounts[i],
                'Name': names[i],
                'Due date': apply_dates[i],
            })
            scholarship_data.append(stats)

        next_button = driver.find_element(By.CSS_SELECTOR, 'button[title="Go to the next page"]')
        if not test_button(next_button):
            return scholarship_data
        button_click(driver=driver, button=next_button)

Function name: clear_collection
Purpose: clears a given mongodb collection
Input:
    collection: A mongodb collection to be cleared
Output:
    A boolean, True if the collection was successfully cleared, False otherwise

In [10]:
def clear_collection(collection):
    """Delete every document in *collection* and confirm it is empty.

    Args:
        collection: Object exposing ``delete_many`` and ``count_documents``.

    Returns:
        True if no documents remain after the delete, False otherwise.
    """
    match_all = {}
    collection.delete_many(match_all)
    remaining = collection.count_documents({})
    return remaining == 0

Function name: refresh_collection
Purpose: Gathers data on every matched scholarship in scholarship universe and adds the data to a mongodb collection
Inputs:
    db_uri: the uri the mongod service is listening on
    database_name: the database containing the collection for data to be stored in
    collection_name: the name of the collection to store data in
    ua_username: the UA netId of the user
    ua_password: the UA password of the user
Outputs:
    None

In [11]:
def refresh_collection(db_uri, database_name, collection_name, ua_username, ua_password):
    """Scrape all matched scholarships and replace the collection's contents.

    Opens a Chrome session at ``UA_URL``, logs in, navigates to the
    scholarships page and scans it. Only after the scan has succeeded is the
    target collection cleared and repopulated, so a failed scrape leaves the
    stored data untouched.

    Args:
        db_uri: Passed to ``get_collection`` to reach the database.
        database_name: Name of the database holding the collection.
        collection_name: Name of the collection to refresh.
        ua_username: Username typed into the login form.
        ua_password: Password typed into the login form.
    """
    collection = get_collection(db_uri, database_name, collection_name)
    driver = webdriver.Chrome()
    try:
        open_site(driver, UA_URL)
        login(driver, ua_username, ua_password)
        open_scholarship_page(driver)
        scholarship_data = scan_scholarships(driver)
    finally:
        # Always close the browser, including when a step above raises.
        driver.quit()

    clear_collection(collection)
    # insert_many rejects an empty document list, so skip it when nothing
    # was scraped.
    if scholarship_data:
        collection.insert_many(scholarship_data)

In [12]:
# Run the refresh only when executed directly (a notebook cell or script),
# not when this file is imported: the call launches a browser and rewrites
# the database collection.
if __name__ == "__main__":
    refresh_collection(
        db_uri=DB_STRING,
        database_name=DATABASE_NAME,
        collection_name=COLLECTION_NAME,
        ua_username=UA_USERNAME,
        ua_password=UA_PASSWORD,
    )