# College Tour Information Scraper

Gathers information from the https://www.youvisit.com/collegesearch/ website



In [49]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

In [50]:
# Path handed to webdriver.Chrome when the browser session is created.
DRIVER_PATH = "./chromedriver"
# Page that the driver loads first.
WEBSITE_URL = "https://www.youvisit.com/collegesearch/"

In [51]:
def wait_for_element(by_selector, selector, seconds=10):
    """
    Wait for an element to be present on the page and return it.

    Polls the page through the module-level ``driver`` until the element located by
    ``(by_selector, selector)`` is present, or until ``seconds`` have elapsed.

    :param by_selector: The method to use for selecting (e.g. ``By.XPATH``).
    :param selector: The string selector to use.
    :param seconds: How long to wait until a timeout is thrown.
    :returns: The element produced by the wait.
    :raises Exception: If the element is not present before the timeout expires.
    """
    # Imported here so the helper does not rely on an extra top-of-file import.
    from selenium.common.exceptions import TimeoutException

    locator = (by_selector, selector)
    try:
        return WebDriverWait(driver, seconds).until(
            EC.presence_of_element_located(locator))
    except TimeoutException as err:
        # Only a timeout means "element not found"; any other failure (invalid
        # selector, lost browser session, ...) propagates unchanged instead of
        # being relabelled. The original timeout is kept as the explicit cause.
        raise Exception(
            "Could not find the specified selector '{}' using '{}'".format(selector, by_selector)
        ) from err

In [52]:
def click_on_element_from_selector(by_selector, selector, seconds=10):
    """
    Helper function to wait for an element and click on it.

    :param by_selector: The method to use for selecting.
    :param selector: The string selector to use.
    :param seconds: How long to wait until a timeout is thrown.
    :raises Exception: Propagated from ``wait_for_element`` when the element is not found.
    """
    # Forward the caller's timeout rather than a fixed value so that
    # the ``seconds`` argument actually takes effect.
    element = wait_for_element(by_selector, selector, seconds=seconds)
    element.click()

In [53]:
def send_keys_to_element_from_selector(by_selector, selector, text, seconds=10):
    """
    Helper function to wait for an element and send it key inputs.

    :param by_selector: The method to use for selecting.
    :param selector: The string selector to use.
    :param text: The string to send as input.
    :param seconds: How long to wait until a timeout is thrown.
    :raises Exception: Propagated from ``wait_for_element`` when the element is not found.
    """
    # Forward the caller's timeout rather than a fixed value so that
    # the ``seconds`` argument actually takes effect.
    element = wait_for_element(by_selector, selector, seconds=seconds)
    element.send_keys(text)

In [54]:
# Start a Chrome session with the driver binary at DRIVER_PATH and load the site.
# NOTE(review): passing the driver path positionally looks like the Selenium 3
# calling style; newer Selenium releases expect a Service object — confirm
# against the installed version.
driver = webdriver.Chrome(DRIVER_PATH)
driver.get(WEBSITE_URL)
# Position-based XPath; presumably the account button on the landing page —
# verify against the live page, since it depends on the exact DOM layout.
account_button_xpath = r"//*[@id='yv.com-cs-root']/div[1]/div[2]/div[1]/div[2]/button"
click_on_element_from_selector(By.XPATH, account_button_xpath, seconds=10)

In [55]:
# Absolute XPath from the document root; presumably the "continue with email"
# option in the account dialog — verify against the live page.
email_button_xpath = r"/html/body/div[2]/div[1]/div/div/div/div/div/div/div/div/div[2]/div[6]/button"
click_on_element_from_selector(By.XPATH, email_button_xpath, seconds=10)

In [56]:
# The email text input lives inside an iframe, so the driver has to switch into
# that frame before the elements inside it can be located.
email_iframe_xpath = r"/html/body/div[13]/div/div/iframe"
email_iframe_element = wait_for_element(By.XPATH, email_iframe_xpath, seconds=10)
# After this call, element lookups are resolved relative to the iframe's document.
driver.switch_to.frame(email_iframe_element)

In [57]:
# Type a placeholder email address into the input matched by this XPath
# (resolved inside the iframe the driver switched into earlier).
email_input_xpath = r"/html/body/div[1]/div[2]/div/div[1]/div[2]/div[2]/div[1]/input"
send_keys_to_element_from_selector(By.XPATH, email_input_xpath, "email@gmail.com", seconds=10)

In [58]:
# Click the button matched by this XPath; presumably it submits the email step
# of the form — verify against the live page.
email_submit_button_xpath = r"/html/body/div[1]/div[2]/div/div[2]/div/div/button"
click_on_element_from_selector(By.XPATH, email_submit_button_xpath, seconds=10)

In [59]:
# Fill the first-name input with a throwaway value.
first_name_input_xpath = r"/html/body/div[1]/div[2]/div/div[1]/div[2]/div[3]/div[1]/input"
send_keys_to_element_from_selector(By.XPATH, first_name_input_xpath, "asdfwer", seconds=10)

In [60]:
# Fill the last-name input with a throwaway value.
last_name_input_xpath = r"/html/body/div[1]/div[2]/div/div[1]/div[2]/div[4]/div[1]/input"
send_keys_to_element_from_selector(By.XPATH, last_name_input_xpath, "asdfwer", seconds=10)

In [63]:
# Fill the birthdate input; the value is sent as the literal text "10/10/2000".
# NOTE(review): presumably the field expects MM/DD/YYYY — confirm against the form.
birthdate_xpath = r"/html/body/div[1]/div[2]/div/div[1]/div[2]/div[5]/div[1]/input"
send_keys_to_element_from_selector(By.XPATH, birthdate_xpath, "10/10/2000", seconds=10)

In [64]:
# Fill the zip-code input; sent as a string so the leading zero is preserved.
zipcode_xpath = r"/html/body/div[1]/div[2]/div/div[1]/div[2]/div[6]/div[1]/input"
send_keys_to_element_from_selector(By.XPATH, zipcode_xpath, "07303", seconds=10)

In [65]:
# Click the form's button once the fields above are filled in. This is the same
# XPath string as email_submit_button_xpath, so the same button position is
# reused for this step.
submit_button_xpath = r"/html/body/div[1]/div[2]/div/div[2]/div/div/button"
click_on_element_from_selector(By.XPATH, submit_button_xpath, seconds=10)