### Prototype Code : Scraper

In [None]:
# GET URL FUNCTION
def get_url():
    """Prompt on the console for a website URL and return the text entered."""
    url = input("Enter Website URL: ")
    return url

# DATA SCRAPING FUNCTION
def data_scrape(url):
    """Collect descriptions of the visible UI elements on a web page.

    Opens ``url`` in a Selenium-driven Firefox session and records every
    displayed button, link, input field, heading (h1-h6), paragraph and
    ``<nav>`` element as a human-readable string.

    Args:
        url: Address handed to ``driver.get``.

    Returns:
        A list of five lists of strings, in this order: buttons, links,
        input fields, text (headings h1..h6 first, then paragraphs),
        navigation menus.

    Raises:
        Any exception raised by Selenium propagates to the caller; the
        browser is closed first.
    """
    # Imported locally so the function does not rely on another notebook
    # cell having been executed beforehand.
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Start a Firefox-backed WebDriver session.
    driver = webdriver.Firefox()
    try:
        # Open the website
        driver.get(url)

        # Visible buttons.
        # NOTE(review): <button> elements normally carry no href, so the URL
        # part is presumably always None -- confirm what should be reported.
        visible_buttons = [
            f"Button Element: {button.text} with URL : {button.get_attribute('href')}"
            for button in driver.find_elements(By.CSS_SELECTOR, "button")
            if button.is_displayed()
        ]

        # Visible links.
        visible_links = [
            f"Link Element: {link.text} with URL : {link.get_attribute('href')}"
            for link in driver.find_elements(By.TAG_NAME, "a")
            if link.is_displayed()
        ]

        # Visible form inputs. Single quotes inside the f-string keep the
        # code valid on Python versions older than 3.12.
        visible_text_inputs = [
            f"Input Field Name: {input_field.get_attribute('name')} "
            f"Input field type: {input_field.get_attribute('type')}"
            for input_field in driver.find_elements(By.TAG_NAME, "input")
            if input_field.is_displayed()
        ]

        # Visible text: headings first (HTML defines h1 to h6), then paragraphs.
        visible_text = []
        for level in range(1, 7):
            visible_text.extend(
                f"Heading (h{level}): {heading.text}"
                for heading in driver.find_elements(By.TAG_NAME, f"h{level}")
                if heading.is_displayed()
            )
        visible_text.extend(
            f"Paragraph Text: {paragraph.text}"
            for paragraph in driver.find_elements(By.TAG_NAME, "p")
            if paragraph.is_displayed()
        )

        # Visible navigation menus.
        visible_nav_menus = [
            f"Navigation Menu: {nav.text}"
            for nav in driver.find_elements(By.TAG_NAME, "nav")
            if nav.is_displayed()
        ]
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

    return [
        visible_buttons,
        visible_links,
        visible_text_inputs,
        visible_text,
        visible_nav_menus,
    ]

In [None]:
# Possibly Outdated: Other methods
from selenium import webdriver
from selenium.webdriver.common.by import By

# Demo: open a page, locate the first <button> and print a selection of its
# properties.

# Set up Selenium with a WebDriver, e.g., ChromeDriver
driver = webdriver.Chrome()

try:
    # Open the website
    driver.get("https://example.com")

    # Find an element (example button).
    # The find_element_by_* helpers were removed in Selenium 4.3; the
    # By-based locator API is the supported replacement.
    button = driver.find_element(By.CSS_SELECTOR, "button")

    # Get various properties
    button_text = button.text
    button_id = button.get_attribute("id")
    button_class = button.get_attribute("class")
    button_color = button.value_of_css_property("background-color")
    button_size = button.size
    button_location = button.location
    button_tag_name = button.tag_name
    button_visible = button.is_displayed()
    button_enabled = button.is_enabled()
    button_inner_html = button.get_attribute("innerHTML")

    # Print properties
    print("Button text:", button_text)
    print("Button ID:", button_id)
    print("Button class:", button_class)
    print("Button color:", button_color)
    print("Button size:", button_size)
    print("Button location:", button_location)
    print("Button tag name:", button_tag_name)
    print("Button visible:", button_visible)
    print("Button enabled:", button_enabled)
    print("Button inner HTML:", button_inner_html)
finally:
    # Close the browser even if the lookup above raised (for example when
    # the page contains no <button>).
    driver.quit()


### Version 1: Improvement According to Design

In [None]:
#####################################
# GET URL FUNCTION (non UI version) #
#####################################
def get_url():
    """Ask the user for a website URL on the console and return the reply."""
    entered_url = input("Enter Website URL: ")
    return entered_url

# DATA SCRAPING FUNCTION
def data_scrape(url):
    """Collect descriptions of the visible UI elements on a web page.

    Opens ``url`` in a Selenium-driven Firefox session and records every
    displayed button, link, input field, heading (h1-h6) and paragraph as a
    human-readable string.

    Args:
        url: Address handed to ``driver.get``.

    Returns:
        A list of four lists of strings, in this order: buttons, links,
        input fields, text (headings h1..h6 first, then paragraphs).

    Raises:
        Any exception raised by Selenium propagates to the caller; the
        browser is closed first.
    """
    # Imported locally so the function does not rely on another notebook
    # cell having been executed beforehand.
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    # Setup Selenium Webdriver
    # TODO : Add other drivers according to user settings. Detect user browser and use that as driver potentially
    driver = webdriver.Firefox()
    try:
        # Open the website
        driver.get(url)

        #######################################
        ## GET UI ELEMENTS (must be visible) ##
        #######################################

        # BUTTONS
        # TODO : "Button <button-name> with size <size> and color <color>. This button is <isEnabled>"
        #   - Name (text)
        #   - Size
        #   - Color
        #   - isEnabled
        # NOTE(review): <button> elements normally carry no href, so the URL
        # part is presumably always None -- confirm what should be reported.
        visible_buttons = [
            f"Button Element: {button.text} with URL : {button.get_attribute('href')}"
            for button in driver.find_elements(By.CSS_SELECTOR, "button")
            if button.is_displayed()
        ]

        # LINKS
        # TODO : "Link <link-name> with url <href>"
        #   - Name (text)
        #   - href
        visible_links = [
            f"Link Element: {link.text} with URL : {link.get_attribute('href')}"
            for link in driver.find_elements(By.TAG_NAME, "a")
            if link.is_displayed()
        ]

        # VISIBLE TEXT
        # TODO :
        #   Separate Paragraphs From HNs
        #   ""<p> or <h> with content <text>""
        #   - Content (text)
        visible_text = []
        # HN (HTML defines six heading levels, h1 to h6)
        for level in range(1, 7):
            visible_text.extend(
                f"Heading (h{level}): {heading.text}"
                for heading in driver.find_elements(By.TAG_NAME, f"h{level}")
                if heading.is_displayed()
            )
        # PARAGRAPHS
        visible_text.extend(
            f"Paragraph Text: {paragraph.text}"
            for paragraph in driver.find_elements(By.TAG_NAME, "p")
            if paragraph.is_displayed()
        )

        # FORMS : INPUT TYPE TEXT
        # TODO :
        #   Input Type Text : "form input of type text named <name>"
        #   Input Type Submit : "form input of type submit named <name>"
        #   - input type text
        #       - Name
        #   - input type submit
        #       - Name
        # Single quotes inside the f-string keep the code valid on Python
        # versions older than 3.12.
        visible_text_inputs = [
            f"Input Field Name: {input_field.get_attribute('name')} "
            f"Input field type: {input_field.get_attribute('type')}"
            for input_field in driver.find_elements(By.TAG_NAME, "input")
            if input_field.is_displayed()
        ]
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()

    ############################################
    ## COMPOUND ELEMENTS INTO A LIST OF LISTS ##
    ############################################
    return [
        visible_buttons,
        visible_links,
        visible_text_inputs,
        visible_text,
    ]