In [None]:
# This block of code imports necessary tools and Selenium modules for web scraping.
import pandas as pd
print("Imported pandas.")

from selenium import webdriver
print("Imported webdriver from selenium.")

from selenium.webdriver.common.by import By
print("Imported By from selenium.webdriver.common.")

from selenium.webdriver.support.ui import WebDriverWait
print("Imported WebDriverWait from selenium.webdriver.support.ui.")

from selenium.webdriver.support import expected_conditions as EC
print("Imported expected_conditions as EC from selenium.webdriver.support.")

from selenium.webdriver.support.ui import Select
print("Imported Select from selenium.webdriver.support.ui.")

from selenium.webdriver.common.action_chains import ActionChains
print("Imported ActionChains from selenium.webdriver.common.")

from selenium.common.exceptions import (
    ElementClickInterceptedException,
    ElementNotInteractableException,
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
    UnexpectedTagNameException,
    WebDriverException,
)
print("Imported exceptions from selenium.common.exceptions.")

import time
print("Imported time module.")

print("All modules imported successfully. Ready to initialize the Safari WebDriver.")


In [None]:
# Start a Safari browser session and bind it to `driver`. Launch failures are
# printed instead of raised; in that case `driver` is not (re)bound by this cell.
try:
    driver = webdriver.Safari()
except WebDriverException as launch_error:
    # Selenium-level failure while starting the session.
    print("Failed to launch Safari WebDriver due to a WebDriver exception:")
    print(launch_error)
except Exception as launch_error:
    # Anything else that went wrong while starting the session.
    print("An unexpected error occurred while initializing the Safari WebDriver:")
    print(launch_error)
else:
    print("Safari WebDriver has been successfully initialized and launched.")


In [None]:
# Shared explicit wait used by the form-filling cells that follow.
# The timeout lives in one named value and the log line is derived from it, so the
# message can no longer disagree with the wait that is actually created (previously
# a 1-second wait was built while the message announced 5 seconds).
WAIT_TIMEOUT_SECONDS = 5
wait = WebDriverWait(driver, WAIT_TIMEOUT_SECONDS)
print(f"changed WebDriver Wait Time to {WAIT_TIMEOUT_SECONDS} to reduce failure chance when submitting inputs")

In [None]:
# Open the plan-finder URL in the already-running browser session and create the
# ActionChains helper (`actions`). Failures are printed rather than raised.
try:
    url = "https://finder.healthcare.gov/#planresults&U2FsdGVkX1%2Fi5Pmjp%2BsYYiIO9oHYp0MR2uRIy9rhiZDkkdOucG2AHsvkdr%2FhLAGdUvzzgfZRgiPamBjtoPtoMyuTDDQsTTM%2FUq2DFb4lUJwsimkgTasUaO9C%2BfEt4SasLUb7FhLAcBFvpTMofTrM9e73VkrJIpyzU7F2HGHe8ypZUXRLL2Wz9gDsHfLRnXBcg3YcxYzoazR8NwP4lr9CsVfA0CDQE71t%2BgQgAKqNqwM%3D"

    # The page-load timeout has to be configured BEFORE driver.get(); setting it
    # afterwards cannot bound this navigation, which left the TimeoutException
    # handler below unreachable for the initial load.
    driver.set_page_load_timeout(10)  # seconds

    print(f"Attempting to open the website: {url}")
    driver.get(url)
    print(f"Opened the website: {url}")

    print(f"Page title after loading: {driver.title}")
    actions = ActionChains(driver)
    print("Setting up automation window action drivers variable")

except TimeoutException as e:
    print("Error: The page did not load within the expected time.", e)
except WebDriverException as e:
    print("Web Driver error occurred:", e)
except Exception as e:
    print("An unexpected error occurred while initializing the driver or loading the page:", e)


In [None]:
# Resize the browser window to a fixed size and log the dimensions that were applied.
width = 750
height = 1200
driver.set_window_size(width, height)

# The message is built from the same two variables passed to the driver above.
print(f"Set window size to {width}x{height} pixels")

In [None]:
# Landing-page step: wait for the 'I am seeking coverage for myself' button
# (id="btn-plansHome") to become clickable, click it, then snapshot
# document.body.innerHTML into `body_html`. Failures are printed rather than raised.

try:
    plans_home_locator = (By.ID, "btn-plansHome")
    print("Locating the 'I am seeking coverage for myself' button by ID: btn-plansHome")
    button = wait.until(
        EC.element_to_be_clickable(plans_home_locator),
        message="The button 'I am seeking coverage for myself' was not clickable within the expected time.",
    )
    print("Button located successfully. Proceeding to click the button.")

    button.click()
    print("Button clicked. Navigating to the details page.")

    # Capture the page markup as it is right after the click.
    print("Fetching the HTML of the current page...")
    body_html = driver.execute_script("return document.body.innerHTML;")
    print("Successfully retrieved HTML content of the page.")

except NoSuchElementException as err:
    print("Element not found error: The 'I am seeking coverage for myself' button could not be located on the page.", err)
except TimeoutException as err:
    print("Timeout error: The button was not clickable within the time limit set by WebDriverWait.", err)
except ElementClickInterceptedException as err:
    print("Element click intercepted error: Something is blocking the button from being clicked.", err)
except ElementNotInteractableException as err:
    print("Element not interactable error: The button is present but not in a state that allows interaction.", err)
except Exception as err:
    print("An unexpected error occurred during the process:", err)
    print("Error details:", err)


In [None]:
# Zipcode step: wait for the input with id="zip" to become visible, clear it,
# type "87001", pause one second, then snapshot document.body.innerHTML into
# `body_html`. Failures are printed rather than raised.

try:
    zip_locator = (By.ID, "zip")
    print("Locating the zipcode input field by ID: zip")
    zip_input = wait.until(
        EC.visibility_of_element_located(zip_locator),
        message="Zipcode input field was not visible within the expected time.",
    )
    print("Zipcode input field located successfully.")

    # Start from an empty field so the typed value is not appended to old text.
    print("Clearing any pre-existing text in the zipcode input field.")
    zip_input.clear()

    print("Inputting the zipcode: 87001")
    zip_input.send_keys("87001")

    # Fixed pause so the page has a moment to react to the typed value.
    time.sleep(1)

    print("Fetching the HTML of the current page...")
    body_html = driver.execute_script("return document.body.innerHTML;")
    print("Successfully retrieved HTML content of the page.")
    print("!!! PLEASE MAKE SURE TO WAIT APPROX 1 SECOND AFTER CLICKING ON NEXT CELL, WAIT FOR WEBSITE TO LOAD !!!")

except NoSuchElementException as err:
    print("Element not found error: The 'zip' input field could not be located on the page.", err)
except TimeoutException as err:
    print("Timeout error: The 'zip' input field did not become visible within the expected time.", err)
except ElementNotInteractableException as err:
    print("Element not interactable error: The 'zip' input field is not interactable.", err)
except Exception as err:
    print("An unexpected error occurred during the zipcode input process:", err)
    print("Error details:", err)


In [None]:
# Applicant-details step: choose "Male" in the #primaryGender dropdown, write
# '10/30/2001' into #primaryDOB, choose "Non-Smoker" in #primaryTobaccoUsage, then
# snapshot document.body.innerHTML into `body_html`. Failures are printed rather
# than raised.

print("!!! PLEASE MAKE SURE TO WAIT APPROX 1 SECOND AFTER CLICKING ON CELL, WAIT FOR WEBSITE TO LOAD !!!")
print("Starting the process to input gender, DOB, and tobacco usage values...")

try:
    # --- gender ---------------------------------------------------------
    print("Locating the gender selection dropdown...")
    gender_locator = (By.ID, "primaryGender")
    primary_gender_select = wait.until(EC.element_to_be_clickable(gender_locator))
    select = Select(primary_gender_select)
    print("Gender dropdown located successfully. Selecting 'Male'...")
    select.select_by_value("Male")
    print("Gender 'Male' selected.")

    # --- date of birth --------------------------------------------------
    print("Locating the DOB input field...")
    dob_locator = (By.ID, "primaryDOB")
    dob_input = wait.until(EC.visibility_of_element_located(dob_locator))
    print("DOB input field located. Setting DOB to '10/30/2001'...")
    # NOTE(review): the value is assigned through JavaScript instead of being typed
    # with send_keys — confirm the form actually registers a value set this way.
    driver.execute_script("arguments[0].value = '10/30/2001';", dob_input)
    print("DOB set successfully.")

    # --- tobacco usage --------------------------------------------------
    print("Locating the tobacco usage dropdown...")
    tobacco_locator = (By.ID, "primaryTobaccoUsage")
    tobacco_usage_select = wait.until(EC.element_to_be_clickable(tobacco_locator))
    select_tobacco = Select(tobacco_usage_select)
    print("Tobacco usage dropdown located successfully. Selecting 'Non-Smoker'...")
    select_tobacco.select_by_value("Non-Smoker")
    print("Tobacco usage 'Non-Smoker' selected.")

    # Capture the page markup once all three fields have been filled in.
    print("Fetching the HTML of the current page after updates...")
    body_html = driver.execute_script("return document.body.innerHTML;")
    print("Successfully retrieved HTML content of the page.")

except NoSuchElementException as err:
    print("Element not found error:", err)
except ElementNotInteractableException as err:
    print("Element not interactable error:", err)
except UnexpectedTagNameException as err:
    # Raised by Select(...) when the located element is not a <select>.
    print("Unexpected tag name error when using Select on an element:", err)
except TimeoutException as err:
    print("Timeout error while waiting for an element to be interactable:", err)
except Exception as err:
    print("An unexpected error occurred:", err)


In [None]:
# Submit step: wait for the 'View Plans' button (id="viewPlansBtn") to become
# clickable, click it, then snapshot document.body.innerHTML into `body_html`.
# Failures are printed rather than raised.

print("Starting the process to navigate to the 'View Plans' page...")

try:
    view_plans_locator = (By.ID, "viewPlansBtn")
    print("Locating the 'View Plans' button by ID: viewPlansBtn")
    button = wait.until(EC.element_to_be_clickable(view_plans_locator))
    print("Button located successfully. Proceeding to click the button.")

    button.click()
    print("Button clicked. Navigating to the plans display page.")

    # The markup is only stored in body_html here; it is not printed by this cell.
    print("Fetching and printing the HTML of the current page after clicking the 'View Plans' button...")
    body_html = driver.execute_script("return document.body.innerHTML;")
    print("Successfully retrieved HTML content of the page.")

except NoSuchElementException as err:
    print("Element not found error: The 'View Plans' button could not be located on the page.", err)
except ElementClickInterceptedException as err:
    print("Element click intercepted error: Something is blocking the 'View Plans' button from being clicked.", err)
except ElementNotInteractableException as err:
    print("Element not interactable error: The 'View Plans' button is present but not interactable at the moment.", err)
except TimeoutException as err:
    print("Timeout error: The 'View Plans' button was not clickable within the time limit.", err)
except Exception as err:
    print("An unexpected error occurred:", err)
    print("Error details:", err)


In [None]:
# Walk the paginated plan results, collecting one dict per plan card into
# `plan_data`, then expose whatever was collected as the DataFrame `df`.
#
# Changes compared with the first version of this cell:
#   * `df` is built after the try block, so rows gathered before a late timeout or
#     error are no longer thrown away.
#   * After clicking "next" the loop waits until the rendered plan titles actually
#     change; if they never do, it stops instead of re-reading the same page
#     (which produced duplicate rows or an endless loop).

plan_data = []

# Column name -> CSS selector, relative to one ".plan-item" card. The dict order is
# the column order of the resulting DataFrame.
PLAN_FIELD_SELECTORS = {
    'Plan Name': "h2 a",
    'Company Name': ".plan-item-subtitle",
    'Monthly Premium': ".col-monthly-premium strong[data-name='baseRateAmount']",
    'Individual Deductible': ".col-annual-deductible .plan-cost .plan-type-value[data-name='individualAnnualDeductibleAmount']",
    'Family Deductible': ".col-annual-deductible .plan-cost .plan-type-value[data-name='familyAnnualDeductibleAmount']",
    'Individual OOP Limit': ".col-out-of-pocket-limit .plan-cost .plan-type-value[data-name='individualAnnualOOPLimitAmount']",
    'Family OOP Limit': ".col-out-of-pocket-limit .plan-cost .plan-type-value[data-name='familyAnnualOOPLimitAmount']",
}


def _visible_plan_names(browser):
    """Return the titles of the plan cards currently rendered, in page order."""
    return [link.text for link in browser.find_elements(By.CSS_SELECTOR, ".plan-item h2 a")]


def _page_advanced(browser, previous_names):
    """Return True once the rendered titles are non-empty and differ from *previous_names*."""
    current_names = _visible_plan_names(browser)
    return bool(current_names) and current_names != previous_names


try:
    wait = WebDriverWait(driver, 10)
    # Reading titles can race with the page re-rendering, so stale elements are
    # retried by this wait instead of aborting it.
    page_change_wait = WebDriverWait(
        driver, 10, ignored_exceptions=(StaleElementReferenceException,)
    )

    while True:
        print("Waiting for the 'plan-results' section to be visible on the webpage...")
        wait.until(
            EC.visibility_of_element_located((By.CLASS_NAME, "plan-results")),
            message="Failed to find the 'plan-results' section within the timeout period.",
        )
        print("The 'plan-results' section is now visible. Starting to process plan items...")

        plan_items = driver.find_elements(By.CLASS_NAME, "plan-item")
        print(f"Found {len(plan_items)} plan items on the page.")
        # Remember what this page shows so a real page change can be detected later.
        names_on_this_page = _visible_plan_names(driver)

        for index, item in enumerate(plan_items):
            print(f"Processing item {index + 1}...")
            try:
                record = {
                    column: item.find_element(By.CSS_SELECTOR, selector).text
                    for column, selector in PLAN_FIELD_SELECTORS.items()
                }
            except NoSuchElementException as e:
                # A card missing any field is skipped as a whole.
                print(f"Could not find one or more elements for plan {index + 1}. Error: {str(e)}")
            except WebDriverException as e:
                print(f"WebDriver error encountered for plan {index + 1}. Error: {str(e)}")
            else:
                plan_data.append(record)
                print(f"Successfully processed {record['Plan Name']}.")

        # Follow the "next" link only while an enabled one exists.
        next_page_buttons = driver.find_elements(By.CSS_SELECTOR, "a.btn_next:not(.disabled)")
        if not next_page_buttons:
            print("No more pages to process.")
            break

        print("Navigating to the next page...")
        next_page_buttons[0].click()
        try:
            page_change_wait.until(
                lambda browser: _page_advanced(browser, names_on_this_page)
            )
        except TimeoutException:
            print("The plan list did not change after clicking next; stopping to avoid re-reading the same page.")
            break

except TimeoutException as e:
    print(f"Operation timed out while waiting for the 'plan-results' section: {str(e)}")
except Exception as e:
    print(f"An unexpected error occurred: {str(e)}")
else:
    print("All plan items have been processed. Here are the collected data:")

# Built outside the try so rows gathered before a failure are still available.
df = pd.DataFrame(plan_data)
print(df)


In [None]:
# Pagination attempt 1: call click() on the "next" link from page JavaScript,
# addressing the link by its full CSS path. The outcome is printed either way.
try:
    next_link_css = "body > div > #main-content > div > div.container.content-area > div > div.col-sm-9.border-left > #resultsDiv > #pagingBottom > li:nth-child(6) > #next"
    # The selector is spliced into the script text; the resulting script is the same
    # single querySelector(...).click() statement as before.
    driver.execute_script(f"document.querySelector('{next_link_css}').click();")
    print("Next page link clicked using JavaScript execution.")

except Exception as click_error:
    print(f"An error occurred while trying to click the next page link with JavaScript: {click_error}")


In [None]:
# Pagination attempt 2: locate the "next" link by its full CSS path, scroll it
# into view with JavaScript, then click it through Selenium. A missing link and
# any other failure are reported with separate messages.

try:
    next_link_locator = (
        By.CSS_SELECTOR,
        "body > div > #main-content > div > div.container.content-area > div > div.col-sm-9.border-left > #resultsDiv > #pagingBottom > li:nth-child(6) > #next",
    )
    next_page_link = driver.find_element(*next_link_locator)
    # Bring the link into the visible part of the window before clicking it.
    driver.execute_script("arguments[0].scrollIntoView(true);", next_page_link)
    next_page_link.click()
    print("Next page link clicked after scrolling into view.")

except NoSuchElementException:
    print("Next page link not found on the page.")
except Exception as click_error:
    print(f"An unexpected error occurred: {click_error}")


In [None]:
# Pagination attempt 3: look the "next" link up directly by id="next" and click it.
try:
    next_page_link = driver.find_element(By.ID, "next")
    next_page_link.click()
except NoSuchElementException:
    print("Next page link not found using ID.")
except Exception as click_error:
    print(f"An unexpected error occurred: {click_error}")
else:
    # Reached only when both the lookup and the click completed without raising.
    print("Next page link clicked using ID.")

In [None]:
# Pagination attempt 4: wait up to 10 seconds for the link with id="next" to be
# clickable, scroll it into view, then click it. Note this rebinds the shared
# `wait` object to a 10-second wait.
try:
    wait = WebDriverWait(driver, 10)
    print("Checking for the presence of the next page link...")

    next_link_clickable = EC.element_to_be_clickable((By.ID, "next"))
    next_page_link = wait.until(next_link_clickable)
    print("Next page link is clickable.")

    # Bring the link into the visible part of the window before clicking it.
    driver.execute_script("arguments[0].scrollIntoView(true);", next_page_link)
    next_page_link.click()
    print("Next page link clicked successfully.")

except TimeoutException:
    print("Timed out waiting for the next page link to become clickable.")
except NoSuchElementException:
    print("Next page link not found on the page.")
except Exception as click_error:
    print(f"An unexpected error occurred: {click_error}")

In [None]:
# Diagnostic: read the computed `content` property of the "next" link's ::after
# pseudo-element (the link is addressed by its full CSS path) and print it.
content_script = """
    return window.getComputedStyle(document.querySelector('body > div > #main-content > div > div.container.content-area > div > div.col-sm-9.border-left > #resultsDiv > #pagingBottom > li:nth-child(6) > #next'), '::after').getPropertyValue('content');
    """
try:
    content = driver.execute_script(content_script)
    print("Content of ::after pseudo-element for the next page link:", content)

except Exception as script_error:
    print(f"Failed to retrieve content from pseudo-element: {script_error}")


In [None]:
# I was unable to complete the pipeline because a set of elements could not be
# accessed by Selenium. These elements appear in a JS window/box whose CSS styling
# lets them disappear and reappear, and they aren't actually present in the
# website's DOM. I ran out of time to try another solution.