In [None]:
#imports
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

#generic and reusable Selenium helper methods
class SeleniumHelper:
    """Generic, reusable wrapper around a Selenium WebDriver session.

    The underlying driver is exposed as ``self.driver`` for anything the
    helper methods do not cover.  The helper can also be used as a context
    manager so the browser is always closed::

        with SeleniumHelper('firefox') as helper:
            helper.go_to(url)
    """

    def __init__(self, browser='chrome', implicit_wait=10):
        """Start a browser session and set its implicit wait.

        Args:
            browser: ``'chrome'`` or ``'firefox'`` (case-insensitive).
            implicit_wait: Seconds passed to ``driver.implicitly_wait``.

        Raises:
            ValueError: If ``browser`` names an unsupported browser.
        """
        self.driver = self._setup_driver(browser)
        self.driver.implicitly_wait(implicit_wait)

    def __enter__(self):
        """Return the helper itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on leaving the ``with`` block; never suppress errors."""
        self.quit()
        return False

    def _setup_driver(self, browser):
        """Create and return the WebDriver for ``browser``.

        Raises:
            ValueError: If ``browser`` is neither 'chrome' nor 'firefox'.
        """
        name = browser.lower()  # normalise once instead of per comparison
        if name == 'chrome':
            return webdriver.Chrome()
        if name == 'firefox':
            return webdriver.Firefox()
        raise ValueError("Only 'chrome' and 'firefox' browsers are supported.")

    def go_to(self, url):
        """Navigate the browser to ``url``."""
        self.driver.get(url)

    def click_element(self, by, value, timeout=10):
        """Wait until the located element is clickable, then click it.

        Args:
            by: Locator strategy (a ``By`` constant).
            value: Locator value for that strategy.
            timeout: Maximum seconds to wait for the element to become
                clickable.  Defaults to 10.

        NOTE: this explicit wait runs on top of the implicit wait set in
        ``__init__``; the Selenium documentation warns that combining the
        two can lead to unpredictable total wait times.
        """
        wait = WebDriverWait(self.driver, timeout)
        element = wait.until(EC.element_to_be_clickable((by, value)))
        element.click()

    def get_element_text(self, by, value):
        """Return the ``text`` of the first element matching the locator."""
        return self.driver.find_element(by, value).text

    def get_all_links(self, by=By.TAG_NAME, value="a"):
        """Return the non-empty ``href`` values of all matching elements.

        Args:
            by: Locator strategy; defaults to matching by tag name.
            value: Locator value; defaults to ``"a"`` (anchor tags).

        Returns:
            A list of ``href`` attribute values, in document order, skipping
            elements whose ``href`` is missing or empty.
        """
        links = []
        for element in self.driver.find_elements(by, value):
            # Read the attribute once: every get_attribute call is a
            # separate round-trip to the browser.
            href = element.get_attribute('href')
            if href:
                links.append(href)
        return links

    def get_elements_text(self, by, value):
        """Return the ``text`` of every element matching the locator."""
        return [element.text for element in self.driver.find_elements(by, value)]

    def get_elemetns_text(self, by, value):
        """Misspelled alias of ``get_elements_text``, kept for existing callers."""
        return self.get_elements_text(by, value)

    def send_keys(self, by, value, keys):
        """Clear the located element, then type ``keys`` into it."""
        element = self.driver.find_element(by, value)
        element.clear()
        element.send_keys(keys)

    def hover_over_element(self, by, value):
        """Move the mouse pointer over the located element."""
        element = self.driver.find_element(by, value)
        ActionChains(self.driver).move_to_element(element).perform()

    def quit(self):
        """Quit the driver, ending the browser session."""
        self.driver.quit()
    

In [None]:
# Feature values used to drive the scrape, kept as module-level constants.

# The 50 states plus the District of Columbia, alphabetical, grouped by initial.
STATES = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas',
    'California', 'Colorado', 'Connecticut',
    'Delaware', 'District of Columbia',
    'Florida',
    'Georgia',
    'Hawaii',
    'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky',
    'Louisiana',
    'Maine', 'Maryland', 'Massachusetts', 'Michigan',
    'Minnesota', 'Mississippi', 'Missouri', 'Montana',
    'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota',
    'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania',
    'Rhode Island',
    'South Carolina', 'South Dakota',
    'Tennessee', 'Texas',
    'Utah',
    'Vermont', 'Virginia',
    'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]

DIVISIONS = [
    'Clubs',
    'Middle School',
    'High School',
    'College',
]

# Seasons 2006 through 2024 inclusive.
YEARS = [*range(2006, 2025)]

GENDERS = [
    'Boys',
    'Girls',
]

# Sport name -> set of event names (a set, so iteration order is not fixed).
EVENTS = {
    "Outdoor Track": {
        '100 Meters',
        '200 Meters',
        '400 Meters',
    },
}

# Every age from 3 through 51, followed by a few isolated older ages.
AGES = [*range(3, 52), 53, 54, 55, 56, 61, 69, 90, 98]


In [None]:
"""
Data Intake Strategy:
1. Loop through every state for the high school, middle school, 
club, and college divisions and extract the link for each individual state
2. Click on individual states, loop through years from 2006-2024
and extract links for each year
3. For each link for each year, extract the links for first 3 unique 
meets from "Top XXXX Outdoor Meets" (top 7 most popular meets)
4. For each outdoor meet, extract any of the result links
that have 100, 200, or 400 in them.
5. For each unique sprinter (check if school name and
personal name already exist for given year), add corresponding 
data into a list

6. Store the time and year for each entry
and put them into a dictionary with state as the primary key,
then event as a subkey, then year as another subkey,
and finally a list of times
"""
"""
Data Being Collected (#data_number:{dict_of_data}):
First Name and Last Name (unique personal and school name)
School Name (unique personal and school name)
Year/Grade Level
State 
Personal Best PB (calculated based on times)
Dictionary of Lists of Times
    Event
        Date
        Time/Result    
"""

#extracting information from athletic.net links
# Landing pages for the 2024 outdoor track-and-field season, one per division.
athletic_net_high_school = "https://www.athletic.net/track-and-field-outdoor/usa/high-school/2024"
athletic_net_middle_school = "https://www.athletic.net/track-and-field-outdoor/usa/middle-school/2024"
athletic_net_clubs = "https://www.athletic.net/track-and-field-outdoor/usa/club/2024"
athletic_net_college = "https://www.athletic.net/track-and-field-outdoor/usa/college/2024"

# Collected links; starts empty.
athletic_net_links = []

# find_elements needs a live browser session, so create one through the
# helper and bind its WebDriver to `driver` for the lookup below.
helper = SeleniumHelper()
driver = helper.driver

# NOTE(review): a page must be loaded before the dropdown can be queried.
# The high-school landing page is assumed as the starting point -- confirm
# which division page the dropdown should be read from.
helper.go_to(athletic_net_high_school)

# Every item inside an element with class "dropdown-menu".
dropdown_items = driver.find_elements(By.CSS_SELECTOR, ".dropdown-menu .dropdown-item")

In [None]:
#high school


In [None]:
#middle school


In [None]:
#clubs


In [None]:
#college
