Create a scraper that opens the TEA TAPR Advanced Download Menu.

It automatically selects:
- "District" radio button
- "College, Career, and Military Readiness (CCMR), TSIA, College Prep" radio button
- "Continue" button (a submit button, not a radio button)



In [55]:
#Importing necessary packages 
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time

In [57]:
# Download the district-level CCMR dataset from the 2023 TAPR download page.
#
# Steps: configure Chrome's download preferences, open the page, tick the
# radio buttons whose label text contains each entry of `radio_labels`,
# press "Continue", then give the browser a few seconds to save the file.
import os

# Setup WebDriver
# Save into the current user's Downloads folder instead of a placeholder
# path that has to be edited by hand before the script can work.
download_dir = os.path.join(os.path.expanduser("~"), "Downloads")

options = webdriver.ChromeOptions()
options.add_experimental_option("prefs", {
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True
})

# Initialize Chrome WebDriver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Page to open
url = "https://rptsvr1.tea.texas.gov/perfreport/tapr/2023/download/DownloadData.html"

# Explicit-wait helper (10 second timeout) used for page elements below
wait = WebDriverWait(driver, 10)

# List of radio button labels to select
radio_labels = [
    "District",
    "College, Career, and Military Readiness (CCMR), TSIA, College Prep"
]

# Only report success when every step below actually ran to the end.
download_succeeded = False

try:
    # Open the webpage (inside the try so the browser is closed on failure)
    driver.get(url)

    # Wait until the labels are present instead of reading the DOM immediately
    labels = wait.until(
        EC.presence_of_all_elements_located((By.TAG_NAME, "label"))
    )

    for label_text in radio_labels:
        print(f"Selecting '{label_text}' radio button...")

        # Find the label that matches the desired text
        for label in labels:
            if label_text in label.text:
                # Find the associated radio button inside the label
                radio_button = label.find_element(By.TAG_NAME, "input")

                # Scroll into view
                driver.execute_script("arguments[0].scrollIntoView();", radio_button)

                # Click through JavaScript, as the original script did
                driver.execute_script("arguments[0].click();", radio_button)
                print(f"✅ Selected '{label.text}'")

                # Break after selecting the correct one
                break
        else:
            # No label matched: stop rather than submit an incomplete selection
            raise LookupError(f"No label containing '{label_text}' was found")

    # Wait briefly before clicking Continue
    time.sleep(2)

    # Click the "Continue" button ONCE after selecting all radio buttons
    continue_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@type='submit' and @value='Continue']")))
    driver.execute_script("arguments[0].click();", continue_button)

    # Fixed pause to let the download finish before the browser is closed
    time.sleep(5)

    download_succeeded = True

except Exception as e:
    print(f"❌ Error: {e}")

finally:
    # Close the browser even if an unexpected error or interrupt occurred
    driver.quit()

if download_succeeded:
    print("✅ Download completed!")

Selecting 'District' radio button...
✅ Selected 'District'
Selecting 'College, Career, and Military Readiness (CCMR), TSIA, College Prep' radio button...
✅ Selected 'College, Career, and Military Readiness (CCMR), TSIA, College Prep'
✅ Download completed!


In [58]:
# this creates separate directories for different years to avoid issues with downloaded file names
import time
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import os

# Download several district-level TAPR datasets for each report year.
#
# Every year gets its own Chrome session whose download directory is a
# per-year folder, so files with the same name from different years do not
# collide.

# select the datasets
years = list(range(2018, 2024))

# TODO: the recorded run reports 'PERF1' as not found for 2018-2020; it may
# be named 'PERF' in those years -- confirm against the page before changing.
data_acronyms = ['PROF', 'PERF1', 'GRAD', 'STAAR1', 'REF']

# access TAPR
for year in years:
    dir_name = f"raw_data{year}"
    os.makedirs(dir_name, exist_ok=True)

    chromeOptions = webdriver.ChromeOptions()
    # Point Chrome at the folder that was just created (as an absolute path)
    # rather than at a hard-coded location under one user's home directory.
    prefs = {"download.default_directory": os.path.abspath(dir_name)}
    chromeOptions.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(options=chromeOptions)

    try:
        driver.get(f"https://rptsvr1.tea.texas.gov/perfreport/tapr/{year}/download/DownloadData.html")

        # select district
        district_select = driver.find_element(By.XPATH, "//input[@type='radio' and @name='sumlev' and @value='D']")
        district_select.click()

        for acr in data_acronyms:
            print(acr)
            try:
                select_data = driver.find_element(By.XPATH, f"//input[@type='radio' and @name='setpick' and @value='{acr}']")
                select_data.click()
                print("clicked")
                download = driver.find_element(By.XPATH, "//input[@type='submit' and @value='Continue']")
                download.click()
                # Fixed pause to give the download time to finish
                time.sleep(60)
                print("downloaded")
            except NoSuchElementException:
                # This dataset is not offered for this year; move on
                print(f"{acr} not found for {year}")
    finally:
        # Always close the browser, even if the page or district radio failed
        driver.quit()


PROF
clicked
downloaded
PERF1
PERF1 not found for 2018
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
PROF
clicked
downloaded
PERF1
PERF1 not found for 2019
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
PROF
clicked
downloaded
PERF1
PERF1 not found for 2020
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
PROF
clicked
downloaded
PERF1
clicked
downloaded
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
PROF
clicked
downloaded
PERF1
clicked
downloaded
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
PROF
clicked
downloaded
PERF1
clicked
downloaded
GRAD
clicked
downloaded
STAAR1
clicked
downloaded
REF
clicked
downloaded
