James Caldwell <br>
UVA IRA, 10/8/2025 <br>

This Python script automates the extraction of error tables from the SCHEV Institutional Portal and merges them with a VCSIN-to-SSID mapping file. The final result is exported to an Excel workbook with each error code on a separate sheet.

Features:

- Opens the SCHEV portal in a Chrome browser via Selenium.
- Pauses so you can log in manually; no credentials are stored in the script.
- Collects the links on the error-summary page that correspond to error codes.
- Follows each link to retrieve its error table.
- Merges each error table with a local VCSIN-to-SSID Excel mapping.
- Saves all merged tables to a single Excel workbook, one sheet named after each error code.

In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import time
from urllib.parse import urljoin
from io import StringIO
import re

# Scrape every error table linked from the SCHEV error-summary page, attach
# SSIDs from a local VCSIN-to-SSID workbook, and write one Excel sheet per
# error code. Runs top to bottom as a script and needs a manual login.

# Variables to change
url = "https://portals.schev.edu/institutions/PUBLIC/UVA/Viewerrorsummary.asp?tablename=fa&repyear=2425"
VCSIN_to_SSID_path = r"path here"
chrome_driver_path = r"path here"
excel_output_path = r"path here"

# Set up Selenium WebDriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service)

# Parallel lists: entry k of the names list is the sheet name for entry k of
# the tables list. Both are appended together, only after a table has been
# fetched and merged successfully, so they can never drift out of step.
all_error_tables_for_excel = []
all_error_table_names_for_excel = []

# Case-folded sheet names already handed out (Excel compares sheet names
# case-insensitively).
used_sheet_names = set()

try:
    # Open the login page
    driver.get(url)

    print("Browser opened. Please log in manually.")
    input("Press ENTER here *after* you have logged in successfully...")

    # Reload after login so page_source is the summary page itself rather
    # than whatever page the login flow ended on.
    driver.get(url)
    html = driver.page_source
    mysoup = BeautifulSoup(html, 'html.parser')

    # Load VCSIN to SSID mapping file (must contain a 'VCSIN' column; it is
    # the join key used below).
    print("Loading VCSIN to SSID mapping file...")
    VCSIN_to_SSID = pd.read_excel(VCSIN_to_SSID_path)

    # === Extract all links ===
    summary_url = driver.current_url
    links = []
    for a in mysoup.find_all('a', href=True):
        href = a['href']
        # Skip empty, anchor, or javascript links
        if not href or href.startswith('#') or href.lower().startswith('javascript'):
            continue
        # urljoin resolves relative, root-relative and absolute hrefs alike
        full_link = urljoin(summary_url, href)
        if 'viewError' in full_link:  # filter for relevant links
            links.append(full_link)

    print(f"\nFound {len(links)} error code links:")

    # === Visit each link ===
    for i, link in enumerate(links, start=1):
        print(f"\nVisiting link {i}/{len(links)}: {link}")
        # Best effort per link: a failure on one error code is reported and
        # the loop moves on to the next one.
        try:
            driver.get(link)
            time.sleep(1)  # wait for page to load; adjust as needed

            page_html = driver.page_source
            page_soup = BeautifulSoup(page_html, 'html.parser')

            # Find any element whose src attribute points at the error table page
            link_tag = page_soup.find(src=lambda value: value and "ErrorsList.asp" in value)
            if not link_tag:
                print("No match found.")
                continue

            # Error code for the sheet name: prefer the Errcode= value in the
            # page, fall back to the Err= query parameter of the link, and
            # finally to a positional placeholder so the table is never lost.
            match = re.search(r'Errcode=([^&"]+)', page_html)
            if match:
                error_code = match.group(1)
            else:
                link_match = re.search(r'[?&]Err=([^&]+)', link)
                error_code = link_match.group(1) if link_match else f"Error_{i}"
            print(f'Error Code: {error_code}')

            # A src attribute is relative to the document that contains it,
            # so resolve against the page currently loaded.
            table_url = urljoin(driver.current_url, link_tag['src'])

            driver.get(table_url)
            time.sleep(1)  # wait for page to load; adjust as needed
            table_page_html = driver.page_source

            # Parse tables with pandas (raises ValueError if the page has none)
            tables = pd.read_html(StringIO(table_page_html), header=0)
            df = tables[0]  # Get the first table, adjust index if needed

            # Merge with VCSIN_to_SSID to get SSID
            if 'SOCSEC1' in df.columns:
                df = df.merge(
                    VCSIN_to_SSID, how='left', left_on='SOCSEC1', right_on='VCSIN'
                ).drop(columns='VCSIN')

            # Excel sheet names: at most 31 characters, none of []:*?/\ and
            # unique within the workbook.
            sheet_name = re.sub(r'[\[\]:*?/\\]', '_', error_code)[:31]
            base_name = sheet_name
            duplicate_no = 2
            while sheet_name.casefold() in used_sheet_names:
                suffix = f"_{duplicate_no}"
                sheet_name = base_name[:31 - len(suffix)] + suffix
                duplicate_no += 1
            used_sheet_names.add(sheet_name.casefold())

            all_error_table_names_for_excel.append(sheet_name)
            all_error_tables_for_excel.append(df)
        except Exception as e:
            print(f"Error visiting {link}: {e}")

    print("\nDone visiting all links.")

    if all_error_tables_for_excel:
        print("\nSaving to Excel...")
        with pd.ExcelWriter(excel_output_path, engine='openpyxl') as writer:
            for sheet_name, table in zip(all_error_table_names_for_excel,
                                         all_error_tables_for_excel):
                table.to_excel(writer, sheet_name=sheet_name, index=False)
    else:
        # A workbook with no sheets cannot be saved, so skip the write.
        print("\nNo error tables were retrieved; nothing to save.")
finally:
    # Always close the browser, even if login, scraping or saving failed.
    driver.quit()



Browser opened. Please log in manually.
Loading VCSIN to SSID mapping file...

Found 84 error code links:

Visiting link 1/84: https://portals.schev.edu/institutions/PUBLIC/UVA/viewErrors.asp?Err=AEF001W02&ErrType=W&TableName=fa&Repyear=2425&Position=340
Error Code: AEF001W02

Visiting link 2/84: https://portals.schev.edu/institutions/PUBLIC/UVA/viewErrors.asp?Err=AEF003W02&ErrType=W&TableName=fa&Repyear=2425&Position=354
Error Code: AEF003W02

Visiting link 3/84: https://portals.schev.edu/institutions/PUBLIC/UVA/viewErrors.asp?Err=AFA004W01&ErrType=W&TableName=fa&Repyear=2425&Position=68
Error Code: AFA004W01

Visiting link 4/84: https://portals.schev.edu/institutions/PUBLIC/UVA/viewErrors.asp?Err=AFA008W01&ErrType=W&TableName=fa&Repyear=2425&Position=74
Error Code: AFA008W01

Visiting link 5/84: https://portals.schev.edu/institutions/PUBLIC/UVA/viewErrors.asp?Err=AFE001E02&ErrType=E&TableName=fa&Repyear=2425&Position=123
Error Code: AFE001E02

Visiting link 6/84: https://portals.sche