In [None]:
import os
import re
import time
import pandas as pd

from dataclasses import dataclass
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

In [None]:
from typing import Optional, List, Tuple, Union

In [None]:
import sys

In [None]:
# Directory that a later cell appends to sys.path before importing from `src`.
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine without editing this value.
_PKG_PATH: str = "/Users/adebayobraimah/Desktop/projects/CSE505"

In [None]:
# Put the project directory on the import path. The membership check keeps
# the cell idempotent: re-running it no longer adds duplicate sys.path entries.
if _PKG_PATH not in sys.path:
    sys.path.append(_PKG_PATH)

In [None]:
from src.utils import util

In [None]:
# Page the browser session opens first (passed to driver.get).
url = "https://prod.ps.stonybrook.edu/psc/csprodg/EMPLOYEE/CAMP/c/COMMUNITY_ACCESS.SSS_BROWSE_CATLG.GBL?"

In [None]:
# Timeout handed to every WebDriverWait in this notebook (seconds); half of it
# is also used as a fixed sleep before the result tables are read.
wait_time: int = 10

In [None]:
# Setup Selenium WebDriver
# Starts a Chrome browser session and loads `url` in it.
# NOTE(review): no driver.quit() is visible in this part of the notebook —
# confirm the session is closed further down.
driver = webdriver.Chrome()
driver.get(url)

In [None]:
# Subject code to look up, normalised to upper case; later used as partial
# link text to find the subject's link.
major_three_letter_code: str = "mat".upper()
major_three_letter_code

In [None]:
# First character of the subject code; used as the exact link text of the
# letter link that is clicked next. The code is already upper-cased, so the
# extra .upper() does not change the value.
nav_letter: str = major_three_letter_code[0].upper()
nav_letter

In [None]:
# Wait (up to wait_time) for the link whose text equals the navigation letter
# to become clickable, then click it.
nav_link_ready = EC.element_to_be_clickable((By.LINK_TEXT, nav_letter))
WebDriverWait(driver, wait_time).until(nav_link_ready).click()

In [None]:
# Navigate to major
# The link is matched on partial text, so any link containing the subject code
# qualifies; the first one that becomes clickable is clicked.
major_link_ready = EC.element_to_be_clickable(
    (By.PARTIAL_LINK_TEXT, major_three_letter_code)
)
WebDriverWait(driver, wait_time).until(major_link_ready).click()

In [None]:
# Get table data
# Collects every <tbody> element on the page; a later cell narrows the list
# down to the one that holds the course listing.
# NOTE(review): the pause is a fixed sleep, not a wait on a page condition, so
# it always costs wait_time // 2 seconds and can still be too short on a slow load.
time.sleep(wait_time // 2)  # Time to wait for javascript to load the table.
table: List[webdriver.remote.webelement.WebElement] = driver.find_elements(
    By.TAG_NAME, "tbody"
)

In [None]:
def _has_course_nbr_link(tbody) -> bool:
    """Report whether ``tbody`` contains a link whose text includes 'Course Nbr'."""
    try:
        tbody.find_element(By.PARTIAL_LINK_TEXT, "Course Nbr")
    except (AttributeError, NoSuchElementException):
        return False
    return True


# Walk the candidates last-to-first, keeping only those with the header link.
_course_tables = [tbody for tbody in reversed(table) if _has_course_nbr_link(tbody)]

# The first survivor of the reversed walk is the last matching table on the
# page; None signals that no candidate matched.
table = _course_tables[0] if _course_tables else None

In [None]:
# Show the selected table element (None when no candidate matched).
table

In [None]:
# Verify table
# The selection step leaves `table` as None when no <tbody> matched. Check for
# that first so the failure is a clear ValueError rather than an
# AttributeError raised by calling a method on None.
if table is None:
    raise ValueError("No course table was found. Check the URL and major code.")

if not table.is_displayed():
    raise ValueError("Table is not displayed. Check the URL and major code.")

if not table.text:
    raise ValueError("Table is empty. Check the URL and major code.")


In [None]:
# Column names are the text of each <th> cell found inside the table.
header_cells = table.find_elements(By.TAG_NAME, "th")
headers = [th.text for th in header_cells]
headers

In [None]:
# Lazily produce, for every <tr> in the table, the list of its <td> texts.
row_cell_texts = (
    [td.text for td in tr.find_elements(By.TAG_NAME, "td")]
    for tr in table.find_elements(By.TAG_NAME, "tr")
)
# Rows that yield no <td> cells are skipped.
rows = [texts for texts in row_cell_texts if texts]
rows

In [None]:
# Assemble the scraped rows into a DataFrame, labelling the columns with the
# scraped header texts.
df = pd.DataFrame(rows, columns=headers)

In [None]:
# Values of the "Course Nbr" column as a plain list; a later cell takes the
# first entry as the course to open.
_course_numbers: List[str] = df["Course Nbr"].tolist()
# _course_numbers

In [None]:
# NO EDIT
def clean_course_title(course_title: str) -> str:
    """Strip a trailing ``**``-prefixed annotation from a course title.

    For a single-line title, everything from the first ``**`` to the end of
    the string is dropped; the remainder is then trimmed of leading and
    trailing whitespace. A title without ``**`` is only trimmed.

    Args:
        course_title: Raw course title text.

    Returns:
        The title without the ``**...`` suffix and without outer whitespace.
    """
    annotation_pattern = re.compile(r"\*\*.*$")
    title_without_annotation = annotation_pattern.sub("", course_title)
    return title_without_annotation.strip()

In [None]:
# Snapshot the current titles, then overwrite the column with the cleaned
# versions, one cleaned title per original entry and in the same order.
_course_titles: List[str] = df["Course Title"].tolist()
df["Course Title"] = list(map(clean_course_title, _course_titles))

In [None]:
# Take the first scraped course number as the course to open.
# Raises IndexError if no course numbers were scraped.
course = _course_numbers[0]
course

In [None]:
# Wait for the page to load and click on course number
# The link is matched on its exact text, which is the course number.
course_link_ready = EC.element_to_be_clickable((By.LINK_TEXT, f"{course}"))
WebDriverWait(driver, wait_time).until(course_link_ready).click()

In [None]:
# Use ID to find element -- it is unique.
# Blocks until the element is present in the DOM, then clicks it.
# NOTE(review): the condition only checks presence, not that the element is
# visible or clickable — confirm the click is intended and cannot fail here.
career_element_present = EC.presence_of_element_located(
    (By.ID, "win0divSSR_CRSE_OFF_VW_ACAD_CAREER$0")
)
WebDriverWait(driver, wait_time).until(career_element_present).click()

In [None]:
# Text of the element that the previous cell waited for and clicked.
career: str = driver.find_element(By.ID, "win0divSSR_CRSE_OFF_VW_ACAD_CAREER$0").text
career

In [None]:
# Units value parsed from the element's text. The annotation is `float`
# because that is what float() returns (it was previously declared as `str`).
# NOTE(review): float() raises ValueError if the text is not a single number;
# the element id mentions a range — confirm the page never shows e.g. "1 - 3".
units: float = float(driver.find_element(By.ID, "DERIVED_CRSECAT_UNITS_RANGE$0").text)
units

In [None]:
# Text of the grading-basis element, looked up by its id.
# find_element raises NoSuchElementException if the id is not on the page.
grading_basis: str = driver.find_element(
    By.ID, "win0divSSR_CRSE_OFF_VW_GRADING_BASIS$0"
).text
grading_basis

In [None]:
# Raw text of the element with id DERIVED_CRSECAT_DESCR254A$0.
# NOTE(review): presumably the enrollment-requirement description, going by
# the variable name — confirm; find_element raises NoSuchElementException if
# the element is absent for this course.
_enrollment_requirement: str = driver.find_element(
    By.ID, "DERIVED_CRSECAT_DESCR254A$0"
).text
_enrollment_requirement

In [None]:
# Text of the academic-group description element, looked up by its id.
academic_group: str = driver.find_element(By.ID, "ACAD_GROUP_TBL_DESCR$0").text
academic_group