In [None]:
# pip install requests beautifulsoup4 anytree

In [None]:
import requests
import json
from bs4 import BeautifulSoup
from anytree import Node, RenderTree
import re

# Transfer Schools

In [None]:
def get_transfer_schools():
    """Download UTD's transfer-credit school list and parse it into Python data.

    The response text is patched into JSON before parsing: it is wrapped in
    braces, every "=" becomes ":", every "schools" is wrapped in double
    quotes, and every ",]" (trailing comma before a closing bracket) becomes "]".

    Returns:
        Whatever ``json.loads`` yields for the patched text.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.Timeout: no response arrived within 30 seconds.
        json.JSONDecodeError: the patched text is still not valid JSON.
    """
    # NOTE(review): verify=False turns off TLS certificate verification, so the
    # payload could be tampered with in transit. It is kept because the host's
    # certificate chain may not validate - confirm and remove if possible.
    r = requests.get(
        "https://apps.utdallas.edu/transfercredit/schools.json",
        verify=False,
        timeout=30,  # without a timeout requests can block forever
    )
    # Fail loudly on an error page instead of feeding it to the JSON parser.
    r.raise_for_status()
    return json.loads(
        f"{{{r.text}}}".replace("=", ":")
        .replace("schools", '"schools"')
        .replace(",]", "]")
    )

# Fetch the transfer-school list once and keep it for the cells below.
transfer_schools = get_transfer_schools()

In [None]:
# Show the parsed transfer-school data as JSON indented by four spaces.
print(json.dumps(transfer_schools, indent=4))

# Courses

In [None]:
def get_departments_table():
    """Scrape the 2024 undergraduate catalog index into a list of table rows.

    Returns:
        A list with one entry per row of the ``<table id="courses">`` element
        that has at least one non-empty ``<td>``. Each entry is the list of
        that row's stripped, non-empty cell texts, in document order.

    Raises:
        requests.HTTPError: the catalog answered with a 4xx/5xx status.
        requests.Timeout: no response arrived within 30 seconds.
        ValueError: the page contains no ``<table id="courses">``.
    """
    r = requests.get(
        "https://catalog.utdallas.edu/2024/undergraduate/courses", timeout=30
    )
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    table = soup.find("table", {"id": "courses"})
    if table is None:
        raise ValueError('no <table id="courses"> found on the catalog index page')

    # html.parser does not synthesise a <tbody>; fall back to the table itself.
    table_body = table.find("tbody")
    if table_body is None:
        table_body = table

    department_table = []
    for row in table_body.find_all("tr"):
        cells = [cell.text.strip() for cell in row.find_all("td")]
        cells = [text for text in cells if text]  # Get rid of empty values
        # Rows without usable <td> text (headers, spacers) would otherwise
        # become [] and break callers that read row[0].
        if cells:
            department_table.append(cells)
    return department_table

# Scrape the department listing once; it is passed to get_course_infomation below.
department_table = get_departments_table()

In [None]:
def _requisite_text(pattern, text, group):
    """Return capture ``group`` of the first ``pattern`` match in ``text``, stripped, or "" if none."""
    match = re.search(pattern, text)
    return match.group(group).strip() if match else ""


def get_course_infomation(department_table):
    """Scrape every department page of the 2024 catalog into a course dictionary.

    Args:
        department_table: iterable of rows whose first element is the
            department prefix appended to the catalog URL (for example the
            output of ``get_departments_table``). Empty rows are skipped.

    Returns:
        A dict keyed by ``"<PREFIX> <NUMBER>"``. The key is built from each
        ``<p>`` element's upper-cased ``id``: its last four characters are the
        number and the rest is the prefix. Each value has the shape::

            {
                "prefix": str,
                "number": str,
                "name": str,  # text of the span with class "course_title"
                "requisites": {
                    "prerequisite": str,
                    "corequisite": str,
                    "prerequisite_or_corequisite": str,
                },
            }

        A requisite is "" when its clause is not found. A clause is only
        recognised when it consists of letters, digits, spaces and
        parentheses and is terminated by a period.

    Raises:
        requests.HTTPError: a department page answered with a 4xx/5xx status.
        requests.Timeout: a department page did not respond within 30 seconds.
        ValueError: a department page has no ``<div id="bukku-page">``.
    """
    courses = {}
    for row in department_table:
        if not row:
            continue
        department = row[0]
        r = requests.get(
            "https://catalog.utdallas.edu/2024/undergraduate/courses/" + department,
            timeout=30,
        )
        r.raise_for_status()
        soup = BeautifulSoup(r.content, "html.parser")
        courses_listing = soup.find("div", {"id": "bukku-page"})
        if courses_listing is None:
            raise ValueError(
                f'no <div id="bukku-page"> found on the page for {department!r}'
            )

        for course in courses_listing.find_all("p"):
            raw_id = course.get("id")
            title = course.find("span", {"class": "course_title"})
            # Paragraphs lacking an id or a title span cannot be turned into a
            # course record, so they are skipped instead of raising.
            if raw_id is None or title is None:
                continue

            raw_id = raw_id.upper()
            prefix = raw_id[:-4]
            number = raw_id[-4:]

            # Normalise the description so the requisite patterns below only
            # have to cope with letters, digits, spaces and parentheses.
            text = (
                course.text.strip()
                .replace(",", "")
                .replace("or better", "")
                .replace("or equivalent", "")
                .replace("  ", " ")
            )

            courses[f"{prefix} {number}"] = {
                "prefix": prefix,
                "number": number,
                "name": title.text.strip(),
                "requisites": {
                    "prerequisite": _requisite_text(
                        r"\. Prerequisites?:\s([a-zA-Z0-9\(\ )]*)\.", text, 1
                    ),
                    "corequisite": _requisite_text(
                        r"\. Corequisites?:\s([a-zA-Z0-9\(\ )]*)\.", text, 1
                    ),
                    # Group 1 is the heading alternative; group 2 is the clause.
                    "prerequisite_or_corequisite": _requisite_text(
                        r"\. (Prerequisites? or Corequisites?:|Corequisites? or Prerequisites?:)\s([a-zA-Z0-9\(\ )]*)\.",
                        text,
                        2,
                    ),
                },
            }
    return courses

In [None]:
# Quick alternative restricted to the "CS" rows (swap with the line below to use it):
# courses = get_course_infomation(list(filter(lambda x: x[0] == "CS", department_table)))
# Full crawl over every row of department_table.
courses = get_course_infomation(department_table)

In [None]:
# Show every scraped course record as JSON indented by four spaces.
print(json.dumps(courses, indent=4))

# Miscellaneous

In [None]:
# If you need to condense boolean trees

In [None]:
def tokenize_req_description(desc):
    """Split a requisite description into parentheses, operators and operand strings.

    The description is upper-cased, and a ``COND`` pseudo-operator is inserted
    in front of every "WITH A GRADE" so a grade condition is handled like a
    binary operator. ``AND``, ``OR`` and ``COND`` are only recognised as whole
    words. Whitespace-only pieces are dropped, the remaining pieces are
    stripped, and periods are removed from them.
    """
    normalised = desc.upper().replace("WITH A GRADE", "COND WITH A GRADE")
    pattern = r"(\(|\)|((?!\bAND\b|\bOR\b|\bCOND\b|\(|\)).)+|\bAND\b|\bOR\b|\bCOND\b)"

    tokens = []
    for match in re.finditer(pattern, normalised):
        piece = match.group(1).strip()
        if piece:
            # Periods are removed only after the emptiness check.
            tokens.append(piece.replace(".", ""))
    return tokens

In [None]:
# Precedence is COND > AND > OR, so AND already binds tighter than OR.
def infix_to_postfix(tokens):
    """Convert an infix requisite token list to postfix with the shunting-yard algorithm.

    Args:
        tokens: list of strings containing "(", ")", the operators "AND",
            "OR" and "COND", and operand strings (anything else).

    Returns:
        The tokens in postfix order without parentheses. Operators of equal
        precedence are left-associative. A single-token list is returned
        unchanged (the same list object).

    Unbalanced parentheses are tolerated instead of raising: an unmatched
    ")" flushes the operators collected so far, and an unmatched "(" is
    discarded rather than emitted as if it were a token.
    """
    if len(tokens) == 1:
        return tokens

    precedence = {"COND": 3, "AND": 2, "OR": 1}
    stack = []
    result = []
    for token in tokens:
        if token == "(":
            stack.append(token)
        elif token == ")":
            # Flush operators back to the matching "(" (or to the bottom of
            # the stack when this ")" has no partner).
            while stack and stack[-1] != "(":
                result.append(stack.pop())
            if stack:
                stack.pop()  # drop the matching "("
        elif token in precedence:
            # "(" has precedence 0, so it is never popped by an operator.
            while stack and precedence.get(stack[-1], 0) >= precedence[token]:
                result.append(stack.pop())
            stack.append(token)
        else:  # append operand
            result.append(token)
    while stack:
        leftover = stack.pop()
        if leftover != "(":  # an unclosed "(" must not reach the output
            result.append(leftover)
    return result

In [None]:
def postfix_to_tree(tokens):
    """Build an anytree expression tree from a postfix token list.

    Args:
        tokens: postfix sequence of operand strings and the operators "AND",
            "OR" and "COND" (for example the output of ``infix_to_postfix``).

    Returns:
        The root ``Node``. Every operator becomes a node with two children
        (left operand first). A single-token list yields an "AND" node with
        that token as its only child. If operands are left over, the node at
        the bottom of the stack is returned.

    Raises:
        IndexError: ``tokens`` is empty or an operator has fewer than two
            operands available (type kept for backward compatibility).
    """
    if len(tokens) == 1:
        # Presumably wrapped so the root is always an operator node.
        return Node("AND", children=[Node(tokens[0])])

    operators = ("AND", "OR", "COND")
    stack = []
    for token in tokens:
        if token in operators:
            if len(stack) < 2:
                raise IndexError(
                    f"operator {token!r} needs two operands in postfix expression {tokens!r}"
                )
            right = stack.pop()
            left = stack.pop()
            stack.append(Node(token, children=[left, right]))
        else:
            stack.append(Node(token))
    if not stack:
        raise IndexError("cannot build a requisite tree from an empty token list")
    return stack[0]

In [None]:
# simplify binary tree operators by combining nodes with the same operator
def simplify_tree(node):
    """Flatten chains of the same operator in place and return ``node``.

    Children are simplified first. Then, when ``node`` is an "AND", "OR" or
    "COND" node, every child carrying the same operator name is replaced by
    that child's own children, so nested pairs collapse into one n-ary node.
    Nodes without children are returned untouched.
    """
    if not node.children:
        return node

    # Detach every child, simplify each subtree, then reattach in the same order.
    detached = list(node.children)
    for child in detached:
        child.parent = None
    node.children = [simplify_tree(child) for child in detached]

    if node.name in ("AND", "OR", "COND"):
        merged = []
        for child in node.children:
            if child.name == node.name:
                # Same operator as the parent: hoist the grandchildren.
                merged.extend(child.children)
            else:
                merged.append(child)
        node.children = merged
    return node

In [None]:
def displayTree(node):
    """Print the tree rooted at ``node``, one line per node, each led by its RenderTree prefix."""
    for branch_prefix, _fill, current in RenderTree(node):
        print("%s%s" % (branch_prefix, current.name))

In [None]:
def tree_string(node):
    """Return the RenderTree rendering of ``node`` as text, one newline-terminated line per node."""
    return "".join(
        "%s%s\n" % (branch_prefix, current.name)
        for branch_prefix, _fill, current in RenderTree(node)
    )

In [None]:
def display_requisites_as_tree(req):
    """Tokenize ``req``, convert it to postfix, build and simplify the tree, then print it."""
    tokens = tokenize_req_description(req)
    postfix = infix_to_postfix(tokens)
    tree = simplify_tree(postfix_to_tree(postfix))
    displayTree(tree)

In [None]:
def requisites_as_tree_string(req):
    """Tokenize ``req``, convert it to postfix, build and simplify the tree, and return its rendering."""
    tokens = tokenize_req_description(req)
    postfix = infix_to_postfix(tokens)
    tree = simplify_tree(postfix_to_tree(postfix))
    return tree_string(tree)

In [None]:
# Heading written above each tree, keyed by the field of course["requisites"].
# The "prerequisite" and "prerequisite_or_corequisite" headings used to be swapped.
requisite_headings = {
    "prerequisite": "Prerequisites:",
    "corequisite": "Corequisites:",
    "prerequisite_or_corequisite": "Prerequisites or Corequisites:",
}

# Write one section per non-empty requisite: course line, heading, rendered
# tree, blank line.
with open("requisite_trees_big.txt", "w", encoding="utf-8") as f:
    for course_id, course in courses.items():
        for field, heading in requisite_headings.items():
            requisite = course["requisites"][field]
            if requisite == "":
                continue
            f.write(f"{course_id} {course['name']}\n")
            f.write(f"{heading}\n")
            f.write(requisites_as_tree_string(requisite))
            f.write("\n")