In [None]:
# pip install requests beautifulsoup4 anytree

In [None]:
import requests
import json
from bs4 import BeautifulSoup
from anytree import Node, RenderTree
import re

In [None]:
def get_transfer_school_credits_all():
    """Scrape UTD's "search by test" pages into one lookup table.

    Fetches the A & AS Level, AP, CLEP and IB credit pages, reads the first
    ``<table>`` on each page and turns every body row into one entry keyed by
    the text of the row's first column (the test component).

    Returns:
        dict: test component -> dict with the keys ``test_type``, ``exam``,
        ``earned_score``, ``utd_courses``, ``course_name`` and
        ``max_claimable_hours``. If a component shows up more than once, the
        row seen last wins.
    """

    def squash(text):
        # Collapse every run of whitespace into a single space.
        return " ".join(text.split())

    pages = (
        (
            "A & AS Level",
            "https://transfercredit.utdallas.edu/search-by-test/a-as-level-credit/",
        ),
        ("AP", "https://transfercredit.utdallas.edu/search-by-test/ap-credit/"),
        ("CLEP", "https://transfercredit.utdallas.edu/search-by-test/clep-credit/"),
        ("IB", "https://transfercredit.utdallas.edu/search-by-test/ib-credit/"),
    )

    credits_by_component = {}
    for test_type, url in pages:
        response = requests.get(url)
        page = BeautifulSoup(response.content, "html.parser")
        body = page.find("table").find("tbody")

        for table_row in body.find_all("tr"):
            # Column order on the page:
            # Test Component | Exam | Earned Score | UTD Course(s) for which credit can be earned | Course Name | Max Claimable Hours
            cells = [cell.text.strip() for cell in table_row.find_all("td")]
            component = cells[0]
            credits_by_component[component] = {
                "test_type": test_type,
                "exam": squash(cells[1]),
                "earned_score": squash(cells[2]),
                # Em dashes are spelled out as three hyphens.
                "utd_courses": squash(cells[3]).replace("—", "---"),
                "course_name": squash(cells[4]),
                "max_claimable_hours": squash(cells[5]),
            }
    return credits_by_component

In [None]:
# Scrape all four test-credit pages (needs network access); the result is keyed by test component.
test_credits_table = get_transfer_school_credits_all()

In [None]:
# Clean up the scraped strings in place. Every character other than letters,
# digits, space, comma and hyphen is dropped from the course list, which also
# removes whatever marker the page uses for core curriculum credit (ignored for now).
allowed_punctuation = (" ", ",", "-")
for credit in test_credits_table.values():
    # U+2013 (en dash) -> plain hyphen.
    credit["exam"] = credit["exam"].replace("\u2013", "-")

    with_operators = credit["utd_courses"].replace("+", " AND ")
    filtered = "".join(
        ch for ch in with_operators if ch.isalnum() or ch in allowed_punctuation
    )
    # U+2020 (dagger) is stripped explicitly as well, then everything is upper-cased.
    credit["utd_courses"] = filtered.replace("\u2020", "").upper()

In [None]:
# WHAP (table key "WH") is the only test credit that earns credit without
# mapping to an actual UTD course, and the page does not label it the way the
# transfer-credit pages do, so its equivalency is hard-coded here.
# NOTE(review): raises KeyError if the scrape no longer contains a "WH" row.
test_credits_table["WH"]["utd_courses"] = "GNED 1040"

In [None]:
# For any course number listed without its own prefix, repeat the prefix that
# precedes it, e.g.
#   MATH 2312, 2333, 2413, STAT 1342, 2332
#   -> MATH 2312, MATH 2333, MATH 2413, STAT 1342, STAT 2332
def _fill_missing_prefixes(courses):
    """Return ``courses`` with every bare ", <number>" given a subject prefix.

    The prefix used is the closest upper-case word that appears *before* the
    bare number. The search is restricted to the text in front of the number;
    slicing the list of all prefixes by a character offset (as was done
    before) picks up prefixes that only occur later in the string.

    Args:
        courses: upper-cased, comma-separated course list.

    Returns:
        The rewritten string. It is returned unchanged from the first bare
        number that has no prefix in front of it.
    """
    while True:
        bare_number = re.search(r", [0-9-]+", courses)
        if bare_number is None:
            return courses
        # Whole words only (so the "V" inside a number such as 2V95 is not a
        # candidate); AND/OR are operators inserted earlier, never prefixes.
        earlier_prefixes = [
            word
            for word in re.findall(r"\b[A-Z]+\b", courses[: bare_number.start()])
            if word not in ("AND", "OR")
        ]
        if not earlier_prefixes:
            # Nothing to inherit from; stop instead of raising IndexError.
            return courses
        # Keep ", ", insert the prefix, then continue from the space before the number.
        courses = (
            courses[: bare_number.start() + 2]
            + earlier_prefixes[-1]
            + courses[bare_number.start() + 1 :]
        )


for test_credit in test_credits_table.values():
    # for now, ignore the flag for core curriculum credit
    test_credit["utd_courses"] = _fill_missing_prefixes(test_credit["utd_courses"])

In [None]:
# Reshape the scraped rows into the camelCase records that get exported.
test_credits_final = {}
for component, credit in test_credits_table.items():
    earned_score = credit["earned_score"]
    if "-" in earned_score:
        # A range such as "4-5": lower bound first, upper bound second.
        score_bounds = earned_score.split("-")
        min_score = int(score_bounds[0])
        max_score = int(score_bounds[1])
    else:
        # A single score is both the minimum and the maximum.
        min_score = int(earned_score)
        max_score = int(earned_score)

    test_credits_final[component] = {
        "testType": credit["test_type"],
        "examName": credit["exam"],
        "minScore": min_score,
        "maxScore": max_score,
        # Parsed through float first so values written like "3.0" are accepted.
        "maxClaimableHours": int(float(credit["max_claimable_hours"])),
        # Kept as a flat string; not converted to a boolean tree.
        "utdEquivalencyCourses": credit["utd_courses"],
    }

In [None]:
# with open("test_credit_info.json", "w", encoding="utf-8") as f:
#     json.dump(test_credits_final, f, indent=4)

In [None]:
# Replace the in-memory table with the cached copy on disk.
# The (commented-out) writer for this file uses UTF-8, so read it back with the
# same explicit encoding instead of the platform default.
with open("test_credit_info.json", "r", encoding="utf-8") as file:
    test_credits_final = json.load(file)

# Transfer Schools

In [None]:
def get_transfer_schools():
    """Download the list of transfer schools and parse it into a dict.

    The endpoint does not return strict JSON. Judging from the repairs below,
    the payload presumably looks like ``schools = [ {...}, {...}, ]``
    (TODO confirm against a live response): it is wrapped in braces, the
    assignment is turned into a ``"schools":`` key and the trailing comma
    before ``]`` is removed.

    Returns:
        dict: the parsed payload; the school list is under the "schools" key.

    Raises:
        json.JSONDecodeError: if the repaired text is still not valid JSON.
    """
    # NOTE(review): verify=False disables TLS certificate verification for
    # this request; re-enable it if the server's certificate chain allows.
    r = requests.get(
        "https://apps.utdallas.edu/transfercredit/schools.json", verify=False
    )
    return json.loads(
        # Only the leading assignment should be rewritten, so both repairs are
        # limited to the first occurrence. An "=" or the word "schools" inside
        # a school record is left alone (quoting it there would break the JSON).
        f"{{{r.text}}}".replace("=", ":", 1)
        .replace("schools", '"schools"', 1)
        .replace(",]", "]")
    )


transfer_schools = get_transfer_schools()

In [None]:
# school_id -> list of course equivalencies, filled by set_transfer_school_credits.
transfer_school_credits = {}


def set_transfer_school_credits(school_id):
    """Fetch one school's equivalency table and store it in ``transfer_school_credits``.

    Posts the school id to the transfer-credit endpoint, parses the table with
    id ``schoolTable`` and records one dict per body row under
    ``transfer_school_credits[school_id]``. Nothing is returned.

    Args:
        school_id: identifier sent as the ``id`` form field.
    """

    def squash(text):
        # Trim, then collapse every run of whitespace into one space.
        return " ".join(text.strip().split())

    def code_and_name(cell):
        # The course code is in <strong>; the name is the node right after the <br>.
        code = squash(cell.find("strong").text)
        name = squash(cell.find("br").next_sibling)
        return code, name

    response = requests.post(
        "https://apps.utdallas.edu/transfercredit/ajax.php",
        data={"id": school_id, "option": 1},
        verify=False,
    )
    page = BeautifulSoup(response.content, "html.parser")
    body = page.find("table", {"id": "schoolTable"}).find("tbody")

    equivalencies = []
    for table_row in body.find_all("tr"):
        # First column: external course to transfer; second: UTD equivalent.
        cells = table_row.find_all("td")
        external_course, external_course_name = code_and_name(cells[0])
        utd_course, utd_course_name = code_and_name(cells[1])
        equivalencies.append(
            {
                "external_course": external_course,
                "external_course_name": external_course_name,
                "utd_course": utd_course,
                "utd_course_name": utd_course_name,
            }
        )
    transfer_school_credits[school_id] = equivalencies

In [None]:
from concurrent.futures import ThreadPoolExecutor

# Start from an empty result so a re-run never mixes in stale entries.
transfer_school_credits = {}
num_schools = len(transfer_schools["schools"])
school_ids = [school["school_id"] for school in transfer_schools["schools"]]

# school_id -> exception for every school whose scrape raised. The result of
# executor.map() was never consumed before, so worker exceptions were
# discarded and the affected schools were silently missing from the result.
failed_transfer_schools = {}
with ThreadPoolExecutor(max_workers=500) as executor:
    submitted = [
        (school_id, executor.submit(set_transfer_school_credits, school_id))
        for school_id in school_ids
    ]
    for school_id, future in submitted:
        error = future.exception()  # waits for this worker to finish
        if error is not None:
            failed_transfer_schools[school_id] = error

# Still best-effort (one bad page does not abort the run), but no longer silent.
if failed_transfer_schools:
    print(
        f"{len(failed_transfer_schools)} of {num_schools} schools could not be scraped: "
        f"{sorted(failed_transfer_schools, key=str)}"
    )

In [None]:
# with open("transfer_school_info.json", "w", encoding="utf-8") as f:
#     json.dump(transfer_school_credits, f, indent=4)

In [None]:
# Replace the scraped equivalencies with the cached copy on disk.
# The (commented-out) writer for this file uses UTF-8, so read it back with the
# same explicit encoding instead of the platform default.
# Note: JSON object keys are always strings, so the school ids are str after this load.
with open("transfer_school_info.json", "r", encoding="utf-8") as file:
    transfer_school_credits = json.load(file)

In [None]:
# Join each school's metadata with its list of course equivalencies.
# NOTE(review): transfer_school_credits may have been reloaded from JSON, where
# every key is a string; this lookup assumes school_id is a string too — confirm.
school_fields = ("school_name", "school_city", "school_state", "school_country")

transfer_school_credits_final = {}
for school in transfer_schools["schools"]:
    school_id = school["school_id"]
    record = {field: school[field] for field in school_fields}
    record["transfer_credits"] = transfer_school_credits[school_id]
    transfer_school_credits_final[school_id] = record

In [None]:
# with open("transfer_school_credits_final.json", "w", encoding="utf-8") as f:
#     json.dump(transfer_school_credits_final, f, indent=4)

In [None]:
# Replace the joined table with the cached copy on disk.
# The (commented-out) writer for this file uses UTF-8, so read it back with the
# same explicit encoding instead of the platform default.
with open("transfer_school_credits_final.json", "r", encoding="utf-8") as file:
    transfer_school_credits_final = json.load(file)

In [None]:
# Every distinct UTD equivalency string across all schools, in sorted order.
utd_courses_equivalency = sorted(
    {
        credit["utd_course"]
        for school in transfer_school_credits_final.values()
        for credit in school["transfer_credits"]
    }
)

In [None]:
# Dump the distinct equivalency strings, one per line, for manual inspection.
# Written as UTF-8 like the other files this notebook produces: scraped text
# can contain non-ASCII characters, which the platform default encoding may
# not be able to represent.
with open("uh.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(utd_courses_equivalency))

In [None]:
# Display the full sorted list of distinct UTD equivalency strings.
utd_courses_equivalency

In [None]:
# Re-key the joined table into the camelCase export format; every course
# equivalency also carries the id of the school it belongs to.
transfer_info = {}
for school_id, info in transfer_school_credits_final.items():
    school_record = {
        "schoolName": info["school_name"],
        "schoolCity": info["school_city"],
        "schoolState": info["school_state"],
        "schoolCountry": info["school_country"],
    }
    school_record["TransferCourseEquivalencies"] = [
        {
            "transferCourseID": credit["external_course"],
            "transferCourseName": credit["external_course_name"],
            "utdCourseEquivalency": credit["utd_course"],
            "utdCourseEquivalencyName": credit["utd_course_name"],
            "transferSchoolSchoolID": school_id,
        }
        for credit in info["transfer_credits"]
    ]
    transfer_info[school_id] = school_record

In [None]:
# with open("transfer_info.json", "w", encoding="utf-8") as f:
#     json.dump(transfer_info, f, indent=4)

In [None]:
# Replace the export table with the cached copy on disk.
# The (commented-out) writer for this file uses UTF-8, so read it back with the
# same explicit encoding instead of the platform default.
with open("transfer_info.json", "r", encoding="utf-8") as file:
    transfer_info = json.load(file)

# Courses

In [None]:
def get_departments_table():
    """Scrape the 2024 undergraduate course index into a list of rows.

    Reads the body of the table with id ``courses`` from the catalog page.

    Returns:
        list[list[str]]: one list per table row holding the stripped text of
        its non-empty cells, in column order.
    """
    response = requests.get("https://catalog.utdallas.edu/2024/undergraduate/courses")
    page = BeautifulSoup(response.content, "html.parser")
    body = page.find("table", {"id": "courses"}).find("tbody")

    return [
        # Cells that are empty after stripping are left out.
        [text for text in (cell.text.strip() for cell in table_row.find_all("td")) if text]
        for table_row in body.find_all("tr")
    ]


department_table = get_departments_table()

In [None]:
def get_course_infomation(department_table):
    """Scrape the 2024 undergraduate catalog page of every listed department.

    Args:
        department_table: rows whose first element is a course prefix; the
            prefix is appended to the catalog URL to locate the department page.

    Returns:
        dict: "PREFIX NUMBER" -> dict with "prefix", "number", "name" and
        "requisites". "requisites" holds the raw text captured after the
        "Prerequisite(s):", "Corequisite(s):" and combined
        "Prerequisite(s) or Corequisite(s):" labels, or "" when a label was
        not found.
    """

    def captured(match, group):
        # Stripped text of the requested capture group, or "" without a match.
        return match.group(group).strip() if match else ""

    catalog = {}
    for row in department_table:
        response = requests.get(
            "https://catalog.utdallas.edu/2024/undergraduate/courses/" + row[0]
        )
        page = BeautifulSoup(response.content, "html.parser")
        listing = page.find("div", {"id": "bukku-page"})

        for paragraph in listing.find_all("p"):
            # The paragraph id is the prefix followed by the 4-character number.
            anchor = paragraph.get("id").upper()
            prefix, number = anchor[:-4], anchor[-4:]
            title = paragraph.find("span", {"class": "course_title"}).text.strip()

            # Remove commas and grade qualifiers so the requisite sentence
            # contains only characters the patterns below accept.
            text = paragraph.text.strip()
            for noise in (",", "or better", "or higher", "or equivalent"):
                text = text.replace(noise, "")
            text = text.replace("  ", " ")

            prerequisites = re.search(
                r"\. Prerequisites?:\s([a-zA-Z0-9\(\ )]*)\.", text
            )
            corequisites = re.search(
                r"\. Corequisites?:\s([a-zA-Z0-9\(\ )]*)\.", text
            )
            either = re.search(
                r"\. (Prerequisites? or Corequisites?:|Corequisites? or Prerequisites?:)\s([a-zA-Z0-9\(\ )]*)\.",
                text,
            )

            catalog[f"{prefix} {number}"] = {
                "prefix": prefix,
                "number": number,
                "name": title,
                "requisites": {
                    "prerequisites": captured(prerequisites, 1),
                    "corequisites": captured(corequisites, 1),
                    # Group 1 of this pattern is the label; the text is group 2.
                    "prerequisitesOrCorequisites": captured(either, 2),
                },
            }
    return catalog

In [None]:
# Scrape every department in department_table (one request per department).
# The commented-out line below restricts the scrape to the "CS" prefix only.
# courses = get_course_infomation(list(filter(lambda x: x[0] == "CS", department_table)))
courses = get_course_infomation(department_table)

In [None]:
import copy

# Untouched snapshot of the scraped courses; later cells deep-copy from
# courses2 before rewriting the requisite fields.
courses2 = copy.deepcopy(courses)

In [None]:
# Pretty-print the whole scrape for inspection (prints every course).
print(json.dumps(courses, indent=4))

## Miscellaneous

In [None]:
# If you need to condense boolean trees

In [None]:
# schools = {
#     "Erik Jonsson School of Engineering and Computer Science",
#     "Harry W. Bass Jr. School of Arts, Humanities, and Technology",
#     "Honors College",
#     "Naveen Jindal School of Management",
#     "School of Behavioral and Brain Sciences",
#     "School of Economic, Political and Policy Sciences",
#     "School of Interdisciplinary Studies",
#     "School of Natural Sciences and Mathematics",
#     "Undergraduate Studies",
# }

In [None]:
# Shortened school names (compare the full names in the commented-out cell).
# tokenize_req_description reads this set and strips the word "AND" from any
# of these names found in a requisite description, so that the name is not
# split at a boolean operator.
schools = {
    "Engineering and Computer Science",
    "Arts, Humanities, and Technology",
    "Honors College",
    "Naveen Jindal Management",
    "Behavioral and Brain Sciences",
    "Economic, Political and Policy Sciences",
    "Interdisciplinary Studies",
    "Natural Sciences and Mathematics",
    "Undergraduate Studies",
}

In [None]:
def tokenize_req_description(desc, school_names=None):
    """Split a requisite description into operands, operators and parentheses.

    The text is upper-cased and commas and periods are removed. Every
    "WITH A (MINIMUM) GRADE ..." clause is prefixed with the pseudo-operator
    COND so that it binds to the course in front of it like a multiplication.

    Args:
        desc: requisite text, e.g. "CS 1337 and (MATH 2413 or MATH 2417)".
        school_names: school names that may contain the words "and"/"or";
            those words are removed from the names inside ``desc`` so they are
            not mistaken for operators. Defaults to the notebook-level
            ``schools`` set, which is only looked up when this argument is
            omitted.

    Returns:
        list[str]: tokens in order of appearance. Each token is "(", ")",
        "AND", "OR", "COND" or a stripped run of operand text.
    """
    if school_names is None:
        school_names = schools

    desc = desc.upper()
    desc = desc.replace("WITH A GRADE", "WITH A MINIMUM GRADE")
    # treat "WITH A MINIMUM GRADE" as a pretend multiplication operator
    desc = desc.replace("WITH A MINIMUM GRADE", "COND WITH A MINIMUM GRADE")
    desc = desc.replace(",", "").replace(".", "")

    # Longest names first (ties alphabetical): the result no longer depends
    # on set iteration order if one name happens to contain another.
    for school in sorted(school_names, key=lambda name: (-len(name), name)):
        plain_name = school.upper().replace(",", "")
        # Remove AND/OR only as whole words; letters inside other words stay.
        desc = desc.replace(plain_name, re.sub(r"\b(?:AND|OR)\b", "", plain_name))

    pieces = re.finditer(
        r"(\(|\)|((?!\bAND\b|\bOR\b|\bCOND\b|\(|\)).)+|\bAND\b|\bOR\b|\bCOND\b)", desc
    )
    # Whitespace-only runs between two operators/parentheses are not tokens.
    return [piece.group(1).strip() for piece in pieces if piece.group(1).strip() != ""]

In [None]:
def infix_to_postfix(tokens):
    """Convert an infix token list to postfix order (shunting-yard).

    Operator precedence, highest first: COND, AND, OR. All three are
    left-associative. Parentheses group sub-expressions and never appear in
    the result.

    Unbalanced parentheses are tolerated, because requisite text scraped from
    the catalog can be cut off mid-expression: a ")" without an opening
    partner only flushes the operators collected so far, and a "(" that is
    never closed is dropped.

    Args:
        tokens: list of operands, "AND"/"OR"/"COND" and "("/")" strings.

    Returns:
        list[str]: the tokens in postfix order. A single-token input is
        returned as the same list object.
    """
    if len(tokens) == 1:
        return tokens

    precedence = {"COND": 3, "AND": 2, "OR": 1}

    operator_stack = []
    output = []
    for token in tokens:
        if token == "(":
            operator_stack.append(token)
        elif token == ")":
            while operator_stack and operator_stack[-1] != "(":
                output.append(operator_stack.pop())
            if operator_stack:
                operator_stack.pop()  # discard the matching "("
        elif token in precedence:
            # "(" has precedence 0, so operators never pop past an open group.
            while (
                operator_stack
                and precedence.get(operator_stack[-1], 0) >= precedence[token]
            ):
                output.append(operator_stack.pop())
            operator_stack.append(token)
        else:  # operand
            output.append(token)

    while operator_stack:
        leftover = operator_stack.pop()
        if leftover != "(":  # an unclosed "(" is not an operator
            output.append(leftover)
    return output

In [None]:
def postfix_to_tree(tokens):
    """Build an anytree expression tree from postfix tokens.

    Args:
        tokens: postfix token list as produced by ``infix_to_postfix``.

    Returns:
        Node: the root of the tree. A single operand is wrapped as the only
        child of an "AND" node. Otherwise each operator becomes a node with
        its two operands as children (left first), and the bottom of the
        evaluation stack is returned.

    Raises:
        IndexError: if ``tokens`` is empty or an operator has fewer than two
            operands in front of it.
    """
    if len(tokens) == 1:
        return Node("AND", children=[Node(tokens[0])])

    operators = ("AND", "OR", "COND")
    stack = []
    for token in tokens:
        if token in operators:
            # Operands come off the stack in reverse order.
            right = stack.pop()
            left = stack.pop()
            stack.append(Node(token, children=[left, right]))
        else:
            stack.append(Node(token))
    # The single-token case is fully handled above, so the second
    # (unreachable) len(tokens) == 1 check that used to sit here was removed.
    return stack[0]

In [None]:
def simplify_tree(node):
    """Flatten chains of one operator into a single n-ary node, in place.

    Children are simplified first. If ``node`` is an AND, OR or COND node,
    every direct child with that same name is then replaced by its own
    children, so AND(AND(a, b), c) becomes AND(a, b, c).

    Args:
        node: tree node exposing ``name``, ``children`` and ``parent``.

    Returns:
        The same ``node`` object, restructured.
    """
    if not node.children:
        return node

    # Detach the children before recursing, then hang the simplified
    # subtrees back onto this node.
    detached = list(node.children)
    for kid in detached:
        kid.parent = None
    node.children = [simplify_tree(kid) for kid in detached]

    if node.name in ("AND", "OR", "COND"):
        merged = []
        for kid in node.children:
            if kid.name == node.name:
                # Same operator as the parent: adopt the grandchildren directly.
                merged.extend(kid.children)
            else:
                merged.append(kid)
        node.children = merged
    return node

In [None]:
def displayTree(node):
    """Print the tree rooted at ``node``, one line per node, with anytree's branch prefixes."""
    for branch_prefix, _fill, current in RenderTree(node):
        print("%s%s" % (branch_prefix, current.name))

In [None]:
def tree_string(node):
    """Return the rendering that ``displayTree`` prints, as one newline-terminated string."""
    return "".join(
        "%s%s\n" % (branch_prefix, current.name)
        for branch_prefix, _fill, current in RenderTree(node)
    )

In [None]:
def force_AND_as_root(node):
    """Return a new "AND" node whose only child is ``node``."""
    only_child = [node]
    return Node("AND", children=only_child)

In [None]:
def display_requisites_as_tree(req):
    """Parse a requisite description and print the simplified boolean tree.

    Pipeline: tokenize -> postfix -> tree -> wrap in an AND root -> simplify.
    """
    tokens = tokenize_req_description(req)
    expression = postfix_to_tree(infix_to_postfix(tokens))
    rooted = force_AND_as_root(expression)
    displayTree(simplify_tree(rooted))

In [None]:
def requisites_as_tree_string(req):
    """Parse a requisite description and return the simplified boolean tree as text.

    Pipeline: tokenize -> postfix -> tree -> wrap in an AND root -> simplify.
    """
    tokens = tokenize_req_description(req)
    expression = postfix_to_tree(infix_to_postfix(tokens))
    rooted = force_AND_as_root(expression)
    return tree_string(simplify_tree(rooted))

In [None]:
# Write every non-empty requisite of every course as a rendered tree.
courses3 = copy.deepcopy(courses2)

# (requisites key, heading) pairs in output order. The headings for
# "prerequisites" and "prerequisitesOrCorequisites" used to be swapped, so
# each of the two sections was written under the other one's label.
requisite_sections = (
    ("prerequisites", "Prerequisites:"),
    ("corequisites", "Corequisites:"),
    ("prerequisitesOrCorequisites", "Prerequisites or Corequisites:"),
)

with open("requisite_trees_big.txt", "w", encoding="utf-8") as f:
    for course_id, course in courses3.items():
        for requisite_key, heading in requisite_sections:
            description = course["requisites"][requisite_key]
            if description == "":
                continue
            f.write(f"{course_id} {course['name']}\n")
            f.write(f"{heading}\n")
            f.write(requisites_as_tree_string(description))
            f.write("\n")

## Requisites Final

In [None]:
def boolean_tree_to_json(node):
    """Convert a tree into nested JSON-compatible data.

    Returns:
        The node's name for a leaf; otherwise a one-key dict mapping the
        node's name to the list of its converted children, in order.
    """
    if not node.children:
        return node.name
    return {node.name: [boolean_tree_to_json(kid) for kid in node.children]}

In [None]:
def json_to_requisite_json(req, condition=None):
    """Convert the output of ``boolean_tree_to_json`` into requisite records.

    Args:
        req: an operand string, or a one-key dict ``{"AND" | "OR" | "COND": [...]}``.
        condition: minimum grade inherited from an enclosing COND node, if any.

    Returns:
        * operand string that is exactly a course ID (2-4 letters, a space,
          4 characters out of 0-9/V):
          ``{"type": "course", "courseID": ...}``, plus ``"minGrade"`` when a
          condition applies;
        * any other operand string: ``{"type": "custom", "text": ...}``;
        * AND / OR: ``{"logicalOperator": ..., "requisites": [...]}``;
        * COND: a *list* of the converted operands, each carrying the grade
          taken from the "WITH A MINIMUM GRADE OF ..." clause;
        * ``None`` for a dict without an AND, OR or COND key.

    NOTE(review): because COND yields a list, a COND nested inside AND/OR
    shows up as a nested list within "requisites". That shape is left as is
    because consumers may depend on it — confirm whether it should be flattened.
    """
    if isinstance(req, str):
        # The whole operand must be a course ID. A prefix-only match used to
        # turn text such as "PHYS 2325 LAB" into a bogus courseID.
        if re.fullmatch(r"[A-Z]{2,4} [0-9V]{4}", req):
            course = {"type": "course", "courseID": req}
            if condition is not None:
                course["minGrade"] = condition
            return course
        # just put everything else as custom
        return {"type": "custom", "text": req}

    grade_marker = "WITH A MINIMUM GRADE"

    def is_grade_clause(item):
        return isinstance(item, str) and grade_marker in item

    for key in req:
        if key in ("AND", "OR"):
            return {
                "logicalOperator": key,
                "requisites": [json_to_requisite_json(x, condition) for x in req[key]],
            }
        if key == "COND":
            # The grade clause is normally the last operand, but locate it
            # explicitly: a non-clause in last place used to be taken as the
            # grade (or raised AttributeError when it was a nested dict).
            grade_clauses = [x for x in req[key] if is_grade_clause(x)]
            if grade_clauses:
                letter_grade = (
                    grade_clauses[-1].replace("WITH A MINIMUM GRADE OF", "").strip()
                )
            else:
                letter_grade = condition  # nothing new to apply; keep the outer grade
            return [
                json_to_requisite_json(x, letter_grade)
                for x in req[key]
                if not is_grade_clause(x)
            ]
    return None

In [None]:
# Work on a fresh copy of the scrape so this cell can be re-run safely, then
# replace every requisite description with its structured form:
# tokenize -> postfix -> tree -> AND root -> simplify -> JSON -> requisite records.
# A description that is empty becomes {}.
courses = copy.deepcopy(courses2)
requisite_kinds = ("prerequisites", "corequisites", "prerequisitesOrCorequisites")

for course in courses.values():
    requisites = course["requisites"]
    for kind in requisite_kinds:
        description = requisites[kind]
        if description == "":
            requisites[kind] = {}
            continue
        tokens = tokenize_req_description(description)
        expression = postfix_to_tree(infix_to_postfix(tokens))
        simplified = simplify_tree(force_AND_as_root(expression))
        requisites[kind] = json_to_requisite_json(boolean_tree_to_json(simplified))

In [None]:
# Number of courses in the scraped snapshot.
len(courses2)

In [None]:
# Persist the structured course data as indented JSON.
serialized_courses = json.dumps(courses, indent=4)
with open("course_info.json", "w", encoding="utf-8") as out_file:
    out_file.write(serialized_courses)