In [92]:
pip install requests beautifulsoup4 pandas



In [93]:
import requests
import re
from bs4 import BeautifulSoup
from pathlib import Path
import csv


# url for UCSC Baskin courses
bsoe_base_url = "https://courses.engineering.ucsc.edu/"

# fetch the base page and parse the department links
base = requests.get(bsoe_base_url)
soup_base = BeautifulSoup(base.content, 'html.parser')
department_menu = soup_base.find("ul", id="main-menu")

# gather department links
department_links = []
for department in department_menu:
    if department != "\n":
        if link := department.find("a"):
            department_links.append(
                (bsoe_base_url + link["href"][1:], link.text))

department_links.pop()
# print(department_links)

# initialize a list to hold course data
course_data = []

# iterate through department links and extract course information
for link, name in department_links:
    r = requests.get(link)
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find("table")
    sections = table.find_all("li")

    for section in sections:
        link = section.find("a")
        section_num = link.text
        class_url = link["href"]
        class_res = re.search(
            "\/[a-z]*\/([A-Z0-9]*)\/([A-Za-z0-9]*)\/", class_url)

        if class_res:
            class_name = class_res.group(1)  # extract class name
            quarter = class_res.group(2)  # extract quarter
            full_prof = section.contents[3].strip() if len(section.contents) > 3 else ''

            # extract professor name, checking if it exists
            prof_res = re.search(r"([ a-zA-ZÀ-ž-.]*) \(|(Staff)", full_prof)
            professor_name = prof_res.group(1) or prof_res.group(2)

            # skip the course if no professor is found
            # case where the class is not taught at all that school year
            if not professor_name or professor_name.strip().lower() == "staff":
                continue

            # prepare the data for CSV
            course_data.append({
                "department": name,
                "quarter": quarter,
                "class_name": class_name,
                "professor_name": professor_name,
                "section": section_num,
            })
            # print(course_data[-1])  # print the last added data

# write data to a CSV file
csv_file = 'ucsc_website_baskin_courses_2024.csv'
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    fieldnames = ["department", "quarter", "class_name", "professor_name", "section"]
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(course_data)


In [94]:
### courses dicts ###

# define the courses as given
must_take = {
    "CSE 12": {"credits": 7, "prerequisites": [["CSE 5J", "CSE 20", "CSE 30", "BME 160"]]},
    "CSE 16": {"credits": 5, "prerequisites": [["MATH 20A", "MATH 19A", "MATH 19B", "MATH 11B", "AM 11B", "AM 15B", "ECON 11B"]]},
    "CSE 20": {"credits": 5, "prerequisites": []},
    "CSE 30": {"credits": 5, "prerequisites": [["CSE 20", "BME 160", "MATH 3", "MATH 11A", "MATH 19A", "AM 3", "AM 11A"]]},
    "CSE 40": {"credits": 5, "prerequisites": [["MATH 19B", "MATH 20B"], ["CSE 30"]]},
    "CSE 13S": {"credits": 7, "prerequisites": [["CSE 12", "BME 160"]]},
    "ECE 30": {"credits": 5, "prerequisites": [["MATH 19B", "MATH 20B"]]},

    "CSE 101": {"credits": 5, "prerequisites": [["CSE 12", "BME 160"], ["CSE 13E", "ECE 13", "CSE 13S"],
        ["CSE 16"], ["CSE 30"], ["MATH 11B", "MATH 19B", "MATH 20B", "AM 11B", "ECON 11B"]]},
    "CSE 101M": {"credits": 5, "prerequisites": [["CSE 101"]]},
    "CSE 120": {"credits": 5, "prerequisites": [["CSE 12"], ["CSE 13S", "CSE 13E", "ECE 13"]]},
    "CSE 130": {"credits": 5, "prerequisites": [["CSE 12"],["CSE 101"]]}
}

# must pick 1 calc 1 class
calc_1 = {
    "MATH 19A": {"credits": 5, "prerequisites": []},
    "MATH 20A": {"credits": 5, "prerequisites": []}
}

# must pick 1 calc 2 class
calc_2 = {
    "MATH 19B": {"credits": 5, "prerequisites": [["MATH 19A", "MATH 20A"]]},
    "MATH 20B": {"credits": 5, "prerequisites": [["MATH 20A"]]}
}

# must pick 1 calc 3 class
calc_3 = {
    "AM 30": {"credits": 5, "prerequisites": [["AM 10", "MATH 21"], ["MATH 19B", "MATH 20B"]]},
    "MATH 23A": {"credits": 5, "prerequisites": [["MATH 19A"]]}
}

# must pick 1 one linear alg class
linear_alg = {
    "AM 10": {"credits": 5, "prerequisites": []},
    "MATH 21": {"credits": 5, "prerequisites": [["MATH 19A"]]}
}

# must pick 1 stats class
stats = {
    "STAT 131": {"credits": 5, "prerequisites": [["AM 11B", "ECON 11B", "MATH 11B", "MATH 19B", "MATH 20B"]]},
    "CSE 107": {"credits": 5, "prerequisites": [["CSE 16"], ["AM 30", "MATH 22", "MATH 23A"]]}
}

# must pick 1 theory class
theory = {
    "CSE 102": {"credits": 5, "prerequisites": [["CSE 101"]]},
    "CSE 103": {"credits": 5, "prerequisites": [["CSE 101"]]}
}

# must pick 1 programming lang class
pl = {
    "CSE 112": {"credits": 5, "prerequisites": [["CSE 101"]]},
    "CSE 114A": {"credits": 5, "prerequisites": [["CSE 101"]]}
}


# must take 3 elective fillers here, plus a capstone that counts as an elective (listed after)
electives = {
    "elective 1": {"credits": 5, "prerequisites": []},
    "elective 2": {"credits": 5, "prerequisites": []},
    "elective 3": {"credits": 5, "prerequisites": []},
}

capstones = {
    "CSE 110B": {"credits": 5, "prerequisites": [["CSE 110A"]]},
    "CSE 115C": {"credits": 5, "prerequisites": [["CSE 115B"]]},
    "CSE 115D": {"credits": 5, "prerequisites": [["CSE 115A"]]},
    "CSE 121": {"credits": 7, "prerequisites": [["CSE 12", "CSE 100", "CSE 100L"],
            ["CSE 13E", "CSE 13S", "ECE 13", "CSE 15", "CSE 15L"],
            ["ECE 101", "ECE 101L", "PHYS 5C", "PHYS 5N"]
        ]
    },
    "CSE 134": {"credits": 5, "prerequisites": [["CSE 120", "CSE 130"]]},
    "CSE 138": {"credits": 5, "prerequisites": [["CSE 130", "CSE 131"]]},
    "CSE 140": {"credits": 5, "prerequisites": [["CSE 101", ["CSE 40", "STAT 132"]]]},
    "CSE 143": {
        "credits": 5,
        "prerequisites": [
            ["CSE 101"],
            ["CSE 107", "STAT 131"],
            ["CSE 40"]
        ]
    },
    "CSE 144": {
        "credits": 5,
        "prerequisites": [
            ["CSE 40", "STAT 132"],
            ["CSE 101"]
        ]
    },
    "CSE 145": {
        "credits": 5,
        "prerequisites": [
            ["CSE 15", "CSE 15L"],
            ["CSE 30"],
            ["CSE 13S"],
            ["AM 30", "MATH 22", "MATH 23A"],
            ["STAT 5", "CSE 107", "STAT 131"],
            ["AM 10", "MATH 21"],
            ["CSE 16", "ECON 113"]
        ]
    },
    "CSE 156": {
        "credits": 5,
        "prerequisites": [["CSE 150", "CSE 101"]],
        "concurrent": "CSE 156L"
    },
    "CSE 156L": {
        "credits": 2,
        "prerequisites": [["CSE 150", "CSE 101"]],
        "concurrent": "CSE 156"
    },
    "CSE 157": {
        "credits": 7,
        "prerequisites": [["CSE 121", "CSE 150"]]
    },
    "CSE 160": {
        "credits": 7,
        "prerequisites": [["CSE 101", ["MATH 21", "AM 10"]]]
    },
    "CSE 161": {
        "credits": 5,
        "prerequisites": [["CSE 160", "equivalent"]],
        "concurrent": "CSE 161L"
    },
    "CSE 161L": {
        "credits": 2,
        "concurrent": "CSE 161"
    },
    "CSE 162": {
        "credits": 5,
        "prerequisites": [["CSE 160", "equivalent"]],
        "concurrent": "CSE 162L"
    },
    "CSE 162L": {
        "credits": 2,
        "concurrent": "CSE 162"
    },
    "CSE 163": {
        "credits": 5,
        "prerequisites": [["CSE 101"]]
    },
    "CSE 168": {
        "credits": 7,
        "prerequisites": [["CSE 160"]]
    },
    "CSE 181": {
        "credits": 5,
        "prerequisites": [["CSE 180", "CSE 130"]]
    },
    "CSE 183": {
        "credits": 5,
        "prerequisites": [
            ["CSE 15", "CSE 15L"],
            ["CMPM 35"],
            ["CSE 101"]
        ]
    },
    "CSE 184": {
        "credits": 5,
        "prerequisites": [["CSE 101"]]
    },
    "CSE 187": {
        "credits": 5,
        "prerequisites": [["CSE 186"]]
    },
    "CMPM 172": {
        "credits": 7,
        "prerequisites": [["CMPM 171"]]
    },
}

# must pick 1 DC course

DCs = {
    "CSE 115A": {"credits": 5, "prerequisites": [["CSE 101"], ["CSE 130"]]},
    "CSE 185E": {"credits": 5, "prerequisites": [["CSE 30", "CSE 12"]]},
    "CSE 185S": {"credits": 5, "prerequisites": [["CSE 101"], ["CSE 101", "CSE 101M", "CSE 120", "CSE 130"]]},
    "CSE 195": {"credits": 5, "prerequisites": []}
}

In [95]:
### course funcs ###

import random

def can_take_course(course, prereqs, completed_courses):

  satisfied = 0
  for prereq in prereqs:
    if len(prereq) > 1:
      for option in prereq:
        if option in completed_courses:
          satisfied += 1
          break
    else:
      if prereq[0] in completed_courses:
        satisfied += 1
  if satisfied == len(prereqs):
    return True
  return False

def calc_courses():

    # select which calculus class to take
    calc1_course = random.choice(list(calc_1.keys()))

    if calc1_course == "MATH 19A":
      calc2_course = "MATH 19B"
    else: # taking honors calc series
      calc2_course = "MATH 20B"

    calc3_course = random.choice(list(calc_3.keys()))

    return calc1_course, calc2_course, calc3_course


def generate_schedule(courses):
    schedule = [[], [], [],
                [], [], [],
                [], [], [],
                ["ELECTIVE"], ["ELECTIVE"], ["ELECTIVE", "CAPSTONE"],]

    completed_courses = set()

    ### calc 1-3 ###

    [calc1_course, calc2_course, calc3_course] = calc_courses()

    start_calc_quarter = random.randint(0, 0)

    schedule[start_calc_quarter].append(calc1_course)
    schedule[start_calc_quarter+1].append(calc2_course)
    schedule[start_calc_quarter+2].append(calc3_course)

    quarter = 0

    ### linear alg ###

    # select which linear alg class to take
    alg_course = random.choice(list(linear_alg.keys()))

    if alg_course == "AM 10":
      start_alg_quarter = random.randint(0, 4)
    else:
      start_alg_quarter = random.randint(start_calc_quarter+1, 4)
    schedule[start_alg_quarter].append(alg_course)

    ### stats ###

    # select which stats class to take, and then add it to courses
    stats_course = random.choice(list(stats.keys()))

    if stats_course == "CSE 107":
      courses["CSE 107"] = {"credits": 5, "prerequisites": [["CSE 16"], ["AM 30", "MATH 22", "MATH 23A"]]}
    else:
      courses["STAT 131"] = {"credits": 5, "prerequisites": [["AM 11B", "ECON 11B", "MATH 11B", "MATH 19B", "MATH 20B"]]}


    ### theory ###

    # select which stats class to take, and then add it to courses
    theory_course = random.choice(list(theory.keys()))

    if theory_course == "CSE 102":
      courses["CSE 102"] = {"credits": 5, "prerequisites": [["CSE 101"]]}
    else:
      courses["CSE 103"] = {"credits": 5, "prerequisites": [["CSE 101"]]}

    ### pl ###

    # select which pl class to take, and then add it to courses
    pl_course = random.choice(list(pl.keys()))


    if pl_course == "CSE 112":
      courses["CSE 112"] = {"credits": 5, "prerequisites": [["CSE 101"]]}
    else:
      courses["CSE 114A"] = {"credits": 5, "prerequisites": [["CSE 101"]]}


    ### DC ###

    # select which DC class to take, and then add it to courses
    DC_course = random.choice(list(DCs.keys()))

    courses[DC_course] = DCs[DC_course]

    while courses:

        # print(quarter)
        if quarter == 12:
          break

        if quarter != 0:
          for c in schedule[quarter-1]:
            completed_courses.add(c)

        # current quarter's classes
        current_quarter = []

        # list of available courses that meet prerequisites
        available_courses = []

        # collect courses that can be taken this quarter
        for course in courses:
          prereq = courses[course]["prerequisites"]
          if can_take_course(course, prereq, completed_courses):
            available_courses.append(course)

        # randomly select up to 2 courses from the available courses
        random.shuffle(available_courses) # shuffle the available courses for randomness
        for course in available_courses[:2]:
          if len(schedule[quarter]) == 2:
            break
          current_quarter.append(course)
          completed_courses.add(course)
          del courses[course]
          # add the course to the schedule
          schedule[quarter].append(course)

        quarter += 1

    return schedule


In [96]:
# generate the schedule

def make_examples():

  all_schedules = []

  for i in range(10000):
    must_take = {
      "CSE 12": {"credits": 7, "prerequisites": [["CSE 5J", "CSE 20", "CSE 30", "BME 160"]]},
      "CSE 16": {"credits": 5, "prerequisites": [["MATH 20A", "MATH 19A", "MATH 19B", "MATH 11B", "AM 11B", "AM 15B", "ECON 11B"]]},
      "CSE 20": {"credits": 5, "prerequisites": []},
      "CSE 30": {"credits": 5, "prerequisites": [["CSE 20", "BME 160", "MATH 3", "MATH 11A", "MATH 19A", "AM 3", "AM 11A"]]},
      "CSE 40": {"credits": 5, "prerequisites": [["MATH 19B", "MATH 20B"], ["CSE 30"]]},
      "CSE 13S": {"credits": 7, "prerequisites": [["CSE 12", "BME 160"]]},
      "ECE 30": {"credits": 5, "prerequisites": [["MATH 19B", "MATH 20B"]]},

      "CSE 101": {"credits": 5, "prerequisites": [["CSE 12", "BME 160"], ["CSE 13E", "ECE 13", "CSE 13S"],
          ["CSE 16"], ["CSE 30"], ["MATH 11B", "MATH 19B", "MATH 20B", "AM 11B", "ECON 11B"]]},
      "CSE 101M": {"credits": 5, "prerequisites": [["CSE 101"]]},
      "CSE 120": {"credits": 5, "prerequisites": [["CSE 12"], ["CSE 13S", "CSE 13E", "ECE 13"]]},
      "CSE 130": {"credits": 5, "prerequisites": [["CSE 12"],["CSE 101"]]}
    }

    one_schedule = []

    course_schedule = generate_schedule(must_take)
    # print(must_take)

    # print the schedule
    for quarter, courses in enumerate(course_schedule, start=1):
        # print(f"Quarter {quarter}: {', '.join(course) if course else 'No courses available'}")
        one_schedule.append({'quarter':quarter, 'courses':courses})

    all_schedules.append(one_schedule)

    # print(one_schedule)

  # print(all_schedules)

  csv_file = 'course_schedules.csv'

  import csv

  # Specify the CSV file name
  csv_file = 'course_schedules.csv'


  with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
      writer = csv.writer(file)
      writer.writerow(['quarter', 'courses'])

      for schedule in all_schedules:
        for entry in schedule:
              quarter = entry['quarter']
              courses = entry['courses']
              writer.writerow([quarter, courses])
              # print([quarter, courses])

      # writer.writerow(["blank"]) # separate each schedule by a blank row

# yr = input("What year are you? ")
# prev_classes = input("What classes have you already taken? ")

make_examples()


i want you to iterate through "course_schedules.csv" # First CSV with quarter and courses

and for quarter 1-3:
- go to the courses column and iterate through those course, like ['MATH 20A', 'CSE 20'], and make sure to remove the space, e.g., CSE20

- go to "course_schedules.csv" (# First CSV with quarter and courses), and go to quarter where 1=Fall24, 2=Winter25, 3=Spring25

- go to the class_name column and find CSE 20, find there are multiply Fall24 CSE20 professors, randomly select one

after you complete quarters 1-3, move to the next instance of rows with those quarters


at the end you should have a new csv file with 1000 schedules from course_schedules.csv, but now there should be an added column that has a list for each quarter with 1, 2, or 3, that corresponds to the professors for that courses taken that quarter

In [97]:
import pandas as pd
import ast
import random


course_schedule_df = pd.read_csv("course_schedules.csv")
professors_df = pd.read_csv("ucsc_website_baskin_courses_2024.csv")
final_schedule_df = course_schedule_df.copy()

# add a new column 'professors' and initialize it with empty lists
final_schedule_df['professors'] = [[] for _ in range(len(final_schedule_df))]
final_schedule_df.to_csv("final_schedules.csv", index=False)

# filter the rows for quarter 1, 2, or 3
filtered_df = course_schedule_df[course_schedule_df['quarter'].isin([1, 2, 3])]

# iterate through the filtered DataFrame
for idx, row in filtered_df.iterrows():
    # convert the 'courses' column from a string representation of a list to an actual list
    courses = ast.literal_eval(row['courses'])  # gives us a list like ['MATH 19A', 'CSE 20']

    # iterate through each course in the courses list
    for course in courses:
        # remove spaces in course name
        cleaned_course = course.replace(" ", "")

        # determine the quarter based on the 'quarter' value
        quarter = row['quarter']
        if quarter == 1:
            quarter_str = "Fall24"
        elif quarter == 2:
            quarter_str = "Winter25"
        elif quarter == 3:
            quarter_str = "Spring25"

        # filter professors data to find relevant professors for the current course and quarter
        professor_matches = professors_df[(professors_df['quarter'] == quarter_str) &
                                          (professors_df['class_name'].str.replace(" ", "") == cleaned_course)]

        # if there are matching professors, randomly select one and add to the 'professors' column
        if not professor_matches.empty:
            selected_professor = random.choice(professor_matches['professor_name'].tolist())
            # add the professor to the professors list for the current row and course
            final_schedule_df.at[idx, 'professors'].append(selected_professor)

# save the final schedules with professors populated (as lists)
final_schedule_df.to_csv("final_schedules.csv", index=False)

# print(final_schedule_df.head(500))



1. Go through each schedule in final_schedules.csv, where 1 schedule is quarters 1 - 12. And then it repeats for each schedule.

2.  Find the corresponding professor for each professor in the column professors list in final_schedules.csv in ratemyprof.csv. Assign a goodness score for each professor (Goodness Score=Professor Rating−Professor Difficulty). Skip over empty professor lists.





In [98]:
import pandas as pd
import ast

course_schedule_df = pd.read_csv("final_schedules.csv")
ratemyprof_df = pd.read_csv("ratemyprof.csv")

final_schedule_df = course_schedule_df.copy()
final_schedule_df['score'] = [[] for _ in range(len(final_schedule_df))]

final_schedule_df.to_csv("completed.csv", index=False)
filtered_df = course_schedule_df[course_schedule_df['quarter'].isin([1, 2, 3])]

# iterate through the filtered DataFrame
for idx, row in filtered_df.iterrows():
    # convert the 'courses' column from string representation of a list to an actual list
    courses = ast.literal_eval(row['courses'])  # gives us a list like ['MATH 19A', 'CSE 20']
    profs = ast.literal_eval(row['professors'])  # gives us a list like ['Prof. Smith', 'Prof. Johnson']

    # initialize the total score for this row
    total_score = 0

    # iterate through each course and its corresponding professor
    for course, prof in zip(courses, profs):

        # clean the course name by removing spaces
        cleaned_course = course.replace(" ", "")

        # determine the quarter string based on the 'quarter' value
        quarter = row['quarter']
        if quarter == 1:
            quarter_str = "Fall24"
        elif quarter == 2:
            quarter_str = "Winter25"
        elif quarter == 3:
            quarter_str = "Spring25"

        # find the professor's rating and difficulty from ratemyprof.csv
        # print(prof)
        prof_data = ratemyprof_df[ratemyprof_df['professor_name'].str.contains(prof, case=False, na=False)]

        if not prof_data.empty:
            # assume the first match is the relevant professor
            rating = prof_data['professor_rating'].values[0]
            difficulty = prof_data['professor_difficulty'].values[0]

            # calculate the goodness score (rating - difficulty)
            goodness_score = rating - difficulty
        else:
            goodness_score = 0  # default to 0 if the professor is not found

        # add the goodness score to the total score for this row
        total_score += goodness_score

        # print(f"Course: {cleaned_course}, Quarter: {quarter_str}, Professor: {prof}, Goodness Score: {goodness_score}")

    # after processing all courses for this row, assign the total score to the 'score' column
    final_schedule_df.loc[idx, 'score'] = total_score

    # print("\nDone with this row\n")

# save the updated DataFrame to 'final_schedules_with_score.csv' with the 'score' column
final_schedule_df.to_csv("final_schedules_with_score.csv", index=False)

final_schedule_df.head(100)

# print("Final schedule with 'score' column saved as 'final_schedules_with_score.csv'.")


Unnamed: 0,quarter,courses,professors,score
0,1,"['MATH 19A', 'CSE 20']",['Hao Yue'],-2.3
1,2,"['MATH 19B', 'CSE 30']",['Hao Yue'],-2.3
2,3,"['AM 30', 'MATH 21']",[],0
3,4,"['ECE 30', 'CSE 16']",[],[]
4,5,"['CSE 107', 'CSE 40']",[],[]
...,...,...,...,...
95,12,"['ELECTIVE', 'CAPSTONE']",[],[]
96,1,"['MATH 19A', 'CSE 20']",['Larissa A Munishkina'],2.5
97,2,"['MATH 19B', 'MATH 21']",[],0
98,3,"['AM 30', 'CSE 16']",['Allen Van Gelder'],0.8


* from hugging face, get a pretrained neural network to train using the schedules
* the sample scheduels are in final_schedules_with_score.csv
* each schedule spans quarter 1 - 12, and then another schedule starts
* there are 10000 schedules, so split the data between train, dev, and test
* see if the NN can generate it's own schedules



