w


In [2]:
import pandas as pd
import numpy as np
from pulp import LpProblem, LpVariable, lpSum, LpMinimize, LpBinary, LpStatus, value, PULP_CBC_CMD

In [4]:
# load data: read the course table and trim stray whitespace from the column headers
vectorized_df = (
    pd.read_csv("data/Vectorized_BU_Hub_Courses.csv")
    .rename(columns=str.strip)
)

# Entire Hub requirement (uncomment this block to plan the full Hub instead of the specific requirements below)
# requirement_counts = {
#     'Philosophical Inquiry and Life’s Meanings': 1,
#     'Aesthetic Exploration': 1,
#     'Historical Consciousness': 1,
#     'Social Inquiry I': 1,
#     'Social Inquiry II': 1,
#     'Scientific Inquiry I': 1,
#     'Scientific Inquiry II': 1,
#     'Quantitative Reasoning I': 1,
#     'Quantitative Reasoning II': 1,
#     'First-Year Writing Seminar': 1,
#     'Writing-Intensive Course': 2,
#     'Writing, Research, and Inquiry': 1,
#     'Oral and/or Signed Communication': 1,
#     'Digital/Multimedia Expression': 1,
#     'Critical Thinking': 2,
#     'Research and Information Literacy': 2,
#     'Teamwork / Collaboration': 2,
#     'Creativity / Innovation': 2,
#     'The Individual in Community': 1,
#     'Global Citizenship and Intercultural Literacy': 2,
#     'Ethical Reasoning': 1
# }

# specific requirements
# every Hub area; the order here becomes the key order of requirement_counts
all_hub_areas = [
    'Philosophical Inquiry and Life’s Meanings',
    'Aesthetic Exploration',
    'Historical Consciousness',
    'Social Inquiry I',
    'Social Inquiry II',
    'Scientific Inquiry I',
    'Scientific Inquiry II',
    'Quantitative Reasoning I',
    'Quantitative Reasoning II',
    'First-Year Writing Seminar',
    'Writing-Intensive Course',
    'Writing, Research, and Inquiry',
    'Oral and/or Signed Communication',
    'Digital/Multimedia Expression',
    'Critical Thinking',
    'Research and Information Literacy',
    'Teamwork / Collaboration',
    'Creativity / Innovation',
    'The Individual in Community',
    'Global Citizenship and Intercultural Literacy',
    'Ethical Reasoning',
]

# areas that still need courses, with the number required;
# any area not listed here gets a count of 0
outstanding_units = {
    'Philosophical Inquiry and Life’s Meanings': 1,
    'Aesthetic Exploration': 1,
    'Historical Consciousness': 1,
    'Writing-Intensive Course': 1,
    'Teamwork / Collaboration': 1,
    'The Individual in Community': 1,
    'Global Citizenship and Intercultural Literacy': 1,
    'Ethical Reasoning': 1,
}

requirement_counts = {area: outstanding_units.get(area, 0) for area in all_hub_areas}

# filters (edit as you please)
# departments to exclude: either a whole college ('CGS') or a college + department ('CAS PY')
excluded_departments = ['CGS', 'SAR', 'QST']
excluded_course_codes = ['CAS WR 153E']        # course codes to remove (exact match)
excluded_keywords = ['Summer', 'Analysis']     # remove courses with these words in the title (case-insensitive)

# apply filters
course_codes = vectorized_df['Course Code']
course_titles = vectorized_df['Course Title']

# department: compare both the college ("CAS") and the college + department ("CAS PY")
# prefix of each code against the exclusion list
college = course_codes.str.extract(r'([A-Z]+)', expand=False)
college_department = (
    course_codes
    .str.extract(r'([A-Z]+\s+[A-Z]+)', expand=False)
    .str.replace(r'\s+', ' ', regex=True)   # tolerate repeated spaces inside the code
)
in_excluded_department = (
    college.isin(excluded_departments) | college_department.isin(excluded_departments)
)

# specific courses
is_excluded_code = course_codes.isin(excluded_course_codes)

# keyword filtering: keywords are matched literally, one at a time. Joining them into a
# single regex would match every title when the list is empty (pattern '') and would
# misread keywords containing regex metacharacters such as '+' or '('.
has_excluded_keyword = pd.Series(False, index=vectorized_df.index)
for keyword in excluded_keywords:
    if keyword:   # an empty string is a substring of every title
        has_excluded_keyword |= course_titles.str.contains(
            keyword, case=False, na=False, regex=False
        )

vectorized_df = vectorized_df[
    ~in_excluded_department & ~is_excluded_code & ~has_excluded_keyword
].reset_index(drop=True)


In [5]:
# setup and single solution. for more, look next cell.

# setup
# A requirement that asks for at least one course but has no matching column is left out
# of hub_columns below and would never be enforced (e.g. a straight vs. curly apostrophe
# in a header), so report it instead of dropping it silently.
missing_requirements = [
    req for req, count in requirement_counts.items()
    if count > 0 and req not in vectorized_df.columns
]
if missing_requirements:
    print("Warning: no matching column for required Hub areas:", missing_requirements)

hub_columns = [col for col in vectorized_df.columns if col in requirement_counts]
A = vectorized_df[hub_columns].values                    # rows = courses, columns = hub_columns
b = [requirement_counts[col] for col in hub_columns]     # required count per hub column
courses = vectorized_df["Course Code"] + " - " + vectorized_df["Course Title"]

# unique course key: college, department & number without any section suffix
# (e.g. "CAS WR 153E" -> "CAS WR 153"). The previous pattern `[A-Z]+\s+\w+` stopped
# after the department ("CAS WR"), which limited every plan to one course per department.
# Codes that do not follow the pattern keep their full code as key, so they are never
# grouped together under a shared missing value.
vectorized_df["Course Key"] = (
    vectorized_df["Course Code"]
    .str.extract(r'([A-Z]+\s+[A-Z]+\s+\d+)', expand=False)
    .fillna(vectorized_df["Course Code"])
)
course_keys = vectorized_df["Course Key"]

# map each course key to the row positions that share it
course_key_to_indices = {}
for idx, key in enumerate(course_keys):
    course_key_to_indices.setdefault(key, []).append(idx)

# problem setup: one binary decision variable per course
num_courses = len(courses)
prob = LpProblem("BU_Hub_Course_Selection", LpMinimize)
x = [LpVariable(f"x_{i}", cat=LpBinary) for i in range(num_courses)]

# objective: minimise the number of selected courses
prob += lpSum(x)

# constraints (satisfy each given hub requirement)
for col, (req, needed) in enumerate(zip(hub_columns, b)):
    units_covered = lpSum(A[row][col] * x[row] for row in range(num_courses))
    prob += units_covered >= needed, f"Requirement_{req}"

# constraint (prevent duplicate department-courses): at most one course per shared key
shared_keys = {key: rows for key, rows in course_key_to_indices.items() if len(rows) > 1}
for key, rows in shared_keys.items():
    prob += lpSum(x[row] for row in rows) <= 1, f"Unique_{key}"

# solve (thanks PuLP)
prob.solve(PULP_CBC_CMD(msg=0))

# results
# make a failed solve visible instead of printing whatever values the solver left behind
solve_status = LpStatus[prob.status]
if solve_status != 'Optimal':
    print(f"Warning: solver status is '{solve_status}'; the selection below may not satisfy the requirements.")

# variable values are floats (or None if never set), so use a threshold rather than
# testing for exactly 1
selected_courses = [
    courses[i] for i in range(len(courses)) if (x[i].varValue or 0) > 0.5
]
print(f"Number of courses selected: {len(selected_courses)}")
print("Courses:")
for course in selected_courses:
    print("-", course)

Number of courses selected: 3
Courses:
- CAS AH 333 - Arts of Classical Greece
- CAS RN 245 - The Quest for God and the Good
- CAS PO 379S - Religion and Politics


In [None]:
# Enumerate up to MAX_SOLUTIONS course selections, starting with the optimal one.
#
# The exclusion constraints are added to a copy of the model, never to `prob` itself,
# so re-running this cell starts again from "Solution 1" instead of piling further
# constraints onto an already restricted problem. The copy shares the decision
# variables, so x[i].varValue always reflects the most recent solve.
MAX_SOLUTIONS = 100   # limit on the number of solutions listed, change to your liking

enumeration_prob = prob.copy()
found_solutions = []

while len(found_solutions) < MAX_SOLUTIONS:
    if found_solutions:
        last_solution = found_solutions[-1]
        if not last_solution:
            # an empty selection already satisfies everything; nothing left to exclude
            print("No more feasible solutions.")
            break
        # constraint to block the last solution: at least one of its courses must be
        # dropped (this also rules out any selection that merely adds courses to it)
        enumeration_prob += (
            lpSum(x[i] for i in last_solution) <= len(last_solution) - 1,
            f"Exclude_solution_{len(found_solutions)}",
        )

    # solve (again)
    enumeration_prob.solve(PULP_CBC_CMD(msg=0))

    if LpStatus[enumeration_prob.status] != 'Optimal':
        if found_solutions:
            print("No more feasible solutions.")
        else:
            print("No feasible solution found.")
        break

    # save new solution; values are floats (or None), so threshold instead of == 1
    new_solution = set(i for i in range(len(x)) if (x[i].varValue or 0) > 0.5)

    # print solution (sorted so the listing order is stable between runs)
    print(f"Solution {len(found_solutions) + 1}:")
    for i in sorted(new_solution):
        print("-", courses[i])
    print("-----")

    # add to list of found solutions
    found_solutions.append(new_solution)