In [None]:
import pandas as pd
import numpy as np
import scipy.optimize as opt
from scipy.sparse import lil_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Importing and Cleaning Data

In [None]:
# Load the two input tables from CSV files located in the working directory.
roster = pd.read_csv("roster1.csv") # class roster exported from CalCentral or bCourses
prefs_form = pd.read_csv("prefs.csv") # preference responses exported from Google Forms

In [None]:
# Preview the first rows of the roster to confirm the expected columns were loaded.
roster.head()

In [None]:
# Preview the first rows of the form responses to confirm the expected columns were loaded.
prefs_form.head()

In [None]:
# Count roster rows per "Role" value to see how many students are enrolled vs. waitlisted.
roster["Role"].value_counts()

In [None]:
# Keep only the roster rows whose Role is exactly "Student" (i.e. enrolled students).
enrolled_roster = roster.loc[roster["Role"].eq("Student")]

In [None]:
# Make sure there is only one form response per email: this shows the largest number
# of responses submitted from any single address, so the expected value is 1.
# A larger value means duplicate responses that should be resolved before the join below.
prefs_form["Email Address"].value_counts().max()

In [None]:
# Inner join on email: keeps only students who are on the enrolled roster AND filled out the form.
joined = enrolled_roster.merge(prefs_form, how="inner", on="Email Address")

In [None]:
# A notebook cell only echoes its final expression, so the preview must be
# displayed explicitly or it is silently discarded.
display(joined.head())
# Number of (students, columns) after the join.
joined.shape

In [None]:
# Preference columns are selected by header text: lab questions contain
# "preferences for lab" and discussion questions contain "discussion".
lab_cols = [c for c in joined.columns if "preferences for lab" in c.lower()]
sec_cols = [c for c in joined.columns if "discussion" in c.lower()]
util_map = {
    "Cannot make this time.": 0.,
    "Least Preferred": 1.,
    "Moderately Preferred": 2.,
    "Strongly Preferred": 3.,
    "Strong Preferred": 3. #oops typo in form.
}
# Convert strings to utility scores.
# The explicit float cast guarantees a numeric matrix for the optimizer and fails
# right here (instead of deep inside the solver) if any answer is not in util_map.
lab_prefs = joined[lab_cols].replace(util_map).astype(float)
sec_prefs = joined[sec_cols].replace(util_map).astype(float)

# Print dimensions
(n_students, n_labs) = lab_prefs.shape
(n_students, n_sections) = sec_prefs.shape
print("Number of Students:", n_students)
print("Number of Labs Times:", n_labs)
print("Number of Discussion Slots:", n_sections)

In [None]:
# View the discussion time/location slots (the form column headers collected in sec_cols).
# The position of a header in this list is the slot index used by the optimizer.
sec_cols

In [None]:
# View the lab time slots (the form column headers collected in lab_cols).
# The position of a header in this list is the slot index used by the optimizer.
lab_cols

# Setting The Section and Lab Sizes

The slot counts and sizes below need to be updated to reflect the actual capacity (or the remaining capacity) of each slot before running the assignment.

In [None]:
# Number of physical rooms running in parallel during each slot (one entry per slot,
# in the same order as lab_cols / sec_cols).
# Note that for lab, we use TIME slots. For discussion, we use TIME/LOCATION slots.
lab_num_slots = np.array([4, 4, 4, 4, 4, 4, 1, 1, 4, 2, 3, 3, 0])
disc_num_slots = np.array([1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 0, 2, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 2])

# These arrays are maintained by hand; fail fast if they drift out of sync with the
# preference columns, since the optimizer needs exactly one capacity per slot.
assert len(lab_num_slots) == n_labs, (
    f"lab_num_slots has {len(lab_num_slots)} entries but there are {n_labs} lab columns")
assert len(disc_num_slots) == n_sections, (
    f"disc_num_slots has {len(disc_num_slots)} entries but there are {n_sections} discussion columns")

# Seats available in one physical lab/discussion room.
SEATS_PER_ROOM = 35

# Capacities during each slot (rooms in the slot x seats per room).
# NOTE(review): the original comment says these are decremented during assignment so they
# represent the spots remaining; that code is not visible in this section.
sec_sizes = disc_num_slots * SEATS_PER_ROOM
lab_sizes = lab_num_slots * SEATS_PER_ROOM

In [None]:
# Total number of physical labs across all time slots; compare against the expected count.
lab_num_slots.sum()

In [None]:
# Ensure that the total lab capacity covers every student being assigned.
# Each student must get exactly one lab, so with fewer seats than students the LP is infeasible.
total_lab_capacity = lab_sizes.sum()
assert total_lab_capacity >= n_students, (
    f"Only {total_lab_capacity} lab seats for {n_students} students")
total_lab_capacity

In [None]:
# Total number of physical discussions across all time/location slots; compare against the expected count.
disc_num_slots.sum()

In [None]:
# Ensure that the total discussion capacity covers every student being assigned.
# Each student must get exactly one discussion, so with fewer seats than students the LP is infeasible.
total_disc_capacity = sec_sizes.sum()
assert total_disc_capacity >= n_students, (
    f"Only {total_disc_capacity} discussion seats for {n_students} students")
total_disc_capacity

# Optimizing the Assignments

In [None]:
def run_lp(prefs, cap, fuzzing=1.0e-5):
    """
    Solve the linear program that assigns every student to exactly one section.

    Parameters
    ----------
    prefs : array-like, shape (n_students, n_sections)
        Utility values. Larger values are better. Any numeric dtype is accepted.
    cap : array-like, shape (n_sections,)
        Maximum number of students allowed in each section.
    fuzzing : float
        Scale of the Gaussian noise added to the objective to break ties.
        The noise is drawn from NumPy's global random state, so call
        ``np.random.seed`` beforehand for reproducible assignments.

    Returns
    -------
    scipy.optimize.OptimizeResult
        The raw ``linprog`` result. ``res.x`` is the row-major flattening of the
        (n_students, n_sections) assignment matrix with entries in [0, 1].
        Callers should check ``res.success`` before using ``res.x``.

    Raises
    ------
    ValueError
        If ``prefs`` is not 2-D or ``cap`` does not have one entry per section.
    """
    # Force a float copy: with integer utilities the in-place noise addition below
    # would fail because floats cannot be cast back into an integer array.
    prefs = np.asarray(prefs, dtype=float)
    if prefs.ndim != 2:
        raise ValueError("prefs must be a 2-D (n_students, n_sections) array")
    (n, d) = prefs.shape

    cap = np.asarray(cap, dtype=float).ravel()
    if cap.shape[0] != d:
        raise ValueError(
            "cap has %d entries but prefs has %d sections" % (cap.shape[0], d))

    # The optimizer MINIMIZES the sum of the weights.
    w = -prefs.flatten() # prefs is in row major form.

    ### This is a bit of a hack but the problem is not well conditioned
    ### so by adding a small amount of noise we ensure a single solution.
    w += fuzzing * np.random.randn(len(w))

    # The equality constraints enforce that every student is in one section.
    # Each row in Aeq is the constraint for a single student: it selects that
    # student's d consecutive variables in the row-major layout.
    Aeq = lil_matrix((n, n*d))
    for i in range(n):
        Aeq[i,(i*d):((i+1)*d)] = 1.
    Aeq = Aeq.asformat("csr")
    # Note we also add a 0 <= x <= 1 constraint in the bounds arg to linprog

    # The inequality constraints ensure that no section has too many students.
    # Section j owns every d-th variable starting at offset j, so a strided
    # slice sets the row directly without building a dense n-by-d temporary.
    Aub = lil_matrix((d, n*d))
    for j in range(d):
        Aub[j, j::d] = 1.
    Aub = Aub.asformat("csr")

    # HiGHS accepts scipy.sparse constraint matrices directly; the former
    # `sparse=True` option belonged to the legacy interior-point solver.
    options = dict(disp=False)

    res = opt.linprog(w, A_ub=Aub, b_ub=cap, A_eq=Aeq, b_eq=np.ones(n),
                      bounds=(0, 1), method="highs", options=options)
    return res

def compute_assignments(prefs, cap, unhappy = 0.0):
    """
    Compute the section assignments from the output of the optimization.

    prefs: pandas dataframe of section preferences (students x slots) used for the optimization
    cap: a numpy array of the section sizes, one entry per column of prefs
    unhappy: students whose assigned utility is at or below this value are counted as unhappy

    Returns a dataframe indexed like prefs with two columns:
      "Assignment": the 0-based column position in prefs of the slot given to the student
      "Happyness":  the utility the student reported for that slot

    Raises RuntimeError if the optimizer does not report success (for example when the
    total capacity is smaller than the number of students).
    """
    utilities = prefs.to_numpy()
    (n,d) = utilities.shape
    soln = run_lp(utilities, cap)
    if not soln.success or soln.x is None:
        # Without a valid solution there is nothing meaningful to round.
        raise RuntimeError("Section assignment LP failed: " + str(soln.message))

    x = soln.x.reshape(n,d)
    rounded_x = np.round(x) # Rounding the solution

    # Pick each student's largest entry. For an integral solution this is exactly the
    # slot marked 1; for a fractional row it still yields one slot per student, so the
    # result always has one row per student instead of failing on a length mismatch.
    assignment = x.argmax(axis=1)
    load = np.bincount(assignment, minlength=d)

    print("Over Capacity:", np.sum(load > np.asarray(cap)))
    # Rows that did not round to exactly one slot (resolved above by taking the largest entry).
    print("Unassigned:", np.sum(rounded_x.sum(axis=1) != 1.))
    result = pd.DataFrame(
        {"Assignment": assignment,
         "Happyness": utilities[np.arange(n), assignment]},
        index=prefs.index)
    print("Unhappy Students:", np.sum(result['Happyness'] <= unhappy))
    return result

In [None]:
# Set random seed to get the same assignments on every run
# (run_lp adds random tie-breaking noise drawn from NumPy's global random state).
np.random.seed(4)

# Run LP to assign students to a time slot for lab
lab_opt = compute_assignments(lab_prefs, lab_sizes)

In [None]:
# Set random seed to get the same assignments on every run
# (run_lp adds random tie-breaking noise drawn from NumPy's global random state).
np.random.seed(4)

# Run LP to assign students to a time/location slot for discussion
sec_opt = compute_assignments(sec_prefs, sec_sizes)

In [None]:
# Check the discussion assignments.
# Each row represents a student: "Assignment" is the index of their assigned discussion slot
# (a position in sec_cols) and "Happyness" is the utility they gave that slot.
sec_opt

# Examine Assignments

In [None]:
# Bar chart of how many students received each lab utility score, saved to lab_final.png
ax = lab_opt["Happyness"].value_counts().sort_index().plot(kind="bar")
ax.set_title("Lab Happiness")
ax.set_ylabel("Number of Students")
ax.set_xlabel("Lab Happiness Score")
ax.figure.savefig("lab_final.png")

In [None]:
# Bar chart of how many students received each discussion utility score, saved to disc_final.png
ax = sec_opt["Happyness"].value_counts().sort_index().plot(kind="bar")
ax.set_title("Discussion Happiness")
ax.set_ylabel("Number of Students")
ax.set_xlabel("Disc Happiness Score")
ax.figure.savefig("disc_final.png")

In [None]:
# Ensure that the indices are the same, so students did not get shuffled.
# Index.equals compares the labels element by element; `is` would only test whether
# both names point at the very same Index object, which says nothing about the contents.
sec_opt.index.equals(joined.index) and lab_opt.index.equals(joined.index)

In [None]:
# Largest discussion slot index that was actually assigned (a 0-based position in sec_cols).
sec_opt["Assignment"].max()

In [None]:
# TODO: Check which students are unhappy.

# Assign Students to Physical Labs

# Assign Students to Physical Discussions