In [66]:
import numpy as np
import pandas as pd

In [71]:
def read_tasect(tasfile = 'tas.csv', sectfile = 'sections.csv'):
    """Load the TA and section CSV files and reduce them to numeric tables.

    Sections: the 'instructor', 'location', 'students' and 'topic' columns
    are dropped, and 'daytime' is replaced by the integer codes returned by
    ``pd.factorize`` (equal values share a code).

    TAs: the 'name' column is dropped, and every column from the third one
    onward is translated with 'U' -> 0, 'W' -> 1, 'P' -> 2 (presumably
    Unwilling / Willing / Preferred -- confirm against the data spec).
    Because ``Series.map`` is used, any value outside that mapping
    becomes NaN.

    Args:
        tasfile: anything ``pd.read_csv`` accepts for the TA table.
        sectfile: anything ``pd.read_csv`` accepts for the section table.

    Returns:
        tuple: ``(clean_tas, clean_sect)`` as pandas DataFrames.
    """
    raw_tas = pd.read_csv(tasfile)
    raw_sect = pd.read_csv(sectfile)

    # Sections: keep only the columns that survive the drop, and turn the
    # meeting-time label into an integer code.
    sections = raw_sect.drop(columns=['instructor', 'location', 'students', 'topic'])
    sections['daytime'] = pd.factorize(sections['daytime'])[0]

    # TAs: the first two remaining columns are left untouched; everything
    # after them is recoded through the letter -> integer table.
    preference_codes = {'U': 0, 'W': 1, 'P': 2}
    assistants = raw_tas.drop(columns=['name'])
    for section_col in assistants.columns[2:]:
        assistants[section_col] = assistants[section_col].map(preference_codes)

    return assistants, sections

In [75]:
# Load both tables using the default file names ('tas.csv' and 'sections.csv').
tas, sect = read_tasect()

In [76]:
# Inspect the cleaned TA table.
tas

Unnamed: 0,ta_id,max_assigned,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0,1,0,0,0,0,1,2,0,1,2,1,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,1,0,0,0,0,0,0,2,1,2,0,0,0,0,0,0,1,1
3,3,3,0,0,0,0,1,1,0,0,0,0,0,0,1,1,2,0,0
4,4,1,0,0,0,0,1,1,1,2,1,0,0,0,0,0,0,0,0
5,5,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,1,0,0
6,6,1,0,0,0,0,1,1,1,1,1,0,0,0,2,2,2,2,1
7,7,2,0,1,2,2,0,0,0,0,0,1,0,0,0,0,0,2,1
8,8,1,2,0,0,0,0,0,1,1,0,0,0,0,2,1,1,0,0
9,9,1,0,2,2,2,0,0,0,0,0,0,2,2,0,0,0,0,0


In [77]:
# Inspect the cleaned section table.
sect

Unnamed: 0,section,daytime,min_ta,max_ta
0,0,0,3,4
1,1,1,3,4
2,2,1,2,3
3,3,1,2,3
4,4,2,3,4
5,5,2,3,4
6,6,3,2,3
7,7,3,2,3
8,8,3,3,4
9,9,4,3,4


In [107]:
# NumPy versions of both tables, for use by the objective functions.
tas_n, sect_n = (frame.to_numpy() for frame in (tas, sect))

In [108]:
# Random 0/1 candidate assignment with len(sect) rows and len(tas) columns.
# A seeded generator is used so the same matrix is produced on every
# Restart & Run All; change the seed to explore a different starting point.
array = np.random.default_rng(seed=42).integers(0, 2, size=(len(sect), len(tas)))

In [115]:
def overalloc(array, tas_n):
    """Objective 1: total over-allocation penalty across all TAs.

    Each TA states the maximum number of labs they can support. If a TA
    asks for at most 2 labs and is assigned 5, that TA contributes a
    penalty of 3. TAs at or below their maximum contribute 0 (there is no
    minimum allocation). The result is the sum of the penalties.

    Args:
        array: assignment matrix; it is summed along axis 0, so each
            column holds the assignments of one TA.
        tas_n: TA table as a NumPy array whose column 1 is the
            max_assigned value, in the same TA order as the columns
            of ``array``.

    Returns:
        The summed over-allocation penalty.
    """
    labs_per_ta = np.sum(array, axis=0)
    labs_allowed = tas_n[:, 1]  # max_assigned column

    # Only assignments beyond the stated maximum are penalised.
    excess = np.maximum(labs_per_ta - labs_allowed, 0)
    return excess.sum()

304


array([1, 0, 1, 3, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1,
       2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 1, 0],
      dtype=int64)

In [ ]:
'''Objectives
1. Minimize overallocation of TAs (overallocation): Each TA specifies how many labs they can
support (max_assigned column in tas.csv). If a TA requests at most 2 labs and you assign to them 5
labs, that’s an overallocation penalty of 3. Compute the objective by summing the overallocation
penalty over all TAs. There is no minimum allocation.

2. Minimize time conflicts (conflicts): Minimize the number of TAs with one or more time conflicts. A
time conflict occurs if you assign a TA to two labs meeting at the same time. If a TA has multiple
time conflicts, still count that as one overall time conflict for that TA.

3. Minimize Under-Support (undersupport): If a section needs at least 3 TAs and you only assign 1,
count that as 2 penalty points. Minimize the total penalty score across all sections. There is no
penalty for assigning too many TAs. You can never have enough TAs.

4. Minimize the number of times you allocate a TA to a section they are unwilling to support
(unwilling). You could argue this is really a hard constraint, but we will treat it as an objective to be
minimized instead.

5. Minimize the number of times you allocate a TA to a section where they said “willing” but not
“preferred” (unpreferred). In effect, we are trying to assign TAs to sections that they prefer, but we
want to frame every objective as a minimization objective. So, if your solution scores unwilling=0
and unpreferred=0, then all TAs are assigned to sections they prefer! Good job!'''