In [None]:
import pandas as pd
import numpy as np

In [None]:
def get_room_number_vec(size,
                        room_numbers=(110, 120),
                        tol=2):
    """Return a vector of randomly assigned room numbers.

    Parameters
    ----------
    size : int
        number of elements in returned vector, i.e.,
        number of students to assign room numbers
    room_numbers : tuple
        of possible room numbers to assign
    tol : int
        tolerance. A draw is accepted only when the largest difference
        in head count between any two rooms is strictly less than `tol`.

    Returns
    -------
    assignments : numpy.ndarray
        `size` room numbers, each drawn from `room_numbers`.

    Raises
    ------
    ValueError
        if `room_numbers` is empty, or if no possible assignment can
        satisfy `tol` (redrawing would otherwise never terminate).

    Notes
    -----
    Creates the vector with np.random.choice, which by default
    assigns probabilities to possible numbers with a uniform
    distribution, and redraws until the rooms are balanced.
    """
    room_numbers = np.asarray(room_numbers, dtype=np.int64)
    if room_numbers.size == 0:
        raise ValueError('room_numbers must contain at least one room number')

    # The most even split possible leaves a spread of 0 when the students
    # divide evenly among the distinct rooms and 1 otherwise. If even that
    # does not beat `tol`, the rejection loop below could never exit.
    n_students = int(np.prod(size))
    n_distinct_rooms = np.unique(room_numbers).size
    best_spread = 0 if n_students % n_distinct_rooms == 0 else 1
    if best_spread >= tol:
        raise ValueError(
            'tol={} cannot be satisfied when assigning {} students to {} '
            'rooms'.format(tol, n_students, n_distinct_rooms)
        )

    while True:
        assignments = np.random.choice(room_numbers, size)
        counts = [np.count_nonzero(assignments == room_num)
                  for room_num in room_numbers]
        # The largest pairwise difference between counts is max - min;
        # unlike a list of pairwise differences it is also defined when
        # there is only one room.
        if max(counts) - min(counts) < tol:
            return assignments

In [None]:
# Columns of the input .csv that `room_sort` keeps; all others are dropped.
COLUMNS_TO_KEEP = [
    'First Name',
    'Last Name',
    'Ticket Type',
    'Email',
]

def _assign_rooms(df, ticket_label, room_numbers, tol):
    """Return a copy of the rows whose 'Ticket Type' contains
    `ticket_label`, with a randomly drawn 'Room Number' column added."""
    # .copy() so that adding a column does not write into a slice of `df`.
    group = df[df['Ticket Type'].str.contains(ticket_label, na=False)].copy()
    group['Room Number'] = get_room_number_vec(
        group.shape[0], room_numbers=room_numbers, tol=tol
    )
    return group


def _find_student_row(df, full_name):
    """Return the single row of `df` whose name columns contain the first
    and last name given in `full_name`; raise ValueError otherwise."""
    name_parts = full_name.split(' ')
    if len(name_parts) != 2:
        raise ValueError(
            "Expected a name of the form 'First Last', got {!r}".format(
                full_name
            )
        )
    first, last = name_parts
    # regex=False: names are matched as literal text, not as patterns.
    # na=False: rows with a missing name never match.
    is_match = (
        df['Last Name'].str.contains(last, regex=False, na=False)
        & df['First Name'].str.contains(first, regex=False, na=False)
    )
    matches = df[is_match]
    if matches.shape[0] > 1:
        raise ValueError('Found more than one row for {}'.format(full_name))
    if matches.shape[0] < 1:
        raise ValueError('Found no row for {}'.format(full_name))
    return matches


def room_sort(csv, constraints,
              room_numbers=(110, 120), tol=2):
    """assigns students in a .csv file to a room and
    returns sorted Pandas dataframe with room numbers added

    Parameters
    ----------
    csv : str
        path to .csv file
    constraints : dict
        where keys are student names and values are lists of other
        student names. Each student in the list will be assigned the
        same room as the student whose name is the key. Names are
        written 'First Last' and are compared against the title-cased
        name columns.
    room_numbers : tuple
        of integer room numbers. Used by `get_room_number_vec` function.
    tol : int
        tolerance. A draw is accepted only when the difference in number
        of students between any two rooms is strictly less than `tol`.
        Used by `get_room_number_vec` function.
        `constraints` are currently applied after assigning room numbers,
        so it's best to double-check the balance between rooms.

    Returns
    -------
    final_df : pandas.DataFrame
        the columns in COLUMNS_TO_KEEP plus 'Room Number', sorted by
        room number and then last name. Only rows whose 'Ticket Type'
        contains "Graduate" or "Undergraduate" are included.

    Raises
    ------
    ValueError
        if a name in `constraints` is not of the form 'First Last', or
        matches no row or more than one row.
    """
    df = pd.read_csv(csv)[COLUMNS_TO_KEEP].copy()
    for name_col in ('First Name', 'Last Name'):
        df[name_col] = df[name_col].str.title()

    # Rooms are balanced separately within each ticket type. The match is
    # case-sensitive, so "Graduate" does not also select "Undergraduate".
    grad_df = _assign_rooms(df, "Graduate", room_numbers, tol)
    ugrad_df = _assign_rooms(df, "Undergraduate", room_numbers, tol)

    final_df = pd.concat([grad_df, ugrad_df]).reset_index(drop=True)

    for student_1, student_list in constraints.items():
        s1_row = _find_student_row(final_df, student_1)
        s1_rm_num = s1_row['Room Number'].values.tolist()[0]
        for student_2 in student_list:
            s2_row = _find_student_row(final_df, student_2)
            # Move the listed student into the key student's room.
            final_df.loc[s2_row.index, 'Room Number'] = s1_rm_num

    return final_df.sort_values(by=['Room Number', 'Last Name'])

In [None]:
# Path to the .csv file that is passed to `room_sort`.
csv = 'report-2019-02-08T0853.csv'

# Each key is a student's name; the students listed under it are placed
# in the same room as that student.
constraints = {
    'Sharvil Patel': [
        'Alec Reinhardt',
        'David Ji',
    ],
    'Anthony Sementilli': [
        'Christella Gordon',
        'Olga Taran',
    ],
    'Aiden Ford': [
        'Mitra Kumareswaran',
    ],
}

In [None]:
# Assign rooms with the default room numbers and tolerance, then apply
# the same-room constraints.
final_df = room_sort(csv=csv, constraints=constraints)

In [None]:
# version with emails, for instructors only
# newline='' is required when handing an open file to to_csv: without it
# the platform's newline translation is applied on top of the line
# terminators pandas writes, which yields blank lines between rows on
# Windows.
with open('swc-room-assignments-for-instructors.csv', 'w', newline='') as fp:
    final_df.to_csv(fp)

# version without emails, to send to students
for_students_df = (
    final_df
    .drop(labels='Email', axis=1)
    .sort_values(by=['Last Name'])
)
with open('swc-room-assignments.csv', 'w', newline='') as fp:
    for_students_df.to_csv(fp)

for_students_df.to_excel('swc-room-assignments.xlsx')