In [10]:
import numpy as np
import pandas as pd
import regex as re

from natsort import natsorted

# GS Matching Algorithm

In [11]:
def combine_by_summing(arr, groups, axis=0):
    groups = np.array(groups, dtype=int)
    
    # Check that the groups sum to the size along the given axis.
    if groups.sum() != arr.shape[axis]: exit(1)
    
    # Compute the starting indices of each group.
    indices = np.concatenate(([0], np.cumsum(groups)[:-1]))
    
    # Use np.add.reduceat to sum over the specified groups.
    return np.add.reduceat(arr, indices, axis=axis)

# TODO: Check randomness

In [None]:
def GaleShapleyAlgorithmQuota(P1, P2, quota, info=None):
    m, n = P1.shape  # m = size of Group 2, n = size of Group 1
    orig_m, orig_n = m, n  

    # Convert from 1-indexed to 0-indexed
    P1 = P1 - np.ones_like(P1)
    P2 = P2 - np.ones_like(P2)
    
    # standard == True means quotas on rows; otherwise, quotas on columns.
    standard = (quota.shape[0] == m)
    quota = quota.flatten()  # keep as vector of replication counts
    
    # We will fill `info_rep` after replication; `col2orig` after we know n.
    info_rep = None  # ADDED
    # ---- replicate the side with quotas (unchanged logic) ----
    if standard:    
        P1 = np.repeat(P1, quota, axis=0)
        P2 = np.repeat(P2, quota, axis=0)
    else:
        P1 = np.repeat(P1, quota, axis=1)
        P2 = np.repeat(P2, quota, axis=1)

    m, n = P1.shape  # New dimensions after replication

    # replicate info if provided
    if info is not None:
        info = np.asarray(info, dtype=int).flatten()
        if info.shape[0] != orig_m:
            raise ValueError("`info` must have length equal to the number of rows before replication.")
        info_rep = np.repeat(info, quota, axis=0) if standard else info.copy()

    # map each (possibly replicated) column index -> original column index
    if standard:
        # columns were not replicated
        col2orig = np.arange(n, dtype=int)
    else:
        # columns were replicated; expand mapping using `quota` over original columns
        col2orig = np.concatenate([np.full(q, j, dtype=int) for j, q in enumerate(quota)])

    # Always let columns propose. In both cases, we build P1_T from P1.
    P1_T = np.empty((m, n), dtype=int)
    for col in range(n):
        P1_T[:, col] = np.argsort(P1[:, col])

    NumStages = 0
    # For each (replicated) column, we store a pointer and a flag indicating if it should propose.
    cols_assigned = [[-1, True] for _ in range(n)]
    # For each row, we keep a list of columns that have proposed to it.
    rows_assigned = [[] for _ in range(m)]
    
    collisions = True
    while collisions:
        collisions = False
        NumStages += 1

        # has_match[c]  : whether this original column currently holds >=1 match
        # all_pos[c]    : all its matched rows have info>0
        # all_zero[c]   : all its matched rows have info==0
        if info_rep is not None:
            num_orig_cols = col2orig.max() + 1
            has_match = np.zeros(num_orig_cols, dtype=bool)
            all_pos   = np.ones(num_orig_cols,  dtype=bool)
            all_zero  = np.ones(num_orig_cols,  dtype=bool)
            for r in range(m):
                if rows_assigned[r]:
                    j = rows_assigned[r][0]            # the (replicated) column currently holding row r
                    oc = col2orig[j]                   # original column index
                    has_match[oc] = True
                    v = int(info_rep[r])
                    if v > 0:
                        all_zero[oc] = False
                    elif v == 0:
                        all_pos[oc] = False
                    else:  # negative -> treat as ">0"
                        all_zero[oc] = False
        else:
            has_match = all_pos = all_zero = None  # no info => no special tie-breaking

        # Each column that needs to propose does so by using its pointer into its ordering in P1_T.
        for i in range(n):
            if cols_assigned[i][1]:
                cols_assigned[i][1] = False
                cols_assigned[i][0] += 1
                p = cols_assigned[i][0]
                if p >= m:
                    continue  # This column has exhausted its list.

                # tie-break among equal-best rows at this preference level using `info`
                # Only if the column already has >=1 match and we have `info`.
                if info_rep is not None:
                    oc = col2orig[i]
                    if has_match is not None and has_match[oc]:
                        # Current best preference value for this column at pointer p
                        cand_row = P1_T[p, i]
                        best_val = P1[cand_row, i]

                        # Scan the tied block [p .. end of that value]
                        best_q = p
                        if all_pos[oc]:
                            # All current matches have info>0 → prefer a row with info==0 if available
                            q = p
                            while q < m and P1[P1_T[q, i], i] == best_val:
                                r = P1_T[q, i]
                                if info_rep[r] == 0:
                                    best_q = q
                                    break
                                q += 1
                        elif all_zero[oc]:
                            # All current matches have info==0 → prefer a row with info>0 if available
                            q = p
                            while q < m and P1[P1_T[q, i], i] == best_val:
                                r = P1_T[q, i]
                                if info_rep[r] > 0:
                                    best_q = q
                                    break
                                q += 1
                        # Mixed (both 0 and >0 present) → keep original order

                        # If we found a better q inside the tied block, swap so pointer p picks it now
                        if best_q != p:
                            P1_T[p, i], P1_T[best_q, i] = P1_T[best_q, i], P1_T[p, i]

                row = P1_T[p, i]
                rows_assigned[row].append(i)

        # Each row resolves collisions by keeping the best column (according to P2).
        for i in range(m):
            if len(rows_assigned[i]) <= 1:
                continue

            collisions = True
            # Choose the best proposal (the column with the lowest P2[i][col]), and record ties
            minRank, minCol, ties = P2[i][rows_assigned[i][0]], rows_assigned[i][0], 0
        
            for col in rows_assigned[i]:
                if P2[i][col] < minRank:
                    minRank, minCol, ties = P2[i][col], col, 0
                elif P2[i][col] == minRank:
                    ties += 1
            
            # For every other column that proposed, mark it to propose again.
            for col in rows_assigned[i]:
                if col != minCol:
                    cols_assigned[col][1] = True
                    
            rows_assigned[i] = [minCol]
    
    # Build the match matrix from the final assignments.
    Match = np.zeros_like(P1_T)
    for row in range(m):
        for assigned in rows_assigned[row]:
            Match[row][assigned] = 1

    Match = combine_by_summing(Match, quota, axis=0) if standard else combine_by_summing(Match, quota, axis=1)
    return Match, NumStages

# Data Handling

In [13]:
def load_csv(filename):
    # Load the CSV file (first row is header)
    df = pd.read_csv(filename)

    # Get column names
    all_columns = df.columns.tolist()

    # column G (7th column) = index 6 (0-indexed),
    # column T (20th column) = index 19.


    # col[len("Course Preferences")+2:-1]
    matching_columns = [col for col in df.columns if re.search("Course Preferences", col)] # This assumes that there is a closing bracket after the course
    if matching_columns == None: exit(1)

    choice_columns = matching_columns



    # Now filter the DataFrame to only include the desired columns:
    # "ID", the choice columns, "First Choice", and "Second Choice"
    cols_to_keep = ["ID"] + choice_columns + ["First Choice", "Second Choice"]
    df_filtered = df[cols_to_keep]

    # Prepare a new matrix: one row per ID and one column per choice column (G–T)
    # Initialize with zeros (assuming that if neither choice matches, the value stays 0)
    matrix = np.zeros((df_filtered.shape[0], len(choice_columns)), dtype=int)

    # Pre-populate the matrix with (first character as integer + 2) for each cell in columns G-T
    for i, row in df_filtered.iterrows():
        for j, col in enumerate(choice_columns):
            try:
                matrix[i, j] = int(str(row[col]).strip()[0]) + 2
            except (ValueError, IndexError):
                pass
    
    # Make sure normalized columns contain only the normalized text for matching
    normalized_cols = [c[20:-1].strip().lower() for c in choice_columns]
    normalized_cols = [c for c in normalized_cols if c]
    # print(normalized_cols)

    # Process each row of the filtered DataFrame.
    # For each row, find the index (in choice_columns) for the first and second choices
    # and assign 1 or 2 accordingly.
    for idx, row in df_filtered.iterrows():
        if not pd.isnull(row["First Choice"]): first_choice = row["First Choice"].strip().lower()
        if not pd.isnull(row["Second Choice"]): second_choice = row["Second Choice"].strip().lower()
        
        # Set value 1 for first choice if the header exists in choice_columns
        if first_choice in normalized_cols:
            col_index = normalized_cols.index(first_choice)
            matrix[idx, col_index] = 1
        
        # Set value 2 for second choice if the header exists in choice_columns
        if second_choice in normalized_cols:
            col_index = normalized_cols.index(second_choice)
            matrix[idx, col_index] = 2

    # print(df_filtered[df_filtered.columns[0]])
    ids = np.array(df_filtered[df_filtered.columns[0]])
    # print(ids, ids.shape)
    # print("Matrix before:", matrix)
    # print(matrix.shape)
    # np.column_stack((ids, matrix))
    matrix = np.insert(matrix, 0, ids, axis=1)
    matrix = np.insert(matrix, matrix.shape[1], df["Previous LA Service for CS"].to_numpy(), axis=1)
    # matrix = np.append(ids, matrix).reshape((matrix.shape[0], matrix.shape[1]+1))
    # np.concatenate(ids, matrix)
    # np.insert(matrix, range(matrix.shape[0]), ids)
    # print("matrix: ", matrix, matrix.shape)
    # for row in range(len(matrix)):


    # # dataframe if needed
    # result_df = pd.DataFrame(matrix, columns=choice_columns)
    return matrix, df_filtered, df

In [27]:
def load_prof(filename):
    df = pd.read_csv(filename, index_col=False).fillna(99)


    # Convert from floats to ints
    float_cols = df.select_dtypes(include=['float']).columns
    df[float_cols] = df[float_cols].astype(int)


    # Get quotas from column
    prof_quotas = df[df.columns[2]]
    prof_quotas = prof_quotas.to_numpy()[1:]


    # Sort list and eliminate non-courses
    from natsort import natsorted
    sorted_list = natsorted(prof_quotas.tolist())
    prof_quotas = np.array(sorted_list, dtype=prof_quotas.dtype)
    prof_quotas = [s for s in prof_quotas if s[0].isdigit()]


    # Make unique array
    quota_array = []
    curr = ''
    for code in prof_quotas:
        if curr != code: 
            quota_array.append(0)
            curr = code
        quota_array[-1] += 1

    

    df_formatted = df
    
    # Get only desired columns (in-place)
    df_formatted = df_formatted.iloc[:, np.r_[0, 3:df_formatted.shape[1]]] 

    df_formatted = df_formatted[1:]
    df_formatted = np.array(df_formatted, dtype=int)

    return df_formatted, np.array(quota_array).reshape((len(quota_array),1)), df

In [15]:
def verifyEmail(prof_name, dictionary):
    try:
        email = dictionary[prof_name]
    except:
        print(f"No email for {prof_name}!")
        return prof_name, None
    return prof_name, email


In [None]:
def courseTopChoices(df, export_path=""):
    # 1. Find First and Second Choice columns (should be consecutive)
    # 2. For each possible course (find start and end of course name columns)
    # 2.5. Create email dictionary
    # 3. Filter by those values per course, create a spreadsheet
    # 4. Return spreadsheets (OR PRODUCE CSV FILES?)

    # First Choice name
    first_choice_column_name = "First Choice"
    course_string = "Course Preferences"

    # Part 1

    first_choice_index = df.columns.get_loc(first_choice_column_name)

    # Part 2

    # Find column of all occurance of courses
    matching_columns = [col[len(course_string)+2:-1] for col in df.columns if re.search(course_string, col)] # This assumes that there is a closing bracket after the course
    if matching_columns == None: exit(1)


    # Part 2.5 
    data = pd.read_csv("email_directory.csv")
    data = dict(zip(data['Professor'], data['email']))


    # Part 3 & 4

    # First make sure professor emails exist in the directory
    matched_emails = []
    unmatched_names = []
    for course in matching_columns:
        prof = course[course.find("(")+1:course.find(")")]
        
        name, email = verifyEmail(prof, data)

        if email == None: unmatched_names.append(name)
        else: matched_emails.append((name, email))

    # if len(unmatched_names) != 0:
    #     exit(1)

    # Then generate the spreadsheets
    for course in matching_columns:

        course_df = df[(df['First Choice'] == course) | (df['Second Choice'] == course)]

        course_df.to_csv(export_path+course+'.csv', index=False)

        name, email = verifyEmail(prof, data)

        # TODO: Email CSV File

        # # Import smtplib for the actual sending function
        # import smtplib

        # # Import the email modules we'll need
        # from email.message import EmailMessage

        # # Open the plain text file whose name is in textfile for reading.
        # textfile = "test.txt"
        # with open(textfile) as fp:
        #     # Create a text/plain message
        #     msg = EmailMessage()
        #     msg.set_content(fp.read())

        # # me == the sender's email address
        # # you == the recipient's email address
        # msg['Subject'] = f'The contents of {textfile}'
        # msg['From'] = "erikfeng@ucsb.edu"
        # msg['To'] = "erikfeng@ucsb.edu"

        # # Send the message via our own SMTP server.
        # s = smtplib.SMTP('localhost')
        # s.send_message(msg)
        # s.quit()



# Iterative Matching

In [17]:
def matchMatrices(P1, P2):

    ### FILTER MATRICES TO COMMON IDS
    # 1) find the common IDs
    common_ids = np.intersect1d(P1[:,0], P2[:,0])
    # common_ids == array([10, 20])

    # 2) build boolean masks for each matrix
    maskA = np.isin(P1[:,0], common_ids)
    maskB = np.isin(P2[:,0], common_ids)

    # 3) filter
    P1 = P1[maskA]
    P2 = P2[maskB]

    ### SORT MATRICES
    # 1) pull out the ID columns
    ids1 = P1[:, 0]
    ids2 = P2[:, 0]

    # print(ids1, ids2)

    # 2) build a lookup from ID → row‐index in mat2
    pos_in_P2 = { id_: i for i, id_ in enumerate(ids2) }

    # 3) for each id in mat1, find its row in mat2
    order = [pos_in_P2[id_] for id_ in ids1]

    # 4) fancy‐index to reorder mat2
    P2_matched = P2[order]

    # print(P2_matched)
    return P1, P2_matched

In [18]:
def greedyMaxProbMatch(dmatch, quotas):
    final_match = np.zeros_like(dmatch[:, 1:])

    dmatch_temp = dmatch.copy()[:, 1:]

    temp_quotas = quotas.copy()

    while sum(temp_quotas) > 0:
        
        max_index = np.argmax(dmatch_temp)

        row_index, col_index = np.unravel_index(max_index, final_match.shape)

        # print(max_index, dmatch_temp)

        dmatch_temp[row_index, col_index] = 0

        # print(np.sum(final_match[row_index]), temp_quotas[col_index][0])
        if np.sum(final_match[row_index]) > 0 or temp_quotas[col_index][0] <= 0: continue

        final_match[row_index, col_index] = 1

        
        temp_quotas[col_index][0] -= 1
        # print(temp_quotas, temp_quotas[col_index])
    
    return np.insert(final_match, 0, dmatch[:, 0], axis=1)

# Main

In [35]:
from tqdm import tqdm

P2, quotas, df_prof = load_prof("Example_ULA_Applications_W24_2.csv")
# print(P2)
# print("QUOTAS", quotas)
# exit(0)

# with np.printoptions(threshold=np.inf):
#     print(load_csv("Example_ULA_Applications.csv")[0])


# exit(0)

P1, df_filtered, df_original = load_csv("Example_ULA_Applications_W24.csv")


# print(P1.shape[0])
# test_quotas = np.ones((P1.shape[0],1)).astype(int) * 3
# test_quotas = np.ones((1,P1.shape[1])).astype(int) * 3
test_quotas = quotas
# print(test_quotas)

# courseTopChoices(df_original)

# exit(0)




P1, P2 = matchMatrices(P1, P2)
experience = P1[:, -1]

# print(P1.shape, P2.shape, experience)
# print(P1[0], P2[0])


P1 = P1[:, :-1]



# print("Original Matrices: \n", P1.shape, P2.shape, test_quotas.shape)

match1, _ = GaleShapleyAlgorithmQuota(P2[:, 1:], P1[:, 1:], quota=test_quotas, info=experience)

# print(match1)

# exit(0)


p = np.random.permutation(P1.shape[0])


incorrect_sum = 0
incorrect = 0
iterations = 10000
total_match = np.zeros_like(P1)[:,1:]

for i in tqdm(range(iterations)):
    p = np.random.permutation(P1.shape[0])
    # print(p)
    s = np.empty(p.size, dtype=np.int32)
    for i in np.arange(p.size):
        s[p[i]] = i

    # print("s: ", s)

    P1_rand, P2_rand = matchMatrices(P1[p], P2)

    # print(np.all(P1_rand[s][:,0] == P1[:,0]))
    # print(test_quotas,p)

    
    # print(P1, P2)
    # print(P1[:, 1:].shape, P2[:, 1:].shape, test_quotas)

    # match1, _ = GaleShapleyAlgorithmQuota(P2[:, 1:], P1[:, 1:], quota=test_quotas)
    match2, _ = GaleShapleyAlgorithmQuota(P2_rand[:, 1:], P1_rand[:, 1:], quota=test_quotas, info=experience)

    match2 = match2[s]


    # with np.printoptions(threshold=np.inf):
    #     # if i == 0:
    #     #     print(P1-(P1[p])[s])
    #     #     print("MATCH DIFF:\n\n\n\n\n\n:", match1-match2)

    #     if not np.all(np.abs(match1-match2) < 0.1): incorrect += 1
    #     incorrect_sum += np.count_nonzero(match1-match2)

    total_match = total_match + match2

# TODO: Reliability in outcomes "krpendorf's alpha(?)"

# print("Randomized Matrices: \n", P1_rand, "\n", P2_rand)
# print("Matchings:")
# print(np.insert(match1, 0, P1[:, 0], axis=1))
# print(match2)

diffused_matches = np.round(np.insert(total_match / iterations * 1000, 0, P1[:, 0], axis=1))




# with np.printoptions(suppress=True,precision=1,threshold=np.inf):
#     print("DIFFUSED MATCHES: ")
#     print(diffused_matches)

#     print(np.round(np.insert(total_match / iterations * 1000, 0, P1[:, 0]/100, axis=1)))
#     # print(total_match)
#     print(np.sum(np.round(total_match) == iterations))
#     print(np.sum(np.round(total_match) >= iterations/2.0))
# print(np.sum(total_match / iterations, axis=1))
# print(np.sum(total_match / iterations, axis=0))



# print("MATCH DIFF:\n\n\n\n")

# print(P1_rand[:,1:].shape, P2_rand[:, 1:].shape, P1[:, 1:].shape, P2[:, 1:].shape)
# print(test_quotas, P1_rand[:, 1:], P2_rand[:, 1:], P1[:, 1:], P2[:, 1:], s, sep="\n\n")
# print("incorrect:", incorrect)
# print("incorrect sum:", incorrect_sum)
# print("avg total diff:", incorrect_sum *1.0 / iterations)
# print("avg diff:", incorrect_sum *1.0 / iterations / P1.shape[0])

# print(test_quotas, P1_rand[:, 1:], P2_rand[:, 1:], P1[:, 1:], P2[:, 1:], s, sep="\n\n")

diff = match1 * iterations - total_match
incorrect = np.sum(np.abs(diff))


# print(match1 * iterations, "\n", total_match)
# print("diff", diff)
print("\navg incorrect:", incorrect)
# print("avg incorrect sum:", incorrect_sum)
print("avg avg total diff:", incorrect *1.0 / iterations)
print("avg avg diff:", incorrect *1.0 / iterations / P1.shape[0])
print("\n")

with np.printoptions(suppress=True,precision=1):
    maxMatch = greedyMaxProbMatch(diffused_matches, test_quotas)
    # print(maxMatch)
    # print(match1)


# exit(0)

# with np.printoptions(threshold=np.inf):
#     print(match1, P1[:, 0])

# print(np.count_nonzero(match1))

# print(df_filtered)
# print(np.nonzero(match[0])[0][0])

# Don't want ID
df_filtered_columns_list = df_filtered.columns.to_list()[1:]

# print("ID:", df_filtered.iloc[51]["ID"], "Course:", df_filtered_columns_list[np.nonzero(match[51])[0][0]][20:-1])
# print(df_filtered_columns_list)
counter = 0
# print("MATCHES (ID, COURSE):")

match_dict = {}

# print(df_filtered)
for i in range(len(maxMatch)):
    # Skip non-matched
    # with np.printoptions(suppress=True,precision=1): print(maxMatch[i])
    if np.all(maxMatch[i, 1:] == 0): continue

    counter += 1

    id = str(int(maxMatch[i,0]))
    # print(maxMatch[i,0])
    # print()
    course_number = df_filtered_columns_list[np.nonzero(maxMatch[i, 1:])[0][0]][20:-1].split()[0]

    # print(id, course_number)
    match_dict[id] = course_number

# print(df_prof.iloc[:, [0,2]]['ID'])
# print("\n\n\n")


experience_dict = dict(zip(df_original.iloc[:, 1], df_original.iloc[:, 2]))
# print("EXP DICT:", experience_dict)

reference_dict = dict(zip(df_prof.iloc[1:, 0], df_prof.iloc[1:, 2]))
print("ALL PAIRED MATCHES:")
for id in match_dict: print(id, match_dict[id])
print("\n")

# print("REF DICT:", reference_dict)

# print('\n\n\n')

match_comparison = []
correct = 0
for reference in reference_dict:
    # print(reference, type(reference))
    aligned_match = reference, reference_dict.get(reference), match_dict.get(reference) if match_dict.get(reference) != None else "-"
    match_comparison.append(aligned_match)
    if aligned_match[1].lower().strip() == aligned_match[2].lower().strip(): 
        correct += 1
    else:
        continue
        # print(aligned_match)
    

    # print(aligned_match)
# print(df_prof.iloc[1:, [0,2]].to_dict())


course_reference_inv = {}
course_match_inv = {}
course_exp_reference = {}
course_exp_match = {}
for course in set(reference_dict.values()): course_exp_reference[course] = [0, 1]
for course in set(match_dict.values()): course_exp_match[course] = [0, 1]
for key, value in reference_dict.items():
    course_reference_inv.setdefault(value, []).append(key)
for key, value in match_dict.items():
    course_match_inv.setdefault(value, []).append(key)



# for 

# for course in set(reference_dict.values()):
#     if course == '-': continue
#     for student in course_reference_inv.get(course):
#         student = int(student)
#         print(course, student, course_exp_reference.get(course), experience_dict.get(student), type(student))
#         course_exp_reference.get(course)[0] += experience_dict.get(student)
#         course_exp_reference.get(course)[1] *= experience_dict.get(student)
    
# for course in set(reference_dict.values()):
#     print(course_exp_reference.get(course)[0] > 0, course_exp_reference.get(course)[0] == 0)
    
    

print("MATCHING PERFORMANCE RELATIVE TO GIVEN")
print(f"Identical Matches: {correct}, Total Matches: {len(match_comparison)}")
print(f"Relative Identical Matches: {round(correct/len(match_comparison)*100,2)}%\n")

# print(maxMatch)
# print(experience_dict)
# print(reference_dict)

# print(len(experience_dict), len(reference_dict))


# Build dictionary of ULAs per class
classes_dict = {}
for id in reference_dict:
    if reference_dict[id] not in classes_dict: classes_dict[reference_dict[id]] = [id]
    else: classes_dict[reference_dict[id]].append(id)

# Build dictionary of whether or not each class has a mixed-ULA experience (at least 1 new, 1 experienced)
class_experience_dict = classes_dict.copy()
avoided_ids = {}
for class_id in class_experience_dict:
    experienced, inexperienced = False, False

    # print(class_experience_dict)

    for student_id in class_experience_dict[class_id]:
        student_id = int(student_id)
        if student_id not in experience_dict:
            try: avoided_ids[student_id] = reference_dict[student_id]
            except: avoided_ids[student_id] = reference_dict[str(student_id)]
            continue
        experienced = experienced or experience_dict[student_id] > 0
        inexperienced = inexperienced or experience_dict[student_id] <= 0

    if experienced and inexperienced: class_experience_dict[class_id] = True
    elif experienced: class_experience_dict[class_id] = "No inexperienced"
    elif inexperienced: class_experience_dict[class_id] = "No experienced"
    else: class_experience_dict[class_id] = False

# print(class_experience_dict,"\n")
# print(reference_dict,"\n")
# print(experience_dict,"\n")

# print(list(class_experience_dict.values()).count(True))
print("Skipped matches:", avoided_ids)
print(f"Proportion of mixed exp classes: {round(list(class_experience_dict.values()).count(True) / len(class_experience_dict)*100, 2)}%")
    


# print(counter)


  values = array(values, copy=None, ndmin=arr.ndim, dtype=arr.dtype)
  0%|          | 0/10000 [00:00<?, ?it/s]

100%|██████████| 10000/10000 [00:04<00:00, 2339.82it/s]


avg incorrect: 165126
avg avg total diff: 16.5126
avg avg diff: 0.1260503816793893


ALL PAIRED MATCHES:
56764 5A
201482 8W
246205 111
392006 130A
282467 130B
286479 5A
450682 5B
12715 5A
358604 64
437393 16
353696 154
532540 130B
226728 24
326744 154
124908 130B
121163 5A
553587 5A
452520 130A
586513 8W
268654 8W
504594 5A
533793 16
392802 40
155573 156
68873 9
80296 16
236703 154
242174 24
238731 16
345323 130B
611178 16
635894 9
218710 16
534294 16
339740 9
244938 40
621851 40
245104 154
299050 8W
99385 24
576594 148
522736 5B
22665 16
14993 130B
561779 16
171863 16
447435 5B
453077 154
578302 111
46173 64
24997 148
431873 5A
492542 130B


MATCHING PERFORMANCE RELATIVE TO GIVEN
Identical Matches: 94, Total Matches: 131
Relative Identical Matches: 71.76%

Skipped matches: {65: 'W8'}
Proportion of mixed exp classes: 58.82%





# Benchmarking time

In [78]:
import time

In [None]:
def email_benchmark(num_emails: int, skip: int = 1, seeds: int = 1):
    times = []
    for counter in range(0, num_emails, skip):
        email_tuples = []
        for count in range(counter):
            email_tuples.append((f"erik({count})", "erikfeng16@gmail.com"))

        total_time = 0
        for seed in range(seeds):
            start_time = time.perf_counter()
            send_emails(email_tuples, "ula_rejection", name_variable="applicant")
            end_time = time.perf_counter()

            total_time += start_time-end_time

        total_time /= seeds

        times.append([counter, total_time])
    return times


In [87]:
# print(email_benchmark(10, seeds=10))

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': 'Winter 25', 'next_quarter': 'Spring 25'}
LA positions for {{current_quarter}} have been filled

ula_rejection
data: {'current_quarter': '