In [1]:
import os

import numpy as np
from tqdm import tqdm

from gale_shapley import run_matching
from oneshot import oneshot

In [2]:
def dict_to_list(dictionary):
	"""Turn a dict keyed by 0..n-1 into a list ordered by key.

	The result has one slot per entry of `dictionary`; slot `i` holds
	`dictionary.get(i)`, so an index that is not a key yields `None`.
	"""
	return [dictionary.get(index) for index in range(len(dictionary))]

In [3]:
def compute_metrics(bins):
	"""Summarise a list of placement bins into two scalar metrics.

	Parameters:
		bins: sequence of sized collections, one per placement. The weights
			below are fixed to 12 values, so 13 bins are expected.

	Returns:
		(match_ratio, average_placement) where
		* match_ratio is 1 minus the share of students in the LAST bin, and
		* average_placement is the sum over bins 1..12 (0-based) of that
		  bin's share multiplied by the weights 1..12.
	"""
	bin_sizes = np.array([len(members) for members in bins])
	total = np.sum(bin_sizes)
	shares_after_first = bin_sizes[1:] / total
	weights = np.arange(1, 13)

	# NOTE(review): the slice [1:] skips bin 0 and includes the last bin, which
	# match_ratio treats as "not matched" -- confirm this weighting is intended.
	average_placement = np.sum(shares_after_first * weights)
	match_ratio = 1 - (bin_sizes[-1] / total)

	return match_ratio, average_placement

In [4]:
def merge(a, b):
	"""Concatenate two equally long sequences of lists, position by position.

	Returns a new list whose i-th entry is `a[i] + b[i]`; neither input is
	modified. Raises ValueError (without a message) when the lengths differ.
	"""
	if len(a) == len(b):
		return [left + right for left, right in zip(a, b)]
	raise ValueError()

In [32]:
# Simulation scenarios keyed by stage number (1-9). Each value is unpacked as
# keyword arguments into `oneshot(...)`; the trailing comment on every line
# records what that parameter combination is meant to represent.
stages = {
	1: dict(sel=1, rnk=1, adm=1, max_schools=True),  # Rand selection, rand ordering, open schools
	2: dict(sel=1, rnk=0, adm=1, max_schools=True),  # Rand selection, like ordering, open schools
	3: dict(sel=0, rnk=1, adm=1, max_schools=True),  # Pop selection, rand ordering, open schools
	4: dict(sel=0, rnk=0, adm=1, max_schools=True),  # Pop selection, like ordering, open schools
	5: dict(sel=None, rnk=None, adm=1, max_schools=True),  # Combination students, open schools
	6: dict(sel=None, rnk=None, adm=2, max_schools=True),  # Combination students, EdOpt schools
	7: dict(sel=None, rnk=None, adm=3, max_schools=True),  # Combination students, screen schools
	8: dict(sel=None, rnk=None, adm=None, max_schools=True),  # Combination students & schools
	9: dict(sel=None, rnk=None, adm=None, max_schools=False),  # Combination students & schools, variable list length
}

In [19]:
# Compute statistics for all stages.
# For every stage the simulation is repeated over 10 seeds; the per-placement
# bins and GPAs are pooled over the seeds before the metrics are computed.
num_bins = 13  # Placements 1-12 plus one final bin for students without a rank
stats = {}
# One slot per stage; stage numbers start at 1, hence the `stage - 1` indexing
match_ratios = np.zeros(len(stages))
average_placements = np.zeros(len(stages))

for stage, params in stages.items():
	print(f'\nComputing metrics for stage {stage}')

	# Build independent lists: `[[]] * n` would put the SAME list in every
	# slot, which breaks as soon as one of them is appended to in place.
	bins_combined = [[] for _ in range(num_bins)]
	gpa_by_placement = [[] for _ in range(num_bins)]
	# Compute metrics over different random states
	for random_state in tqdm(range(1, 11)):
		# Generate input
		students, schools, student_info, school_info = oneshot(seed=random_state, return_list=True, **params)
		# Run the matching
		bins, matches, _ = run_matching(students, student_info, schools, school_info)
		# Merge bins over random states
		bins_combined = merge(bins_combined, dict_to_list(bins))
		for student_id, outcome in matches.items():
			# Field 5 of the student record is used as the GPA
			gpa = student_info[student_id][5]
			# A falsy rank (e.g. None) is filed under the last bin
			placement = outcome['rank'] or num_bins
			# Append in place: rebuilding the list with `+ [gpa]` for every
			# student copies it each time and is quadratic in the bin size.
			gpa_by_placement[placement - 1].append(gpa)

	# Save stats for later
	counts = np.array([len(members) for members in bins_combined])
	# Upper median of each bin; an empty bin has no median, so store None
	# instead of failing with an IndexError.
	medians = [
		sorted(members)[len(members) // 2] if len(members) > 0 else None
		for members in bins_combined
	]
	# np.mean([]) would emit a RuntimeWarning and return nan; make it explicit
	gpa_avg = [np.mean(gpas) if len(gpas) > 0 else np.nan for gpas in gpa_by_placement]
	stats[stage] = { 'counts': counts, 'medians': medians, 'gpas': gpa_avg }

	# Compute the metrics on the pooled bins and store them for this stage
	match_ratio, average_placement = compute_metrics(bins_combined)
	match_ratios[stage - 1] = match_ratio
	average_placements[stage - 1] = average_placement


Computing metrics for stage 1


100%|██████████| 10/10 [07:31<00:00, 45.11s/it]



Computing metrics for stage 2


100%|██████████| 10/10 [24:09<00:00, 144.97s/it]



Computing metrics for stage 3


100%|██████████| 10/10 [08:22<00:00, 50.27s/it]



Computing metrics for stage 4


100%|██████████| 10/10 [23:40<00:00, 142.09s/it]



Computing metrics for stage 5


100%|██████████| 10/10 [16:26<00:00, 98.60s/it] 



Computing metrics for stage 6


100%|██████████| 10/10 [16:34<00:00, 99.43s/it] 



Computing metrics for stage 7


100%|██████████| 10/10 [16:56<00:00, 101.64s/it]



Computing metrics for stage 8


100%|██████████| 10/10 [16:52<00:00, 101.27s/it]



Computing metrics for stage 9


100%|██████████| 10/10 [20:13<00:00, 121.35s/it]


In [63]:
# Persist the results so the slow simulation cell does not have to be re-run.
# np.save does not create missing directories, so make sure the target exists.
os.makedirs('./Data/Metrics', exist_ok=True)
# `stats` is a dict, not an array: it is written as a pickled object array
# and has to be read back with allow_pickle=True followed by `.item()`.
np.save('./Data/Metrics/stats.npy', stats, allow_pickle=True)
np.save('./Data/Metrics/match_ratios.npy', match_ratios)
np.save('./Data/Metrics/average_placements.npy', average_placements)

In [50]:
# Reload previously saved results instead of re-running the simulation.
# The stats dict was stored as a pickled object array, so it needs
# allow_pickle=True and `.item()` to get the dict back. Only load pickled
# files that this notebook wrote itself.
stats_array = np.load('./Data/Metrics/stats.npy', allow_pickle=True)
stats = stats_array.item()
average_placements = np.load('./Data/Metrics/average_placements.npy')
match_ratios = np.load('./Data/Metrics/match_ratios.npy')

In [60]:
# Print match ratio (mr) and average placement (ap) per stage.
# Stages are numbered from 1, the result arrays are indexed from 0.
for stage in stages:
	print(f'Stage {stage}: mr = {match_ratios[stage-1]}, ap = {average_placements[stage-1]}')

Stage 1: mr = 0.9260757894736842, ap = 2.0215536842105264
Stage 2: mr = 0.9869108771929824, ap = 3.8595550877192983
Stage 3: mr = 0.9639887719298246, ap = 1.5317277192982455
Stage 4: mr = 0.9484112280701754, ap = 6.221454035087719
Stage 5: mr = 0.9830343859649123, ap = 2.165397894736842
Stage 6: mr = 0.9830821052631579, ap = 2.162769122807017
Stage 7: mr = 0.9829600000000001, ap = 2.1648252631578946
Stage 8: mr = 0.983121403508772, ap = 2.1668898245614034
Stage 9: mr = 0.9541150877192982, ap = 1.6068856140350878


In [61]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def generate_hex_labels():
	"""Build axis ticks for the leading digit of an 8-digit hex number.

	Returns:
		(ticks, labels): `labels` is 0..9 as ints followed by 'a'..'f' as
		strings; `ticks[i]` is the integer value of that digit followed by
		seven zero hex digits.
	"""
	hex_digits = '0123456789abcdef'
	labels = [int(digit) if digit.isdigit() else digit for digit in hex_digits]
	ticks = [int(digit + '0000000', 16) for digit in hex_digits]
	return ticks, labels

def hex_to_int(numbers: list[str]) -> np.array:
	"""Convert hex strings (without '0x' prefix) to an array of integers.

	Only the first 8 characters of each string are used; shorter strings are
	parsed in full.
	"""
	parsed = []
	for hex_string in numbers:
		# Slicing already leaves strings of 8 or fewer characters untouched
		leading_digits = hex_string[:8]
		parsed.append(int('0x' + leading_digits, 0))
	return np.array(parsed)

In [67]:
# Interactive figure for a single stage: share of students per placement
# (bars) with the average GPA per placement (line) on a secondary y-axis.
stage = 6  # 1 ... 9
stage_stats = stats[stage]
counts = stage_stats['counts']
medians = stage_stats['medians']
gpas = stage_stats['gpas']

num_students = np.sum(counts)
# x positions: 1-12 for the ranked placements, 13.5 for the final bar
choices = [*range(1, 13), 13.5]

fig = make_subplots(specs=[[{'secondary_y': True}]])
share_bars = go.Bar(x=choices, y=counts/num_students, name='Percentage of students matched')
gpa_line = go.Scatter(x=choices, y=gpas, name='Average GPA')
fig.add_trace(share_bars, secondary_y=False)
fig.add_trace(gpa_line, secondary_y=True)
# Alternative secondary trace (median lottery number instead of GPA):
# fig.add_trace(go.Scatter(x=choices, y=hex_to_int(medians), name='Median lottery number'), secondary_y=True)

# Only needed by the alternative (commented-out) lottery-number axis below
ticks_hex, labels_hex = generate_hex_labels()
fig.update_layout(
	width=600,
	height=400,
	showlegend=False,
	xaxis={
		'title': 'Placement in ranking',
		'tickmode': 'array',
		'tickvals': choices,
		# Same positions as `choices`, but the last tick is labelled as text
		'ticktext': choices[:-1]+['Unmatched'],
	},
	yaxis={'title': 'Students matched', 'range': [0, 1], 'tickformat': '.0%'},  # Histogram
	yaxis2={'title': 'Average GPA', 'range': [0, 100]},  # GPA plot
	# yaxis2=dict(title='Median lottery number (first digit)', tickmode='array', tickvals=ticks_hex, ticktext=labels_hex, range=[0, 4.295e9], showgrid=False),
	margin=dict(t=20, l=0, b=0, r=0),
	font=dict(family='Lato'),
)

fig.show()

In [70]:
# Render one large figure per stage and write it to the report folder as PNG.
# x positions: 1-12 for the ranked placements, 13.5 for the final bar
choices = [*range(1, 13), 13.5]
# Only needed by the alternative (commented-out) lottery-number axis below
ticks_hex, labels_hex = generate_hex_labels()

# Axis configuration is identical for every stage, so build it once
placement_axis = {
	'title': 'Placement in ranking',
	'tickmode': 'array',
	'tickvals': choices,
	'ticktext': choices[:-1]+['Unmatched'],
}
share_axis = {'title': 'Students matched', 'range': [0, 1], 'tickformat': '.0%'}
gpa_axis = {'title': 'Average GPA', 'range': [0, 100]}

for stage in range(1, 10):
	stage_stats = stats[stage]
	counts = stage_stats['counts']
	medians = stage_stats['medians']
	gpas = stage_stats['gpas']
	num_students = np.sum(counts)

	fig = make_subplots(specs=[[{'secondary_y': True}]])
	share_bars = go.Bar(x=choices, y=counts/num_students, name='Percentage of students matched')
	# Thicker line and markers than the interactive version, for the large export
	gpa_line = go.Scatter(x=choices, y=gpas, name='Average GPA', marker={'size': 20}, line={'width': 5})
	fig.add_trace(share_bars, secondary_y=False)
	fig.add_trace(gpa_line, secondary_y=True)
	# Alternative secondary trace (median lottery number instead of GPA):
	# fig.add_trace(go.Scatter(x=choices, y=hex_to_int(medians), name='Median lottery number', marker={'size': 20}, line={'width': 5}), secondary_y=True)

	fig.update_layout(
		width=1600,
		height=1200,
		showlegend=False,
		xaxis=placement_axis,
		yaxis=share_axis,
		yaxis2=gpa_axis,
		# yaxis2=dict(title='Median lottery number (first digit)', tickmode='array', tickvals=ticks_hex, ticktext=labels_hex, range=[0, 4.295e9], showgrid=False),
		margin=dict(t=50, l=0, b=0, r=0),
		font=dict(family='Lato', size=36),
	)

	fig.write_image(f'../Report/hist_stage_{stage}_gpa.png')