In [1]:
import numpy as np
from matplotlib  import pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from matplotlib import cm
from matplotlib import rc
import matplotlib.colors as colors
import scipy.stats as st
from scipy.stats import multivariate_normal
from scipy.stats import norm
from scipy.optimize import fsolve
from scipy.misc import derivative
import seaborn as sns
from seaborn import heatmap
import pandas as pd
from IPython.display import set_matplotlib_formats
import random
import time
set_matplotlib_formats('png', 'pdf')
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": 'Computer Modern Serif'
})

  set_matplotlib_formats('png', 'pdf')


## Cumulative probability functions

In [2]:
def cdf(x, sigma):
    return norm.cdf(x, scale = sigma)

In [3]:
def cdfm(x, y, sigma, rho):
    return multivariate_normal([0, 0], [[sigma**2, sigma**2 * rho], [sigma**2 * rho, sigma**2]]).cdf(np.array([x,y]))

In [4]:
def cdfmsi(x, y, sigma, rho):
    return norm.cdf(y, scale = sigma) - multivariate_normal([0, 0], [[sigma**2, sigma**2 * rho], [sigma**2 * rho, sigma**2]]).cdf(np.array([x,y]))

In [5]:
def cdfmis(x, y, sigma, rho):
    return norm.cdf(x, scale = sigma) - multivariate_normal([0, 0], [[sigma**2, sigma**2 * rho], [sigma**2 * rho, sigma**2]]).cdf(np.array([x,y]))

## Market clearing equation

In [6]:
def market_clear(Pa, Pb, prop, capA, capB, prefi, prefii, sigmai, sigmaii, cori, corii):
    f1 = prop*prefi*(1 - cdf(Pa, sigmai)) + (1 - prop)*prefii*(1 - cdf(Pa, sigmaii)) + prop*(1 - prefi)*cdfmsi(Pa, Pb, sigmai, cori) + (1 -prop)*(1 - prefii)*cdfmsi(Pa, Pb, sigmaii, corii) - capA
    f2 = prop*(1 - prefi)*(1 - cdf(Pb, sigmai)) + (1 - prop)*(1 - prefii)*(1 - cdf(Pb, sigmaii)) + prop*prefi*cdfmis(Pa, Pb, sigmai, cori) + (1 -prop)*prefii*cdfmis(Pa, Pb, sigmaii, corii) - capB
    return f1, f2

In [7]:
def solv_mc(prop, capA, capB, prefi, prefii, sigmai, sigmaii, cori, corii):
    func = lambda P: market_clear(P[0], P[1], prop, capA, capB, prefi, prefii, sigmai, sigmaii, cori, corii)
    return fsolve(func, x0 = np.array([0.5, 0.5]))

# Gale-Shapley simulation framework for two colleges

## Creates a table of students. Each vector in the list is a group, each element is the latent quality of a student.

### Arguments: Number of students, proportion of each group, mean of each group's latent quality, standard deviation of each group's latent quality
### Output: Table of students

In [8]:
def create_students(n_stud: int, prop_gp: list, mean_gp: list, std_gp: list):
    n_gp = prop_gp.__len__()
    n_gp1 = mean_gp.__len__()
    n_gp2 = std_gp.__len__()
    if ((n_gp==n_gp1) and (n_gp==n_gp2)) == False:
        print("group arguments of different sizes")
        return 0
    s = 0
    for i in range(n_gp):
        if ((prop_gp[i] < 0) or (prop_gp[i] > 1)):
            print("wrong proportions")
            return 0
        s = s + prop_gp[i]
    if ((s<0.99) or (s>1.01)):
        print("wrong proportions")
        return 0
    nb_stud_gp = [int(x*n_stud) for x in prop_gp]
    students = []
    for i in range(n_gp):
        stud_gp = np.random.normal(mean_gp[i], std_gp[i], nb_stud_gp[i])
        students.append(stud_gp)
    return students

## Creates noisy estimates of students qualities for a variable number of colleges. The output is a list of tables of the same format that the students table, one for each college. The bias and standard devaition are group-dependant and common to all colleges, but each college randomly draws an independant value.

### Arguments: Number of colleges, students latent qualities table, bias for each group, standard deviation for each group
### Output: List of student-like tables

### Grades: Latent quality + noise

In [9]:
def create_col_estim(n_col,students, noise_mean, noise_std):
    n_gp = noise_mean.__len__()
    n_gp1 = noise_std.__len__()
    n_gp2 = students.__len__()
    if ((n_gp==n_gp1) and (n_gp==n_gp2)) == False:
        print("group arguments of different sizes")
        return 0
    col_estim = []
    for i in range (n_col):
        estim = students.copy()
        for j in range(n_gp):
            m = students[j].size
            noise = np.random.normal(noise_mean[j], noise_std[j], m)
            estim[j] = students[j] + noise
        col_estim.append(estim)
    return col_estim

In [10]:
def create_col_estim_test(n_col,students, noise_mean, noise_std):
    n_gp = noise_mean.__len__()
    n_gp1 = noise_std.__len__()
    n_gp2 = students.__len__()
    if ((n_gp==n_gp1) and (n_gp==n_gp2)) == False:
        print("group arguments of different sizes")
        return 0
    col_estim = []
    for i in range (n_col):
        estim = students.copy()
        print('student: ', students)
        print('estim init: ', estim)
        for j in range(n_gp):
            m = students[j].size
            noise = np.random.normal(noise_mean[j], noise_std[j], m)
            estim[j] = students[j] + noise
        col_estim.append(estim)
        print('College estim:', col_estim)
    return col_estim

In [12]:
students = create_students(5, [0.4, 0.6], [10, 10], [2, 2])
col = create_col_estim_test(2, students, [10, 10], [2, 2])
print(students)
print (col)

student:  [array([11.10067986,  9.64628848]), array([13.9946326 , 12.45295617, 10.45627982])]
estim init:  [array([11.10067986,  9.64628848]), array([13.9946326 , 12.45295617, 10.45627982])]
College estim: [[array([24.9391254 , 19.75290902]), array([23.26688368, 21.95324891, 21.30586097])]]
student:  [array([11.10067986,  9.64628848]), array([13.9946326 , 12.45295617, 10.45627982])]
estim init:  [array([11.10067986,  9.64628848]), array([13.9946326 , 12.45295617, 10.45627982])]
College estim: [[array([24.9391254 , 19.75290902]), array([23.26688368, 21.95324891, 21.30586097])], [array([20.76105135, 16.93172862]), array([25.73300394, 21.91009228, 19.57382967])]]
[array([11.10067986,  9.64628848]), array([13.9946326 , 12.45295617, 10.45627982])]
[[array([24.9391254 , 19.75290902]), array([23.26688368, 21.95324891, 21.30586097])], [array([20.76105135, 16.93172862]), array([25.73300394, 21.91009228, 19.57382967])]]


### Grades: weighted mean between latent quality and noise, weight = noise_amount

In [11]:
def create_col_estim_var(n_col,students, noise_ammount, mean, noise_std):
    n_gp = mean.__len__()
    n_gp2 = students.__len__()
    if  (n_gp==n_gp2) == False:
        print("group arguments of different sizes")
        return 0
    col_estim = []
    for i in range (n_col):
        estim = students.copy()
        for j in range(n_gp):
            m = students[j].size
            group = np.zeros(m)
            for stud in range(m):
                noise = np.random.normal(mean[j], noise_std)
                group[stud] = (((1-noise_ammount[j])*students[j][stud] + noise_ammount[j]*noise)/(np.sqrt(noise_ammount[j]**2 + (1-noise_ammount[j])**2)))
            estim[j] = group
        col_estim.append(estim)
    return col_estim

In [14]:
group = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], size = 5)
print(group)
print(np.transpose(group)[0])

[[-0.23537625 -0.58053967]
 [-0.86722006 -0.48676232]
 [-0.15434364 -0.58274058]
 [ 1.48647021  1.85785626]
 [ 0.4339343   1.05002749]]
[-0.23537625 -0.86722006 -0.15434364  1.48647021  0.4339343 ]


### Grades: bivariate gaussian

In [12]:
def create_col_estim_biv(n_col,students, mean, std, corr):
    n_gp = mean.__len__()
    n_gp2 = students.__len__()
    if  (n_gp==n_gp2) == False:
        print("group arguments of different sizes")
        return 0
    col_estim = [[], []]
    for j in range(n_gp):
        m = students[j].size
        group = np.random.multivariate_normal([mean[j], mean[j]], [[std[j]**2, (std[j]**2)*corr[j]], [(std[j]**2)*corr[j], std[j]]], size = m)
        estim_A = np.transpose(group)[0]
        estim_B = np.transpose(group)[1]
        col_estim[0].append(estim_A)
        col_estim[1].append(estim_B)
    return col_estim

## Creates student preferences. Returns the student table but with a list of preferences instead of the quality of each student. The preferences are sampled uniformly across all permutations.

### Arguments: Students table, number of colleges
### Output: Student-like table, each element is a permutation of [1, m] (m = number of colleges)

In [13]:
def create_stud_pref(students:list, n_col:int):
    stud_pref=[]
    for i in range(students.__len__()):
        group = []
        for j in range(students[i].size):
            group.append(random.sample(range(n_col), n_col))
        stud_pref.append(group)
    return stud_pref

In [17]:
create_stud_pref(students,3)

[[[2, 0, 1], [1, 0, 2]], [[1, 0, 2], [1, 0, 2], [2, 1, 0]]]

## Creates preferences in a 2 colleges problem, with control over the distribution of preferences.

### Arguments: Students table, proportion of students who prefer college 0
### Output: Same as function above

In [14]:
def create_stud_pref_2(students, prop_0):
    stud_pref=[]
    for i in range(students.__len__()):
        group = []
        for j in range(students[i].size):
            rand = np.random.rand()
            if rand < prop_0:
                group.append([0,1])
            else:
                group.append([1,0])
        stud_pref.append(group)
    return stud_pref

In [19]:
test_stud_pref = create_stud_pref_2(students,0.4)
print(test_stud_pref)

[[[1, 0], [1, 0]], [[1, 0], [0, 1], [0, 1]]]


## Creates preferences in a 2 colleges problem, with control over the distribution of preferences. The proportions are deterministically respected: the number of students with each prioriry profile is exact

### Arguments: Students table, proportion of students who prefer college 0
### Output: Same as function above

In [15]:
def create_stud_pref_3(students, prop_0):
    stud_pref=[]
    for i in range(students.__len__()):
        group = []
        for j in range(students[i].size):
            if j < students[i].size*prop_0:
                group.append([0,1])
            else:
                group.append([1,0])
        stud_pref.append(group)
    return stud_pref

In [21]:
def create_stud_pref_groups(students, prop_0):
    stud_pref=[]
    for i in range(students.__len__()):
        group = []
        for j in range(students[i].size):
            if j < students[i].size*prop_0[i]:
                group.append([0,1])
            else:
                group.append([1,0])
        stud_pref.append(group)
    return stud_pref

## Transforms a preferences table to a table containing, for each student, the rank she attributed to each college (element i, j = rank of college j for student i)

In [16]:
def pref_to_rank(stud_pref):
    stud_rank = []
    n_col = stud_pref[0][0].__len__()
    for i in range(stud_pref.__len__()):
        group = []
        for j in range (stud_pref[i].__len__()):
            pref = stud_pref[i][j]
            rank = np.zeros(n_col)
            for k in range(n_col):
                rank[stud_pref[i][j][k]] = k
            group.append(rank)
        stud_rank.append(group)
    return stud_rank

In [23]:
pref_to_rank(test_stud_pref)

[[array([1., 0.]), array([1., 0.])],
 [array([1., 0.]), array([0., 1.]), array([0., 1.])]]

## Takes a table of students grades (latent or estimated) and outputs a table of the same format containing the ranking of each student instead of her grade.

### Arguments: Students table or estimated qualities table
### Output: Student-like table, each element is the ranking of the student for the input quality esimator

In [24]:
def grades_to_rank (students):
    grades = []
    for i in range (students.__len__()):
        for j in range (students[i].size):
            grades.append(students[i][j])
    positions = sorted(range(len(grades)), key=lambda x : grades[x], reverse=True) #If student i is ranked j-th, then position[j] = i
    ranks = sorted(range(len(grades)), key=lambda x : positions[x], reverse=False) #If student i is ranked j-th, then rank[i] = j
    rank = []
    count = 0
    for i in range(students.__len__()):
        group = []
        for j in range(students[i].size):
            group.append(ranks[count + j])
        rank.append(group)
        count = count + students[i].size
    return rank

In [25]:
grades_to_rank(students)

[[2, 4], [0, 1, 3]]

In [26]:
students

[array([11.10067986,  9.64628848]),
 array([13.9946326 , 12.45295617, 10.45627982])]

## Applies the previous function to all colleges estimators. Outputs a list of one table per college.

### Arguments: Colleges estimations
### Output: List of ranking tables

In [27]:
def glob_grades_to_rank(col_estim):
    n_col = col_estim.__len__()
    glob_rank=[]
    for i in range(n_col):
        glob_rank.append(grades_to_rank(col_estim[i]))
    return glob_rank

In [28]:
glob_grades_to_rank(col)

[[[0, 4], [1, 2, 3]], [[2, 4], [0, 1, 3]]]

## Takes a student_like table and flattens it - i.e. remove the group format.

### Argument: Student table
### Output: Student table without groups (one less dimension)

In [29]:
def flatten_simple(student_table):
    flat = []
    for j in range (student_table.__len__()):
        for k in range (student_table[j].__len__()):
            flat.append(student_table[j][k])
    return flat

## Applies the previous function to a list of student tables.

### Input: List of student tables
### Output: List of flattened student tables

In [30]:
def flatten(student_table_list):
    n_col = student_table_list.__len__()
    flat = []
    for i in range (n_col):
        col = []
        for j in range (student_table_list[i].__len__()):
            for k in range (student_table_list[i][j].__len__()):
                col.append(student_table_list[i][j][k])
        flat.append(col)
    return flat

## Transform a flat matching into a group matching

In [31]:
def flat_to_group_match(student, matching):
    n_col = matching.__len__()
    n_group = student.__len__()
    group_begin = [0]
    t = 0
    for i in range(1, n_group):
        t = t + student[i-1].__len__()
        group_begin.append(t)
    group_match = []
    for i in range(n_col):
        group_match.append([])
        for j in range(n_group):
            group_match[i].append([])
        for stud in matching[i]:
            k = 1
            while (stud >= group_begin[k]):
                k += 1
                if (k == n_group):
                    break
            group_match[i][k-1].append(stud)
    return group_match
    

# Implementation of the standard Gale-Shapley algorithm (without groups).

## Test version with prints

### Arguments: Students preferences, list of college rabkings, list of college capacities
### Output: List of the students matched to each college

In [32]:
def GS_no_groups_test (stud_pref, glob_rank, capacities):
    start_time = time.time()
    n_col = glob_rank.__len__()
    prefs = flatten_simple(stud_pref)
    print("preferences_flat: ", prefs)
    n_stud = prefs.__len__()
    glob = flatten(glob_rank)
    print("ranks_flat ", glob)
    matching = []
    for i in range (n_col):
        matching.append([])
    print("empty matching: ", matching)
    worst_student = np.full(n_col, -1)
    not_finished = True
    pref_iterator = np.zeros(n_stud, dtype = int)
    matched = np.full(n_stud, False, dtype=bool)
    finished = np.full(n_stud, False, dtype=bool)
    print("matched initial: ", matched)
    loop = 0
    while (not_finished):
        loop += 1
        print("loop ", loop)
        matched_temp = matched.copy()
        for i in range(n_stud):
            print("student ", i)
            if (matched_temp[i] == True):
                print("matched at beginning of loop")
                continue
            if (matched[i]==False) and (finished[i]==False):
                print("not matched yet")
                application = prefs[i][pref_iterator[i]]
                if capacities[application] == 0:
                    pref_iterator[i] += 1
                    if pref_iterator[i] >= n_col:
                        finished[i] = True
                    continue
                print ("applies to ", application)
                print("capacity: ", capacities[application])
                print("current students: ", matching[application])
                print("ranking: ", glob[application])
                worst = worst_student[application]
                print("worst student: ", worst)
                if matching[application].__len__() < capacities[application]:
                    matching[application].append(i)
                    matched[i] = True
                    if worst == -1:
                        worst_student[application] = i
                    else:
                        if glob[application][i] > glob[application][worst]:
                            worst_student[application] = i
                    print("new students: ", matching[application])
                    print("matched: " , matched)
                    print("new worst: ", worst_student[application])
                else:
                    if glob[application][i] < glob[application][worst]:
                        matching[application].remove(worst)
                        matched[worst] = False
                        matching[application].append(i)
                        matched[i] = True
                        new_worst = sorted(matching[application], key = lambda x : glob[application][x], reverse = True)[0]
                        worst_student[application] = new_worst
                        print("added ", i, ", removed ", worst)
                        print("new students: ", matching[application])
                        print("matched: ", matched)
                        print("new worst: ", worst_student[application])
                pref_iterator[i] += 1
                print("pref_iterator: ", pref_iterator[i])
            if pref_iterator[i] >= n_col:
                finished[i] = True
        if (all(matched | finished) == True):
            not_finished = False
    print(loop, " loops")
    print("--- %s seconds ---" % (time.time() - start_time))
    return matching

## Normal version

In [33]:
def GS_no_groups (stud_pref, glob_rank, capacities):
    start_time = time.time()
    n_col = glob_rank.__len__()
    prefs = flatten_simple(stud_pref)
    n_stud = prefs.__len__()
    glob = flatten(glob_rank)
    matching = []
    for i in range (n_col):
        matching.append([])
    worst_student = np.full(n_col, -1)
    not_finished = True
    pref_iterator = np.zeros(n_stud, dtype = int)
    matched = np.full(n_stud, False, dtype=bool)
    finished = np.full(n_stud, False, dtype=bool)
    while (not_finished):
        matched_temp = matched.copy()
        for i in range(n_stud):
            if (matched_temp[i] == True): continue
            if (matched[i]==False) and (finished[i]==False):
                application = prefs[i][pref_iterator[i]]
                if capacities[application] == 0:
                    pref_iterator[i] += 1
                    if pref_iterator[i] >= n_col:
                        finished[i] = True
                    continue
                worst = worst_student[application]
                if matching[application].__len__() < capacities[application]:
                    matching[application].append(i)
                    matched[i] = True
                    if worst == -1:
                        worst_student[application] = i
                    else:
                        if glob[application][i] > glob[application][worst]:
                            worst_student[application] = i
                else:
                    if glob[application][i] < glob[application][worst]:
                        matching[application].remove(worst)
                        matched[worst] = False
                        matching[application].append(i)
                        matched[i] = True
                        new_worst = sorted(matching[application], key = lambda x : glob[application][x], reverse = True)[0]
                        worst_student[application] = new_worst
                pref_iterator[i] += 1
            if pref_iterator[i] >= n_col:
                finished[i] = True
        if (all(matched | finished) == True):
            not_finished = False
    
    #print("--- %s seconds ---" % (time.time() - start_time))
    return matching

## Implementation of GS with groups

In [34]:
def GS_groups (stud_pref, glob_rank, capacities):
    matching_flat = GS_no_groups(stud_pref, glob_rank, capacities)
    group_match = flat_to_group_match(stud_pref, matching_flat)
    return group_match

In [35]:
def GS_groups_test (stud_pref, glob_rank, capacities):
    matching_flat = GS_no_groups_test(stud_pref, glob_rank, capacities)
    group_match = flat_to_group_match(stud_pref, matching_flat)
    return group_match

## Compute colleges payoff

### Input: Student's table, matching

### Output: List of each college's payoff

In [36]:
def college_payoff (students, matching):
    payoffs = []
    mean_payoffs = []
    grades = flatten_simple(students)
    m = matching.__len__()
    for c in range(m):
        payoff = 0
        k = matching[c].__len__()
        if k == 0: 
            payoffs.append(payoff)
            mean_payoffs.append(payoff)
            continue
        for s in range(k):
            payoff += grades[matching[c][s]]
        payoffs.append(payoff)
        mean_payoffs.append(payoff/k)
    return payoffs, mean_payoffs

## Welfare metrics


In [77]:
def efficiency(matching,stud):
    
    return True

In [72]:
students

[array([ 1.91650084,  0.92616536, -3.37270269,  3.70274053, -4.30105836,
        -6.24575622, -5.59919668, -1.40005059]),
 array([-5.51805378,  2.21131858, -3.61387792, -1.00863485,  2.01715877,
        -3.4530862 , -1.92926133,  0.27473445,  1.09211006,  2.20804466,
         3.73157337,  0.75762401])]

In [147]:
cnt_gr = [int(ratio*n_stud) for ratio in prop_gp]
flatten_matching = invertGS(matching,n_stud)
index_cnt_gr = np.cumsum(cnt_gr).tolist()
index_cnt_gr.insert(0,0)
#index_cnt_gr = [x - 1 for x in index_cnt_gr]


In [148]:
gr_matching = []
for i in range(len(prop_gp)):
    gr_matching.append(flatten_matching[index_cnt_gr[i]:index_cnt_gr[i+1]])

In [149]:
gr_matching

[array([ 0.,  1., -1.,  0., -1., -1., -1.,  0.]),
 array([-1.,  1., -1., -1.,  0., -1.,  1., -1.,  0.,  1.,  0., -1.])]

In [150]:
n_gr = len(prop_gp)
matching_result = []
for i in range(n_gr):
    gr_matching_result = []
    for j in range(cnt_gr[i]):
        if gr_matching[i][j] == -1:
            gr_matching_result.append(-1)
        elif gr_matching[i][j] == stud_pref[i][j][0]:
            gr_matching_result.append(0)
        elif gr_matching[i][j] == stud_pref[i][j][1]:
            gr_matching_result.append(1)
    matching_result.append(gr_matching_result)

In [152]:
matching_result

[[1, 1, -1, 0, -1, -1, -1, 1], [-1, 0, -1, -1, 1, -1, 0, -1, 1, 0, 1, -1]]

In [73]:
matching

[[3, 0, 7, 12, 16, 18], [9, 14, 17, 1]]

In [74]:
stud_pref

[[[1, 0], [0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0]],
 [[0, 1],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [1, 0],
  [0, 1]]]

# Experiment

In [18]:
from BayesEst import bayesian_est

In [17]:
#Parameter to create student
n_stud = 1000
prop_gp = [0.4,0.6] #vector gamma in the paper
mean_gp = [0,0]
chi = [4,4] #std of latent qualities of groups

For this simulation, to keep things simple we will assume that the proportion of students who prefer the first college is the same across colleges. This proportion is set using parameter `prop_all_g_prefer`.

One thing I would like to explore if I have the chance is the correlation between latent quality and the preference, i.e. student with higher latent quality (their perception of themselves?) will tend to prefer college with higher score among all students.

In [19]:
#Parameter of college
n_col = 2 #Number of college
noise_mean = [0,0]
sigma = [2,2] #std of noise of each groups
prop_all_g_prefer = 0.4 #Thihs should be the vector beta in the paper

In [20]:
students = create_students(n_stud,prop_gp,mean_gp,std_gp = chi) #Create the latent qualities vector
grade_estimated = create_col_estim_test(n_col,students, noise_mean, sigma) #Create the college estimated vectors
capacities_rate = [0.3,0.2] #This should be the vector alpha in the paper.
capacities = [int(r * n_stud) for r in capacities_rate]#This vector must have the same length as the number of college, i.e. len(capacities) = n_col. The componenets should be integers
cori = 0.5
corii = 0.3 

student:  [array([-5.03898007e+00, -2.36513157e+00,  2.42079489e+00, -4.42236508e-01,
        2.25099510e+00,  3.78703842e+00,  1.42686691e-01,  4.99036126e+00,
        2.69386700e+00,  2.06114435e+00,  2.87031817e+00, -3.08036857e+00,
        5.47292906e+00, -4.45560607e+00, -1.86553169e+00, -6.02309295e+00,
        6.76425595e+00,  2.55433150e+00, -1.60044551e+00, -6.78986542e+00,
       -4.95972852e+00, -1.24702304e+00, -4.51144821e+00,  4.93616459e-01,
       -1.89412435e+00,  3.05344143e+00,  3.66543182e+00,  2.68149122e+00,
        4.58224594e+00, -5.35653892e-01, -2.25215184e+00, -6.80941438e+00,
       -2.77896846e+00, -1.01825451e+00, -2.39902732e+00, -7.71198933e+00,
       -1.24293562e+00, -7.54880937e+00, -1.64099048e-01, -3.70744252e+00,
        1.02331932e+00,  1.37439303e+00, -2.83711101e+00, -1.69247461e+00,
        1.11173836e+01, -6.84690384e+00,  4.01969099e+00,  2.00574559e+00,
        2.06372676e+00,  3.40922994e+00,  1.70842113e+00, -1.45299401e+00,
        1.6367

In [21]:
std_estimated = [np.sqrt(i**2 + j**2) for i,j in zip(chi,sigma)]

In [22]:
P_A,P_B = solv_mc(prop_gp[0],capacities_rate[0],prop_all_g_prefer,
                  prop_all_g_prefer,std_estimated[0],std_estimated[1],capacities_rate[1],cori,corii)

In [23]:
mean = [i+j for i,j in zip(mean_gp, noise_mean)]

In [24]:
estimated_grade = create_col_estim_biv(2,students,mean,std_estimated,[cori,corii])

  group = np.random.multivariate_normal([mean[j], mean[j]], [[std[j]**2, (std[j]**2)*corr[j]], [(std[j]**2)*corr[j], std[j]]], size = m)


In [25]:
stud_pref = create_stud_pref_2(students,prop_all_g_prefer)
print(stud_pref)

[[[1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0], [0, 1], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [0, 1], [0, 1], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0]

In [178]:
students

[array([ 1.91650084,  0.92616536, -3.37270269,  3.70274053, -4.30105836,
        -6.24575622, -5.59919668, -1.40005059]),
 array([-5.51805378,  2.21131858, -3.61387792, -1.00863485,  2.01715877,
        -3.4530862 , -1.92926133,  0.27473445,  1.09211006,  2.20804466,
         3.73157337,  0.75762401])]

In [203]:
estimated_grade

[[array([ 0.91479033, -2.19528788,  0.167232  , -1.21714092,  1.58921191,
          2.17776915,  0.42242899, -1.36828935]),
  array([-3.85949825, -2.5257588 , -0.47516237,  1.30090172,  2.71082315,
         -3.78974069, -2.07440888, -2.14942275,  4.41491275, -1.58810176,
         -0.35028374,  1.11701515])],
 [array([ 0.64384097, -0.70686492, -1.6185145 , -0.59389435,  0.69180973,
          1.85712272,  1.43927954, -0.0140683 ]),
  array([-0.56775562, -2.16931205,  0.23484932,  0.47528311,  1.89912779,
         -1.2518096 , -3.51908898, -0.26850606,  0.32672662, -1.41219291,
         -0.98689227, -0.53321305])]]

In [231]:
first_choice = []
no_choice = []
for i in zip(estimated_grade[0][0],estimated_grade[1][0]):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[0]))
print(len(students[0]) - sum(no_choice)-sum(first_choice))

179
172
400
49


In [232]:
first_choice = []
no_choice = []
for i in zip(estimated_grade[0][1],estimated_grade[1][1]):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[1]))
print(len(students[1]) - sum(no_choice)-sum(first_choice))

215
213
600
172


In [259]:
#First choice
print(179/400)
print(215/600)

0.4475
0.35833333333333334


In [260]:
#Second choice
print(49/400)
print(172/600)

0.1225
0.2866666666666667


In [261]:
#Unmatch
print(172/400)
print(213/600)

0.43
0.355


In [26]:
updated_grade_A1 = [bayesian_est(i,P_B,chi[0],sigma[0]) for i in estimated_grade[0][0]]
updated_grade_A2 = [bayesian_est(i,P_B,chi[0],sigma[0]) for i in estimated_grade[0][1]]

In [252]:
first_choice = []
no_choice = []
for i in zip(updated_grade_A1,estimated_grade[1][0]):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[0]))
print(len(students[0]) - sum(no_choice)-sum(first_choice))

129
189
400
82


In [253]:
first_choice = []
no_choice = []
for i in zip(updated_grade_A2,estimated_grade[1][1]):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[1]))
print(len(students[1]) - sum(no_choice)-sum(first_choice))

142
253
600
205


In [262]:
#First choice
print(129/400)
print(142/600)

0.3225
0.23666666666666666


In [263]:
#Second choice
print(82/400)
print(205/600)

0.205
0.3416666666666667


In [264]:
#Unmatch
print(189/400)
print(253/600)

0.4725
0.4216666666666667


In [27]:
updated_grade_B1 = [bayesian_est(i,P_A,chi[1],sigma[1]) for i in estimated_grade[1][0]]
updated_grade_B2 = [bayesian_est(i,P_A,chi[1],sigma[1]) for i in estimated_grade[1][1]]

In [266]:
first_choice = []
no_choice = []
for i in zip(updated_grade_A1,updated_grade_B1):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[0]))
print(len(students[0]) - sum(no_choice)-sum(first_choice))

105
262
400
33


In [267]:
first_choice = []
no_choice = []
for i in zip(updated_grade_A2,updated_grade_B2):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[1]))
print(len(students[1]) - sum(no_choice)-sum(first_choice))

96
359
600
145


In [268]:
#First choice
print(105/400)
print(96/600)

0.2625
0.16


In [269]:
#Second choice
print(33/400)
print(145/600)

0.0825
0.24166666666666667


In [270]:
#Unmatch
print(262/400)
print(359/600)

0.655
0.5983333333333334


In [28]:
first_choice = []
no_choice = []
for i in zip(estimated_grade[0][0],updated_grade_B1):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[0]))
print(len(students[0]) - sum(no_choice)-sum(first_choice))

113
207
400
80


In [29]:
first_choice = []
no_choice = []
for i in zip(estimated_grade[0][1],updated_grade_B2):
    first_choice.append(all(np.array(i) > [P_A,P_B]))
    no_choice.append(all(np.array(i) < [P_A,P_B]))
print(sum(first_choice))
print(sum(no_choice))
print(len(students[1]))
print(len(students[1]) - sum(no_choice)-sum(first_choice))

124
283
600
193


In [30]:
#First choice
print(113/400)
print(124/600)

#Second choice
print(80/400)
print(193/600)

#Unmatch
print(207/400)
print(283/600)

0.2825
0.20666666666666667
0.2
0.32166666666666666
0.5175
0.4716666666666667
