## Import code

In [1]:
%load_ext autoreload
%autoreload 2 
# Autoreloads packages when changes are detected

# NOTE(review): later cells use `np` and `copy`, which are not imported explicitly in this
# notebook -- presumably they become available through one of the star imports below; confirm.
from src.Data import * # Class containing the data
from src.Assignment import * # Class containing an assignment
from src.Model import * # Class containing a PuLP model used for optimization
from src.ModelColumnGen import * # Class containing a PuLP model that optimizes using column generation
from src.ModelFracStable import * # Class containing a PuLP model for finding a fractionally stable stochastic improvement
from src.ModelHeuristicLP import * # Class containing a heuristic that uses generated weakly stable matchings as input
from src.DataGen import * # Generate student preferences and school priorities
from src.DA_STB import * # Generate the DA assignment with single tie-breaking (STB)
from src.ErdilErgin import * # Erdil & Ergin's implementation of the Stable Improvement Cycles algorithm + an alternative implementation of DA
from src.SICs import * # Adaptation of the SICs algorithm to our code

from ReadData.Estonia import * # Read Estonian data

# If you get an error that pulp and gurobipy are not installed, uncomment the following lines in the src/Data file (keep the exclamation marks):
    #! pip install pulp
    #! pip install gurobipy

## Column generation formulation

In [6]:
# Generate a random instance; its name encodes the number of students, schools and the seed
n_students, n_schools, seed = 40, 10, 10
name = f"{n_students}_{n_schools}_{seed}"

# Default generator parameters, except for mean_pref and capacity_ratio
parameters = DataGenParam(mean_pref=5, capacity_ratio=1)
MyData = generate_data(
    n_students=n_students,
    n_schools=n_schools,
    parameters=parameters,
    name=name,
    print_data=False,
    seed=seed,
)
# Larger alternative instance:
# MyData = generate_data(n_students=400, n_schools=20, parameters=parameters, name="Test_DataGen", print_data=False, seed=15)

In [7]:
# Optional: print the instance (sizes, preferences, capacities & priorities).
# The output grows with the instance size, so skip this cell for large instances.
print(MyData)

The data instance has the following properties: 

	40 students.
	10 schools. 

 	PREFERENCES:
	0	0 8 9 
	1	4 9 2 
	2	5 6 1 3 
	3	2 5 8 6 3 
	4	4 5 2 1 
	5	3 6 0 
	6	6 2 1 5 
	7	3 0 8 9 1 6 
	8	2 5 8 4 0 1 
	9	3 9 1 5 0 
	10	8 3 2 1 
	11	8 3 0 
	12	8 2 1 3 6 
	13	9 2 4 6 
	14	9 8 3 2 4 
	15	1 9 8 3 6 4 
	16	4 9 8 6 2 
	17	2 8 9 4 1 
	18	9 8 3 6 2 
	19	0 2 6 3 
	20	0 2 8 3 
	21	2 9 0 4 
	22	1 7 0 9 2 
	23	9 5 3 4 2 
	24	3 9 4 5 
	25	9 0 1 8 5 3 
	26	9 4 3 1 6 
	27	3 7 2 
	28	6 3 2 0 1 
	29	3 9 5 0 
	30	5 3 0 
	31	3 2 9 6 1 
	32	8 3 9 6 
	33	3 6 4 7 
	34	1 3 4 6 2 
	35	9 1 
	36	8 1 5 
	37	1 6 0 8 
	38	1 7 2 9 5 
	39	2 0 4 9 


 	CAPACITIES & PRIORITIES:
	0	1	{34 26 15 7 5 37 29 28 0 13 4 24 14} {32 21 36 9 11 8 22 35 38 33 3 27 1} {31 17 16 23 39 18 19 2 30 10 25 20 12 6} 
	1	2	{2 3 20 22 30 5 33 27 31 32 11 10 19} {23 34 35 1 8 21 29 9 4 28 37 26 25} {7 13 14 17 12 18 39 16 6 0 24 38 36 15} 
	2	2	{38 29 20 27 34 28 19 22 25 18 36 0 3} {6 16 4 21 17 15 37 26 13 14 23 2 31} {30 11 33 35 5 

In [8]:
# DA with single tie-breaking (STB), based on n_iter sampled tie-breaking rules
bool_SIC = False  # Do not run SICs inside DA_STB; they are applied as a separate step below
n_iter = 100
A = DA_STB(MyData, n_iter, 'GS', bool_SIC, 0, True)

# Stable improvement cycles in the spirit of Erdil and Ergin (2008), starting from A
A_SIC = SIC_all_matchings(MyData, A, True)

Students in ties: 40
Tie-breaking rules needed: 815915283247897734345611269596115894272000000000
Tie-breaking rules sampled: 100


Generate DA_STB: 100%|██████████| 100/100 [00:00<00:00, 386.81perturb/s]


STATISTICS BEFORE:

Average rank: 1.7717499999999997.

STATISTICS AFTER:

Average rank: 1.6742500000000007.



In [13]:
# Next step: column generation
#   * A_SIC is used as the warm start solution: its matchings are the initial columns
#   * A.assignment (the assignment found by DA) is the reference point that has to be
#     stochastically dominated
MyModel = ModelColumnGen(MyData, A_SIC, A.assignment, True)

# Solve the formulation
q = MyModel.Solve("TRAD", "GUROBI", print_log=False, print_out=True)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

Master: add decision variables: 100%|██████████| 92/92 [00:00<00:00, 2056.68var/s]


weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matching M 0.01
weight matc

Pricing exclude found matchings: 100%|██████████| 92/92 [00:00<00:00, 1076.37matchings/s]

Number of matchings: 92
ITERATION: 1

 ****** MASTER ****** 

Objective master:  1.646000000000001
Constant term 0.19999999999999854

 ****** PRICING ****** 

Set parameter BestObjStop to value -1.9999999999999854e-01





		Objective pricing:  0.3249999999999893
New number of matchings: 93
ITERATION: 2

 ****** MASTER ****** 

Objective master:  1.646000000000001
Constant term 0.19999999999999854

 ****** PRICING ****** 

Set parameter BestObjStop to value -1.9999999999999854e-01
		Objective pricing:  0.12499999999999233
New number of matchings: 94
ITERATION: 3

 ****** MASTER ****** 

Objective master:  1.646000000000001
Constant term 0.19999999999999854

 ****** PRICING ****** 

Set parameter BestObjStop to value -1.9999999999999854e-01
		Objective pricing:  0.19999999999999063
New number of matchings: 95
ITERATION: 4

 ****** MASTER ****** 

Objective master:  1.646000000000001
Constant term 0.19999999999999854

 ****** PRICING ****** 

Set parameter BestObjStop to value -1.9999999999999854e-01
		Objective pricing:  0.12499999999999052
New number of matchings: 96
ITERATION: 5

 ****** MASTER ****** 

Objective master:  1.646000000000001
Constant term 0.19999999999999854

 ****** PRICING ****** 

Set 

In [None]:
# Show the resulting assignment and the decomposition (matchings and their coefficients) kept by the model
print(q.assignment, MyModel.Xdecomp, MyModel.Xdecomp_coeff, sep="\n")

In [None]:
# Assess and visualize the difference between the optimized assignment and the DA assignment
name_diff = f"{name}_diff"
diff = Assignment(MyData, q.assignment - A.assignment, name_diff)
diff.visualize()

## Estonian data

In [None]:
# Read the Estonian data file (path relative to the working directory) and print the resulting instance
file_path = "ReadData/data_anon.txt" 
MyData = read_dataEstonia(file_path)
print(MyData)

In [None]:
# DA with single tie-breaking (STB), based on n_iter sampled tie-breaking rules
bool_SIC = True  # Run SICs immediately
n_iter = 1000
A = DA_STB(MyData, n_iter, 'GS', bool_SIC, 0, True)

print(A.assignment)

In [None]:
# Solve the column generation formulation on the Estonian instance.
# ModelColumnGen is called here with the same argument layout as in the generated-data
# section: (data, assignment whose matchings serve as warm start, reference assignment
# matrix to be stochastically dominated, print flag). The previous three-argument call
# passed `True` in the position of the reference assignment.
# A is used for both roles: its matchings as warm start and A.assignment as reference.
MyModel = ModelColumnGen(MyData, A, A.assignment, True)
q = MyModel.Solve("TRAD", "GUROBI", print_log=False, print_out=True)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

In [None]:
# Generate a second subset of matchings, and test column generation with that.
# B is generated like A, but with 1 instead of 0 as fifth argument of DA_STB
# (presumably the seed -- confirm against DA_STB).
n_iter = 1000
bool_SIC = True # Run SICs immediately?
B = DA_STB(MyData, n_iter, 'GS', bool_SIC, 1, True)

# Replace the matchings of A by (a copy of) those of B, keeping A.assignment as is.
# NOTE: this overwrites A.M_set in place; re-run the cell that creates A to restore
# its own matchings.
A.M_set = copy.deepcopy(B.M_set)

# Same argument layout as the ModelColumnGen call in the generated-data section:
# (data, assignment holding the warm-start matchings, reference assignment matrix, print flag).
# The previous three-argument call passed `True` in the position of the reference assignment.
MyModel2 = ModelColumnGen(MyData, A, A.assignment, True)
qq = MyModel2.Solve("TRAD", "GUROBI", print_log=True, print_out=False)


## Run initial IP on generated data
Specify the number of students and schools, and run the models for this data.

In [7]:
# Generate random data for the initial IP.
# `name` is (re)defined here because the diff cell of this section builds its label from
# `name`; without this line it would silently reuse the name of a previously generated
# instance (or fail on a fresh kernel when this section is run on its own).
name = "Test_DataGen_EE_vs_GS"

# Default generator parameters, except for mean_pref and capacity_ratio
parameters = DataGenParam(mean_pref = 6, capacity_ratio = 1)
MyData = generate_data(n_students=1000, n_schools=50, parameters = parameters, name=name, print_data=False, seed = 0)
# Smaller alternative instance:
#MyData = generate_data(n_students=400, n_schools=20, parameters = parameters, name="Test_DataGen", print_data=False, seed = 15)

In [8]:
# Optional: print the instance (sizes, preferences, capacities & priorities).
# With 1000 students this output is long; skip this cell if it is not needed.
print(MyData)

The data instance has the following properties: 

	1000 students.
	50 schools. 

 	PREFERENCES:
	0	37 32 31 38 18 6 
	1	39 17 32 47 33 43 0 
	2	29 5 42 46 2 
	3	7 40 34 47 
	4	37 28 24 0 39 17 
	5	6 27 18 30 
	6	0 10 27 30 5 
	7	30 41 14 24 44 34 
	8	12 46 18 
	9	39 21 37 45 42 35 
	10	16 44 7 14 32 
	11	37 3 15 18 41 0 
	12	42 17 29 45 3 
	13	27 12 36 40 14 41 
	14	10 24 6 16 20 36 
	15	16 27 32 39 14 
	16	3 14 28 42 32 29 1 
	17	24 16 17 46 7 3 
	18	16 31 14 36 35 22 
	19	40 16 29 26 3 18 28 
	20	46 37 22 10 14 2 
	21	28 21 32 26 18 16 
	22	2 47 5 1 14 9 
	23	39 7 17 0 
	24	45 16 39 46 32 
	25	14 24 39 35 41 
	26	27 32 17 10 
	27	35 0 39 2 29 27 
	28	6 29 37 24 34 
	29	35 12 45 17 
	30	28 5 48 32 
	31	22 28 1 12 46 4 
	32	7 28 38 41 40 36 
	33	17 29 40 6 3 23 16 
	34	0 26 18 42 17 
	35	32 27 15 22 17 12 3 
	36	3 13 6 11 9 21 16 
	37	0 3 47 15 5 31 28 
	38	20 36 45 
	39	20 18 27 12 7 22 36 
	40	0 8 39 17 28 3 
	41	27 1 12 22 28 6 
	42	22 39 34 17 24 40 
	43	14 7 46 41 40 23 
	44	2 37 

In [None]:
# DA with single tie-breaking (STB), based on n_iter sampled tie-breaking rules
bool_SIC = True  # Run SICs immediately
n_iter = 20
A = DA_STB(MyData, n_iter, 'GS', bool_SIC, 0, True)

print(A.assignment)

Students in ties: 1000
Tie-breaking rules needed: 40238726007709377354370243392300398571937486421071463254379991042993851239862902059204420848696940480047998861019719605863166687299480855890132382966994459099742450408707375991882362772718873251977950595099527612087497546249704360141827809464649629105639388743788648733711918104582578364784997701247663288983595573543251318532395846307555740911426241747434934755342864657661166779739666882029120737914385371958824980812686783837455973174613608537953452422158659320192809087829730843139284440328123155861103697680135730421616874760967587134831202547858932076716913244842623613141250878020800026168315102734182797770478463586817016436502415369139828126481021309276124489635992870511496497541990934222156683257208082133318611681155361583654698404670897560290095053761647584772842188967964624494516076535340819890138544248798495995331910172335555660213945039973628075013783761530712776192684903435262520001588853514733161170210396817592151090778801939317

Generate DA_STB: 100%|██████████| 20/20 [00:46<00:00,  2.30s/perturb]


[[0.   0.   0.   ... 0.   0.   0.  ]
 [0.   0.   0.   ... 0.45 0.   0.  ]
 [0.   0.   0.   ... 0.   0.   0.  ]
 ...
 [0.   0.   0.   ... 0.   0.   0.  ]
 [0.   0.   1.   ... 0.   0.   0.  ]
 [0.   0.   0.   ... 0.   0.   0.  ]]


In [None]:
# Solve the formulations. Available objectives:
#   'IMPR_RANK': minimize the expected rank while ensuring ex-post stability
#   'STABLE':    maximize the fraction of STABLE matchings in the decomposition
MyModel = Model(MyData, A, True)

q = MyModel.Solve("IMPR_RANK", "GUROBI", True)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

In [None]:
# Print the solution of MyModel (run the Solve cell above first)
MyModel.print_solution()

In [None]:
# Assess and visualize the difference between the optimized assignment and the DA assignment
name_diff = f"{name}_diff"
diff = Assignment(MyData, q.assignment - A.assignment, name_diff)
diff.visualize()

## Heuristic subset weakly stable matchings
Runs an LP that minimizes the average rank while stochastically dominating the DA assignment with single tie-breaking, using only the matchings that were sampled to compute the DA probabilities. This is a heuristic.

In [None]:
# Generate a small random instance.
# The instance name is derived from its size and seed ("students_schools_seed", the same
# convention as in the column generation section). The previous hard-coded name "500_25"
# did not match the 10 students and 4 schools that are actually generated.
n_students, n_schools, seed = 10, 4, 1
name = f"{n_students}_{n_schools}_{seed}"

# Default generator parameters, except for mean_pref and capacity_ratio
parameters = DataGenParam(mean_pref = 5, capacity_ratio = 1)
MyData = generate_data(n_students=n_students, n_schools=n_schools, parameters = parameters, name=name, print_data=False, seed = seed)
# Larger alternative instance:
#MyData = generate_data(n_students=400, n_schools=20, parameters = parameters, name="Test_DataGen", print_data=False, seed = 15)

In [None]:
# Optional: print the instance (sizes, preferences, capacities & priorities)
print(MyData)

In [None]:
# DA with single tie-breaking (STB), based on n_iter sampled tie-breaking rules
bool_SIC = True  # Run SICs immediately
n_iter = 1000
A = DA_STB(MyData, n_iter, 'GS', bool_SIC, 0, True)

print(A.assignment)

In [None]:
# Solve the heuristic LP.
#   'IMPR_RANK': minimize the expected rank while ensuring ex-post stability
MyModel = ModelHeuristicLP(MyData, A, True)

q = MyModel.Solve("IMPR_RANK", "GUROBI", True)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

In [None]:
# Print the solution of MyModel (run the Solve cell above first)
MyModel.print_solution()

In [None]:
# Largest entry-wise increase of an assignment probability compared to DA
print((q.assignment - A.assignment).max())

In [None]:
# Assess and visualize the difference between the optimized assignment and the DA assignment.
# The label is derived from the instance itself (file name + "_Diff", the same pattern as
# "Ex_paper_Diff" in the manual-data section) instead of the hard-coded "40_12_Diff",
# which does not correspond to the instance generated in this section.
# NOTE(review): assumes generate_data stores its `name` argument as MyData.file_name,
# as the Data constructor does for manually entered instances -- confirm.
diff = Assignment(MyData, q.assignment - A.assignment, MyData.file_name + "_Diff")
diff.visualize()

## Heuristic Fractional stable random matching

In [None]:
# Solve the fractionally stable formulation.
#   'IMPR_RANK': minimize the expected rank while ensuring ex-post stability
MyModelFS = ModelFracStable(MyData, A, True)

q = MyModelFS.Solve("IMPR_RANK", "GUROBI", True)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

In [None]:
# Find a decomposition of this fractionally stable matching over weakly stable matchings
# (if such a decomposition exists)
MyModelCHECK = Model(MyData, q, True)

q_check = MyModelCHECK.Solve("STABLE", "GUROBI", True)

## Run code manual data
Manually enter data.

In [None]:
# Manual instance: the example from the paper.
#
# Conventions:
#   * 'pref[i]' lists the IDs of the schools in the preference order of student i
#     (pref[i][k] is the k-th ranked school). Student preferences are assumed to be strict.
#   * 'prior[j]' lists the students in the priority order of school j. Priorities may
#     contain ties: students grouped in a tuple () share the same priority.
file_name = "Ex_paper"
n_stud, n_schools = 4, 4

# Names of students and schools
ID_stud = list("ABCD")
ID_school = list("1234")

# Preferences students
pref = [
    ["1", "3", "4", "2"],
    ["1", "4", "3", "2"],  # shorter alternative: ['1', '4']
    ["2", "3", "4", "1"],
    ["2", "4", "3", "1"],
]

# Priorities schools
prior = [
    [("A", "B"), "C", "D"],
    [("C", "D"), "A", "B"],
    ["B", "D", ("A", "C")],
    ["A", "C", ("B", "D")],
]

# Capacities schools
cap = [1] * n_schools

# Random matching upon which we want to improve:
# row = student (in the order of ID_stud), column = school (in the order of ID_school)
p = np.array([
    [1/2, 0.0, 3/8, 1/8],
    [1/2, 0.0, 1/8, 3/8],
    [0.0, 1/2, 3/8, 1/8],
    [0.0, 1/2, 1/8, 3/8],
])

In [None]:
# Build the data instance from the manually entered input, and print it
MyData = Data(
    n_stud, n_schools,
    pref, prior, cap,
    ID_stud, ID_school,
    file_name,
)
print(MyData)


In [None]:
# Wrap the random matching p in an Assignment instance ...
A = Assignment(MyData, p, "Ex_paper")

# ... and visualize it
A.visualize()

In [None]:
# Solve the formulations. Available objectives:
#   'IMPR_RANK': minimize the expected rank while ensuring ex-post stability
#   'STABLE':    maximize the fraction of STABLE matchings in the decomposition
MyModel = Model(MyData, A, False)

q = MyModel.Solve("IMPR_RANK", "GUROBI", False)
# Alternative objective:
# q = MyModel.Solve("STABLE", "GUROBI", True)

In [None]:
# Print the solution of MyModel (run the Solve cell above first)
MyModel.print_solution()

In [None]:
# Assess and visualize the difference between the optimized assignment and the original random matching p
diff = Assignment(
    MyData,
    q.assignment - p,
    "Ex_paper_Diff",
)
diff.visualize()

In [None]:
# Scratch cell: a list converted to a tuple (tuples are the notation used for ties in the priorities)
tuple((1, 2))

# Erdil & Ergin experiments

In [2]:
# Manual instance: the example from the paper (same input as in the manual-data section,
# repeated so that this section can be run on its own).
#
# Conventions:
#   * 'pref[i]' lists the IDs of the schools in the preference order of student i
#     (pref[i][k] is the k-th ranked school). Student preferences are assumed to be strict.
#   * 'prior[j]' lists the students in the priority order of school j. Priorities may
#     contain ties: students grouped in a tuple () share the same priority.
file_name = "Ex_paper"
n_stud, n_schools = 4, 4

# Names of students and schools
ID_stud = list("ABCD")
ID_school = list("1234")

# Preferences students
pref = [
    ["1", "3", "4", "2"],
    ["1", "4", "3", "2"],  # shorter alternative: ['1', '4']
    ["2", "3", "4", "1"],
    ["2", "4", "3", "1"],
]

# Priorities schools
prior = [
    [("A", "B"), "C", "D"],
    [("C", "D"), "A", "B"],
    ["B", "D", ("A", "C")],
    ["A", "C", ("B", "D")],
]

# Capacities schools
cap = [1] * n_schools

# Random matching upon which we want to improve:
# row = student (in the order of ID_stud), column = school (in the order of ID_school)
p = np.array([
    [1/2, 0.0, 3/8, 1/8],
    [1/2, 0.0, 1/8, 3/8],
    [0.0, 1/2, 3/8, 1/8],
    [0.0, 1/2, 1/8, 3/8],
])

# Build the data instance, and print it
MyData = Data(
    n_stud, n_schools,
    pref, prior, cap,
    ID_stud, ID_school,
    file_name,
)
print(MyData)


The data instance has the following properties: 

	4 students.
	4 schools. 

 	PREFERENCES:
	A	1 3 4 2 
	B	1 4 3 2 
	C	2 3 4 1 
	D	2 4 3 1 


 	CAPACITIES & PRIORITIES:
	1	1	{A B} C D 
	2	1	{C D} A B 
	3	1	B D {A C} 
	4	1	A C {B D} 



In [3]:
# --- Tie-breaking: sample one single tie-breaking rule and make the priorities strict ---
permut = generate_permutations_STB(MyData, 1, seed=3)
prior_new = generate_strict_prior_from_perturbation(MyData, permut[0])

# Same instance as MyData, but with the strict priorities
Data_new_prior = Data(
    MyData.n_stud, MyData.n_schools,
    MyData.pref, prior_new, MyData.cap,
    MyData.ID_stud, MyData.ID_school,
    MyData.file_name,
)

# --- DA with the code of Erdil & Ergin (input converted to their format first) ---
N = transform_pref_us_to_EE(Data_new_prior)
A = transform_prior_us_to_EE(Data_new_prior)
Q = MyData.cap
result = DA_Erdil_ergin(N, A, Q)
print(result)
print(type(result['proposeoffset']))

# --- DA with our own Gale-Shapley implementation, for comparison ---
M = gale_shapley(Data_new_prior)
print(type(M))
print(M)

# --- Hand-made matching: student i is matched to school (i + 1) mod 4 ---
M = np.zeros((4, 4))
M[[0, 1, 2, 3], [1, 2, 3, 0]] = 1
print(M)
print(M[0, 1] == 1)

# --- Stable improvement cycles on the hand-made matching, under the original (weak) priorities ---
M_out = SIC(MyData, M, False)
print(M_out)

{'stable_all': [{1}, {3}, {2}, {0}], 'proposeoffset': [2, 0, 1, 0]}
<class 'list'>
<class 'numpy.ndarray'>
[[0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]]
[[0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]]
True
[[1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]]


In [5]:
# DA with single tie-breaking on the example instance (10 sampled tie-breaking rules),
# without running SICs inside DA_STB
bool_SIC = False
AssignmentGS = DA_STB(MyData, 10, 'GS', bool_SIC, 0)

# Apply the stable improvement cycles as a separate step
ASIC = SIC_all_matchings(MyData, AssignmentGS, True)

print(AssignmentGS.assignment)




STATISTICS BEFORE:

Average rank: 1.625.

STATISTICS AFTER:

Average rank: 1.625.

[[0.7 0.  0.2 0.1]
 [0.3 0.  0.2 0.5]
 [0.  0.6 0.4 0. ]
 [0.  0.4 0.2 0.4]]
