In [None]:
%load_ext autoreload
%autoreload 2 
# Autoreloads packages when changes are detected

from src.Data import * # Class containing the data
from src.Assignment import * # Class containing an assignment
from src.Model import * # Class containing a Pulp model used for optimization
from src.ModelColumnGen import * # Class containing Pulp model that optimizes using column generation
from src.DataGen import * # Generate student preferences and school priorities
from src.DataGenEE import * # Generate data according to the method by Erdil & Ergin (2008)
from src.EADAM import * # EADAM implementation
from src.DA_STB import * # Generate DA assignment with single tie-breaking (STB)
from src.ErdilErgin import * # Erdil & Ergin's implementation of the Stable Improvement Cycles algorithm + alternative implementation of DA
from src.SICs import * # Adaptation of SICs algorithm to our code
from src.Simulations import * # Run solution methods for multiple data instances
from src.SimulationsEvaluate_final import * # Create plots for the generated results

from ReadData.Estonia.Estonia import * # Read Estonian data

import pickle # to export data

# If you get an error that pulp and gurobipy are not installed, uncomment the following lines in the src/Data file (keep the exclamation marks):
    #! pip install pulp
    #! pip install gurobipy

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Run Simulations

To run the simulations, fill in the following parameters:

**Main parameters**
* `n_students_schools`: vector containing pairs with number of students and schools. E.g., `[[40,8],[80,16]]` will first run `n_iterations_simul` instances with 40 students and 8 schools, and then the same number with 80 students and 16 schools
* `compare_solutions`: Choose which solutions you evaluate. All possible options are:
    * `SD_UPON_DA`: sd-improve upon DA with random tie-breaking, using the matchings generated by Stable Improvement Cycles (SICs) of Erdil & Ergin
    * `SD_UPON_EE`: Same, but sd-improving upon the solution of Erdil & Ergin (running SICs for each matching found by DA with tie-breaking)
    * `SD_UPON_EADA`: sd-improve upon EADA
    * `SD_UPON_DA_SAMPLE`: generate `n_sol_extra_sample` additional weakly stable matchings (by running DA with random tie-breaking + SICs), and then sd-improve upon DA.
    * `SD_UPON_EE_SAMPLE`: generate `n_sol_extra_sample` additional weakly stable matchings (by running DA with random tie-breaking + SICs), and then sd-improve upon EE.
    * `SD_UPON_EADA_SAMPLE`: generate `n_sol_extra_sample` additional weakly stable matchings (by running DA with random tie-breaking + SICs), and then sd-improve upon EADA.
* `n_iterations_simul`: number of instances evaluated for each pair of (number of students, number of schools)

**Detailed parameters**
* `bool_ColumnGen`: boolean to determine whether or not to run column generation (True: run column generation, False: don't run)
* `bool_supercolumn`: boolean to determine whether an artificial column is added to the model when the model is infeasible
* `bool_identical_students`: boolean to determine whether identical students are given identical probabilities. (Note: the difference between probabilities is determined in `ModelColumnGen.py` (lines 179, 181))

* `n_match`: Number of matchings used to estimate DA (e.g., 1000)
* `time_lim`: time limit for the column generation framework (in seconds)
* `n_sol_extra_sample`: Number of extra solutions added to solution pool for sampling methods

* `n_sol_pricing`: Number of solutions added to main problem by each iteration of the pricing problem
* `n_sol_pricingMinRank`: Number of solutions found by pricing that minimizes average rank
* `gap_solutionpool_pricing`: Optimality gap for solutions that are added to main problem by the pricing problem
* `MIPGap`: Initial optimality gap for main problem

* `seed`: seed pseudo-random number generator

* `alpha` and `beta`: parameters for data generation, see Erdil & Ergin (2008) for a detailed explanation. They can be controlled by setting `ALPHA_INCREMENT` and `BETA_INCREMENT` (to evaluate a range of values), or by simply adding the desired values to the array

* `print_intermediate`: boolean to control which output is printed

In [None]:
# Configure and launch the column-generation simulations.
# Every name defined in this cell is a notebook-level variable; the values are
# forwarded to SimulationCG at the bottom of the cell.

# Explicit import: `np` was previously only in scope through the star imports.
import numpy as np

# MAIN PARAMETERS

# Pairs of [number of students, number of schools], evaluated in the order given.
n_students_schools = [[40, 8], [80, 16]]

# Solution methods to evaluate. All options are:
# ["SD_UPON_DA", "SD_UPON_EE", "SD_UPON_EADA",
#  "SD_UPON_DA_SAMPLE", "SD_UPON_EE_SAMPLE", "SD_UPON_EADA_SAMPLE"]
compare_solutions = [
    "SD_UPON_DA",
    "SD_UPON_EE",
    "SD_UPON_EADA",
    "SD_UPON_DA_SAMPLE",
    "SD_UPON_EE_SAMPLE",
    "SD_UPON_EADA_SAMPLE",
]

n_iterations_simul = 10  # Number of instances per (students, schools) pair

# DETAILED PARAMETERS
bool_ColumnGen = True            # Run column generation
bool_supercolumn = True          # Add an artificial column when the model is infeasible
bool_identical_students = False  # Give identical students identical probabilities

n_match = 1000  # Number of matchings used to estimate DA
time_lim = 600  # Time limit for the column generation framework (in seconds)

n_sol_extra_sample = 10000  # Number of extra solutions added to solution pool for sampling methods

n_sol_pricing = 500         # Number of solutions found by traditional pricing (minimizing reduced cost)
n_sol_pricingMinRank = 500  # Number of solutions found by pricing that minimizes average rank
gap_solutionpool_pricing = 1.5  # Optimality gap for solutions the pricing problem adds to the main problem
MIPGap = 0.5                    # Initial optimality gap for main problem

seed = 0  # Seed of the pseudo-random number generator

ALPHA_INCREMENT = 0.20
#BETA_INCREMENT = 0.5

# Grid 0, ALPHA_INCREMENT, 2*ALPHA_INCREMENT, ..., closed with 1.0.
# np.arange with a non-integer step accumulates floating-point error (e.g. it
# yields 0.6000000000000001 for a step of 0.2) and can, for some steps, emit a
# value just below 1.0 that would duplicate the appended end point. Rounding,
# de-duplicating through a set and sorting gives a clean, strictly increasing grid.
alpha = sorted({round(float(a), 10) for a in np.arange(0, 1.0, ALPHA_INCREMENT)} | {1.0})
#beta = sorted({round(float(b), 10) for b in np.arange(0, 1.0, BETA_INCREMENT)} | {1.0})
#alpha = [0.5]
beta = [0.2, 0.6]

print_intermediate = False  # Controls which intermediate output is printed

S_vector = SimulationCG(
    COMPARE_SOLUTIONS=compare_solutions,
    n_students_schools=n_students_schools,
    alpha=alpha,
    beta=beta,
    n_iterations_simul=n_iterations_simul,
    n_match=n_match,
    n_sol_pricingMinRank=n_sol_pricingMinRank,
    n_sol_extra_sample=n_sol_extra_sample,
    time_lim=time_lim,
    seed=seed,
    n_sol_pricing=n_sol_pricing,
    gap_pricing=gap_solutionpool_pricing,
    MIPGap=MIPGap,
    bool_ColumnGen=bool_ColumnGen,
    bool_supercolumn=bool_supercolumn,
    bool_identical_students=bool_identical_students,
    print_out=print_intermediate,
)



Data instances:   1%|          | 1/126 [06:47<14:07:57, 407.02s/inst]

# Evaluate results

In [None]:
# Evaluate the simulation results: every call below receives the same results
# name plus the positional flag True.
# NOTE(review): the meaning of the second argument (True) is defined in
# src.SimulationsEvaluate_final and is not visible from this notebook - confirm there.

# Fill in the name of the generated csv file with the results of the simulations
name = "SIM_2025-12-02_220148"

# Plots comparing SD-DA-CG with DA and EE
plot_avg_rank_alpha_beta_final(name, True) # Plot average improvement in rank among improving students for SD-DA-CG and EE
plot_fraction_impr_alpha_beta_final(name, True) # Plot fraction of improving students for SD-DA-CG and EE

# Plots comparing performance of SD-DA-heur, SD-DA-CG and SD-DA-SAMPLE-10000
# (Careful: the functions below might no longer work if you change the vector 'compare_solutions' above)
evaluate_CG(name, True)

# Plot comparison of SD-EADA-CG with EADA, EE, and SD-DA-CG
plot_avg_rank_EADA_alpha_beta_final(name, True)

# Plot comparison of SD-EE-CG with EE, and SD-DA-CG
plot_avg_rank_EE_alpha_beta_final(name, True)
plot_fraction_impr_EE_alpha_beta_final(name, True)