In [1]:
import pandas as pd

import datareader
import problemsolver
import solutions

In [2]:
FILE_PREFERENCES = "Voorkeuren en wensen groepsindeling - nieuw.xlsx"
FILE_GROUPS_TO = "groepen_2025.xlsx"
FILE_NOT_TOGETHER = "niet_samen_2025.xlsx"

In [3]:
groups_to = pd.read_excel(FILE_GROUPS_TO, index_col=0).to_dict(orient="index")
processor = datareader.VoorkeurenProcessor(FILE_PREFERENCES)
preferences = processor.process(all_to_groups=list(groups_to.keys()))
students_info = processor.get_students_meta_info()
not_together = datareader.read_not_together(FILE_NOT_TOGETHER)

In [4]:
df_groups_to = pd.DataFrame.from_dict(groups_to, orient="index")
display(df_groups_to.assign(Totaal=lambda df: df.sum("columns")))


df_students = pd.DataFrame.from_dict(students_info, orient="index")
print(df_students[["Jongen/meisje"]].value_counts())
print(df_students["Stamgroep"].value_counts())

Unnamed: 0,Jongens,Meisjes,Totaal
Blauw,11,9,20
Geel,8,11,19
Groen,10,8,18
Oranje,6,11,17


Jongen/meisje
Meisje           24
Jongen           19
Name: count, dtype: int64
Stamgroep
Kaboutertuin    12
Vlindertuin     12
Torteltuin      10
Tovertuin        9
Name: count, dtype: int64


In [6]:
prob_name = "OB_MB_2025_4"
filename = f"{prob_name}.json"
ps = problemsolver.ProblemSolver(
    preferences,
    students_info,
    groups_to,
    not_together,
    max_imbalance_boys_girls_total=6,
    optimize="lexmaxmin",
)


# ps.run(filename=filename, n_solutions=2, overwrite=True)
ps.run(filename=filename, distance=10, overwrite=True)

Level 0, step 1 done, 0.5714285714286
Level 0, step 2 done, 41
Level 1, step 1 done, 0.6073696169273
Level 1, step 2 done, 40
Level 2, step 1 done, 0.6610549765808
Level 2, step 2 done, 39
Level 3, step 1 done, 0.6666676666667
Level 3, step 2 done, 32
Level 4, step 1 done, 0.6673007258392
Level 4, step 2 done, 31
Level 5, step 1 done, 0.6897763035352
Level 5, step 2 done, 31
Level 6, step 1 done, 0.709125996087
Level 6, step 2 done, 29
Level 7, step 1 done, 0.7387971250363
Level 7, step 2 done, 27
Level 8, step 1 done, 0.7741945483871
Level 8, step 2 done, 26
Level 9, step 1 done, 0.7846792049872
Level 9, step 2 done, 23




KeyError: 'binary'

In [None]:
filenames = [
    "OB_MB_2025_1.json",
    "OB_MB_2025_2.json",
    "OB_MB_2025_3.json",
    "OB_MB_2025_4_1.json",
]
for filename in filenames:
    sa = solutions.SolutionAnalyzer(
        filename, preferences, processor.input, students_info
    )

    sa.show_all()

FileNotFoundError: [Errno 2] No such file or directory: 'OB_MB_2025_1'

# Analysis

In [None]:
solution_performance_overview = dict()
known_hashes = set()
relevant_solutions = []
optimization_options = [
    "studentsatisfaction",
    "weighted_preferences",
    "least_satisfied",
]
for optimize in optimization_options:
    for max_imbalance_boys_girls_year in range(1, 4):
        ps = problemsolver.ProblemSolver(
            preferences,
            students_info,
            groups_to,
            optimize=optimize,
            max_imbalance_boys_girls_year=max_imbalance_boys_girls_year,
        )
        prob = ps.run()
        sa = solutions.SolutionAnalyzer(prob, preferences, processor.input)
        new_hash = sa.get_hash()
        if new_hash not in known_hashes:
            known_hashes.add(new_hash)
            relevant_solutions.append(sa)
            print(f"{optimize=} {max_imbalance_boys_girls_year=}")
            display(sa.display_groepsindeling())

            solution_performance_overview[(optimize, max_imbalance_boys_girls_year)] = (
                sa.solution_performance
            )

In [None]:
pd.DataFrame.from_dict(solution_performance_overview, orient="index")

In [None]:
for sol in relevant_solutions:
    display(sol.display_groepsindeling())
    display(sol.solution_performance)
    display(sol.display_student_performance())
    print("-" * 30)