In [38]:
# Step 1: Preprocessing for CP-SAT Classroom Allocation

import pandas as pd

# Read the synthetic student dataset and attach the derived score columns in one pass.
# Each lambda receives the frame as built so far, so Wellbeing_Score can rely on the
# two reversed (7 - x) scores defined just above it.
df = pd.read_csv("synthetic_student_data.csv").assign(
    Isolation_Score=lambda frame: 7 - frame['isolated'],
    COVID_Score=lambda frame: 7 - frame['COVID'],
    Wellbeing_Score=lambda frame: (
        frame['pwi_wellbeing'] + frame['Isolation_Score'] + frame['COVID_Score']
    ) / 3,
    Bullying_Score=lambda frame: frame['criticises'],
    Friendliness_Score=lambda frame: frame['School_support_engage'],
    Student_Index=lambda frame: range(len(frame)),
)

# Keep only the columns the allocation model reads
required_columns = [
    'Student_Index',
    'Academic_Performance',
    'Wellbeing_Score',
    'Bullying_Score',
    'Friendliness_Score',
    'language',
]
student_data = df[required_columns]

num_students = len(student_data)
print("✅ Loaded", num_students, "students")


✅ Loaded 1000 students


In [39]:
from ortools.sat.python import cp_model
from collections import defaultdict
import math

model = cp_model.CpModel()

# Class parameters: enough classes of at most `class_size_limit` to seat everyone
class_size_limit = 25
num_classes = math.ceil(num_students / class_size_limit)

print("🔧 Using", num_classes, "classes of size", class_size_limit)

# One integer variable per student holding the index of the class they are placed in
student_vars = [
    model.NewIntVar(0, num_classes - 1, f'student_{student_idx}_class')
    for student_idx in range(num_students)
]

# class_students[c][s] is a Boolean that is true exactly when student s is in class c.
# Both implication directions are posted so the Boolean is fully tied to the integer.
class_students = defaultdict(list)
for student_idx, class_var in enumerate(student_vars):
    for class_idx in range(num_classes):
        indicator = model.NewBoolVar(f'student_{student_idx}_is_in_class_{class_idx}')
        model.Add(class_var == class_idx).OnlyEnforceIf(indicator)
        model.Add(class_var != class_idx).OnlyEnforceIf(indicator.Not())
        class_students[class_idx].append(indicator)


🔧 Using 40 classes of size 25


In [40]:
# Constraint A: Each student assigned to exactly one class
for i in range(num_students):
    model.Add(sum(class_students[cls][i] for cls in range(num_classes)) == 1)

# Constraint B: Each class holds between floor(num_students / num_classes) and
# class_size_limit students.
# Requiring exactly `class_size_limit` in every class is only satisfiable when
# num_students is a multiple of class_size_limit. When it divides evenly (e.g. 1000
# students / 40 classes) both bounds equal 25, so this is the same "exactly 25" rule;
# otherwise the classes are kept as even as possible without exceeding the limit.
min_class_size = num_students // num_classes if num_classes else 0
for cls in range(num_classes):
    model.AddLinearConstraint(sum(class_students[cls]), min_class_size, class_size_limit)

print("📏 Constraints added: student assignment + fixed class sizes")


📏 Constraints added: student assignment + fixed class sizes


In [41]:
# Run CP-SAT on the model built so far and report whether an assignment exists
solver = cp_model.CpSolver()
status = solver.Solve(model)

# OPTIMAL and FEASIBLE both mean the solver is holding a usable assignment
solution_found = status in (cp_model.OPTIMAL, cp_model.FEASIBLE)
print("✅ Solution found!" if solution_found else "❌ No feasible solution found.")


✅ Solution found!


In [42]:
# Step 5: Extract the assigned class for each student
#
# Student IDs are taken from the 'Student_Index' column directly. Reading them through
# `student_data.iloc[i][...]` first builds a row Series from columns of mixed dtypes,
# which upcasts every value to a common dtype and turned the integer IDs into floats
# (0.0, 1.0, ...).

student_ids = student_data['Student_Index'].tolist()

# One record per student: their ID and the class index chosen by the solver
student_assignments = [
    {'StudentID': student_id, 'Assigned_Class': solver.Value(class_var)}
    for student_id, class_var in zip(student_ids, student_vars)
]

assignments_df = pd.DataFrame(student_assignments)
print(assignments_df.head())


   StudentID  Assigned_Class
0        0.0              19
1        1.0              25
2        2.0              27
3        3.0              28
4        4.0              12


In [44]:
# Build a fresh frame: the student features plus the class chosen by the solver.
# The new column is aligned to student_data by index label.
student_data_with_class = student_data.assign(
    Assigned_Class=assignments_df['Assigned_Class']
)

# Show merged data with assigned classes
print(student_data_with_class.head())

# Optional: head-count per class, ordered by class index
class_head_counts = student_data_with_class['Assigned_Class'].value_counts()
print("\nStudents per class:")
print(class_head_counts.sort_index())


   Student_Index  Academic_Performance  Wellbeing_Score  Bullying_Score  \
0              0                    61         5.000000               1   
1              1                    53         1.666667               4   
2              2                    62         3.000000               1   
3              3                    96         3.000000               2   
4              4                    68         1.666667               2   

   Friendliness_Score  language  Assigned_Class  
0                4.50         0              19  
1                4.00         1              25  
2                4.75         0              27  
3                2.75         1              28  
4                4.75         0              12  

Students per class:
Assigned_Class
0     25
1     25
2     25
3     25
4     25
5     25
6     25
7     25
8     25
9     25
10    25
11    25
12    25
13    25
14    25
15    25
16    25
17    25
18    25
19    25
20    25
21    25
22    25
23    

In [45]:
# Step: Academic Performance Balance Constraint
#
# Every pair of classes may differ in *total* academic score by at most
# max_allowed_difference * class_size_limit (the allowed average gap scaled by class size).
#
# NOTE(review): this only adds constraints to `model`; the solver has to be run again
# afterwards for them to influence the assignment — confirm a re-solve follows.

max_allowed_difference = 20  # allowed average difference
class_academic_scores = {}

# Read each student's score once (CP-SAT needs integer coefficients) instead of
# repeating the DataFrame lookup for every class.
academic_scores = [
    int(student_data.loc[idx, 'Academic_Performance'])
    for idx in range(num_students)
]

# Linear expression for the total academic score of each class
for cls in range(num_classes):
    class_academic_scores[cls] = sum(
        academic_scores[idx] * class_students[cls][idx]
        for idx in range(num_students)
    )

# "All pairwise differences <= D" is the same as "max total - min total <= D", so bound
# every class total between two shared variables rather than posting one abs-difference
# constraint per pair of classes.
lowest_possible_total = sum(min(score, 0) for score in academic_scores)
highest_possible_total = sum(max(score, 0) for score in academic_scores)
academic_total_min = model.NewIntVar(
    lowest_possible_total, highest_possible_total, 'academic_total_min')
academic_total_max = model.NewIntVar(
    lowest_possible_total, highest_possible_total, 'academic_total_max')

for cls in range(num_classes):
    model.Add(class_academic_scores[cls] >= academic_total_min)
    model.Add(class_academic_scores[cls] <= academic_total_max)

# Allow average diff × class size
model.Add(academic_total_max - academic_total_min <= max_allowed_difference * class_size_limit)

print("✅ Academic Performance Balance constraint added.")


✅ Academic Performance Balance constraint added.


In [46]:
# Step: Wellbeing Balance Constraint
#
# Wellbeing_Score is fractional (e.g. 1.666667). CP-SAT only accepts integer
# coefficients, but truncating with int() would turn 1.67 into 1 and balance the wrong
# quantity. The scores are therefore converted to fixed-point integers
# (score * wellbeing_scale, rounded) and the allowed gap is scaled by the same factor.
# Consequence: the expressions in class_wellbeing_scores are totals in units of
# 1 / wellbeing_scale.
#
# NOTE(review): this only adds constraints to `model`; the solver has to be run again
# afterwards for them to influence the assignment — confirm a re-solve follows.

max_allowed_wellbeing_diff = 3
wellbeing_scale = 100  # two decimal places of precision
class_wellbeing_scores = {}

# Read and scale each student's score once instead of once per class
scaled_wellbeing = [
    round(float(student_data.loc[idx, 'Wellbeing_Score']) * wellbeing_scale)
    for idx in range(num_students)
]

# Linear expression for the (scaled) total wellbeing of each class
for cls in range(num_classes):
    class_wellbeing_scores[cls] = sum(
        scaled_wellbeing[idx] * class_students[cls][idx]
        for idx in range(num_students)
    )

# "All pairwise differences <= D" is the same as "max total - min total <= D", so bound
# every class total between two shared variables rather than posting one abs-difference
# constraint per pair of classes. Domains are derived from the data so scaling cannot
# push a total outside them.
lowest_possible_total = sum(min(score, 0) for score in scaled_wellbeing)
highest_possible_total = sum(max(score, 0) for score in scaled_wellbeing)
wellbeing_total_min = model.NewIntVar(
    lowest_possible_total, highest_possible_total, 'wellbeing_total_min')
wellbeing_total_max = model.NewIntVar(
    lowest_possible_total, highest_possible_total, 'wellbeing_total_max')

for cls in range(num_classes):
    model.Add(class_wellbeing_scores[cls] >= wellbeing_total_min)
    model.Add(class_wellbeing_scores[cls] <= wellbeing_total_max)

# Allowed average gap × class size, expressed in the same fixed-point units
model.Add(
    wellbeing_total_max - wellbeing_total_min
    <= max_allowed_wellbeing_diff * class_size_limit * wellbeing_scale
)

print("✅ Wellbeing Balance constraint added.")


✅ Wellbeing Balance constraint added.


In [47]:
# Step: Bullying Spread Constraint
#
# No class may contain more than max_bullies_per_class students whose Bullying_Score is
# at or above bully_threshold.
#
# NOTE(review): this only adds constraints to `model`; the solver has to be run again
# afterwards for them to influence the assignment — confirm a re-solve follows.

max_bullies_per_class = 3
bully_threshold = 7

# Who counts as a bully does not depend on the class, so work it out once up front
# instead of re-testing every student for every class.
bully_mask = student_data['Bullying_Score'] >= bully_threshold
bully_indices = [idx for idx in range(num_students) if bully_mask.loc[idx]]

for cls in range(num_classes):
    bully_vars = [class_students[cls][idx] for idx in bully_indices]
    model.Add(sum(bully_vars) <= max_bullies_per_class)

print("✅ Bullying Spread constraint added.")


✅ Bullying Spread constraint added.


In [49]:
# Per-class summary of the three balancing criteria.
#
# NOTE(review): in the visible notebook, student_data_with_class was filled from the
# solve that ran before the academic / wellbeing / bullying constraints were added to
# `model`. Re-solve and re-extract the assignments before treating these numbers as
# evidence that the constraints hold — TODO confirm.

# Add Is_Bully column because missing. Uses the same cut-off as the Bullying Spread
# constraint (bully_threshold) so the report cannot drift from what the model enforces.
student_data_with_class['Is_Bully'] = student_data_with_class['Bullying_Score'] >= bully_threshold

# Academic balance
print("Mean academic performance per class:")
print(student_data_with_class.groupby('Assigned_Class')['Academic_Performance'].mean())

# Wellbeing balance
print("\nMean wellbeing score per class:")
print(student_data_with_class.groupby('Assigned_Class')['Wellbeing_Score'].mean())

# Bully count
print("\nNumber of bullies per class:")
print(student_data_with_class.groupby('Assigned_Class')['Is_Bully'].sum())


Mean academic performance per class:
Assigned_Class
0     68.00
1     70.60
2     69.36
3     69.72
4     68.68
5     63.96
6     72.52
7     71.68
8     78.00
9     72.52
10    70.68
11    66.64
12    74.96
13    69.24
14    71.88
15    72.84
16    75.32
17    70.88
18    65.96
19    67.40
20    67.52
21    67.64
22    66.72
23    69.04
24    70.76
25    74.08
26    66.28
27    72.44
28    77.88
29    70.56
30    73.24
31    69.32
32    63.96
33    70.80
34    69.48
35    74.32
36    71.56
37    65.68
38    73.52
39    68.76
Name: Academic_Performance, dtype: float64

Mean wellbeing score per class:
Assigned_Class
0     3.533333
1     3.680000
2     3.840000
3     3.960000
4     3.813333
5     3.413333
6     3.453333
7     3.866667
8     3.853333
9     3.200000
10    3.573333
11    3.600000
12    3.213333
13    3.480000
14    3.760000
15    3.986667
16    3.560000
17    3.880000
18    3.613333
19    3.693333
20    3.186667
21    3.466667
22    3.640000
23    3.413333
24    3.946667
25