In [1]:
import math

import pandas as pd
from ortools.sat.python import cp_model

In [2]:
# Read CSV
# Load all seven input tables in one pass; files are read lazily in the order
# listed, and each target name lines up with the path at the same position.
(
    class_list,
    timeslots_days,
    lessons_required,
    weekday,
    teacher_classes,
    student_classes,
    teacher_list,
) = map(
    pd.read_csv,
    (
        "class_list.csv",
        "timeslots.csv",
        "lessons_required.csv",
        "weekdays.csv",
        "teacher_classes.csv",
        "student_classes.csv",
        "teacher_list.csv",
    ),
)

In [3]:
# Remove every 'isizulu-12' row from both membership tables, so the lookups
# derived from them afterwards contain no teacher or students for that class.
student_classes = student_classes.loc[student_classes['class'].ne('isizulu-12')]
teacher_classes = teacher_classes.loc[teacher_classes['class'].ne('isizulu-12')]

In [4]:
# Plain-Python views of the input tables, used to index the model.
classes = class_list['class'].tolist()
timeslots = timeslots_days['timeslot'].tolist()
days = weekday['weekday'].tolist()
periods = range(9)  # period indices 0..8
teacher = teacher_list['teacher'].tolist()

# class -> number of lessons required
num_lessons = {
    cls: count
    for cls, count in zip(lessons_required["class"], lessons_required["num_lessons"])
}
# class -> teacher (a later row for the same class replaces an earlier one)
class_teacher = {
    cls: who
    for cls, who in zip(teacher_classes["class"], teacher_classes["teacher"])
}
# class -> list of enrolled students
class_students = {
    cls: list(members)
    for cls, members in student_classes.groupby("class")["student"]
}

In [5]:
# Quick diagnostics
# Report classes that appear in the class list but have no lesson count.
missing_lessons = list(filter(lambda name: name not in num_lessons, classes))
if len(missing_lessons) > 0:
    print("Warning: these classes lack a num_lessons entry:", missing_lessons)

## Model

In [6]:
# Empty CP-SAT model; the variables, constraints and objective defined in the
# following cells are all added to this object.
model = cp_model.CpModel()

## Variables

We’ll create one **binary decision variable** per `(class, day, period)`; it equals 1 when that class is taught in that slot.

In [7]:
# Variables
# x[(class, day, period)] is a Boolean that is 1 when the class is scheduled
# in that slot. Variables are created class-major, then by day, then by period.
x = {
    (cls, day, slot): model.NewBoolVar(f'x_{cls}_{day}_{slot}')
    for cls in classes
    for day in days
    for slot in periods
}

## Constraints

### Required lessons per class

Each class must be scheduled the exact number of times specified in `num_lessons`.

In [8]:
# Constraint: each class gets required lessons
for c in classes:
    # A class with no num_lessons entry is pinned to zero scheduled lessons.
    required = int(num_lessons[c]) if c in num_lessons else 0
    scheduled = sum(x[(c, d, p)] for d in days for p in periods)
    model.Add(scheduled == required)

### Teachers cannot be double-booked

For each `(day, period)`, a teacher can only teach one of their classes.

In [9]:
# Hard constraint: in any (day, period) a teacher teaches at most one class.
#
# The teacher -> classes grouping does not depend on the slot, so it is built
# once up front. Grouping through a dict keeps first-seen order, which makes
# the constraint order identical on every run; iterating a set of teacher
# names does not, because string hashing is randomised per process.
known_classes = set(classes)  # only these classes have decision variables
classes_by_teacher = {}
for cls, t in class_teacher.items():
    # A missing (NaN) teacher never compares equal to anything, so such
    # classes are not attached to any teacher.
    if pd.isna(t) or cls not in known_classes:
        continue
    classes_by_teacher.setdefault(t, []).append(cls)

for d in days:
    for p in periods:
        for t, teaching_classes in classes_by_teacher.items():
            # With fewer than two classes the sum is a single Boolean (or
            # empty) and is always <= 1, so no constraint is needed.
            if len(teaching_classes) < 2:
                continue
            model.Add(sum(x[(c, d, p)] for c in teaching_classes) <= 1)

### Students cannot be double-booked

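Same idea: for each `(day, period)`, a student should attend at most one class. Unlike the teacher rule, this is a **soft** constraint: a clash is allowed, but each one adds a penalty to the objective.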

In [10]:
# Soft constraint: students should not have clashes
#
# Invert class -> students into student -> classes once; the mapping is the
# same for every slot. Building it through dicts keeps first-seen order, so
# variables and constraints are created in the same order on every run
# (iterating a set of student names is not stable across processes).
known_classes = set(classes)  # only these classes have decision variables
classes_by_student = {}
for cls, members in class_students.items():
    if cls not in known_classes:
        continue
    # dict.fromkeys drops repeated enrolment rows while keeping order, so a
    # class is counted once per student.
    for s in dict.fromkeys(members):
        classes_by_student.setdefault(s, []).append(cls)

violations = []
for d in days:
    for p in periods:
        for s, enrolled_classes in classes_by_student.items():
            # A student with fewer than two classes can never clash, so no
            # penalty variable or constraint is needed for them.
            if len(enrolled_classes) < 2:
                continue
            v = model.NewBoolVar(f"violation[{s},{d},{p}]")
            violations.append(v)
            # v must be 1 whenever two of the student's classes share the slot.
            # NOTE(review): v is Boolean, so three or more simultaneous classes
            # for one student remain infeasible - confirm that is intended.
            model.Add(sum(x[(c, d, p)] for c in enrolled_classes) <= 1 + v)

### Spread lessons across days (soft preference)

For each class and day we measure how far the day's lesson count strays from the class's per-day average; these deviations are penalized in the **objective** defined below.

In [11]:
# Soft spread preference: for each class and day, `dev` bounds how far the
# day's lesson count falls outside [floor(avg), ceil(avg)], where avg is the
# class's required lessons divided by the number of days.
violations_spread = []
for c in classes:
    # Classes without a num_lessons entry are scheduled zero times by the
    # required-lessons constraint, so treat them as needing 0 lessons here
    # rather than raising KeyError. int() mirrors that constraint's coercion
    # and guarantees NewIntVar receives an integer bound.
    lessons_needed = int(num_lessons.get(c, 0))
    avg = lessons_needed / len(days)  # target lessons per day (can be fractional)
    upper_target = math.ceil(avg)
    lower_target = math.floor(avg)

    for d in days:
        day_lessons = sum(x[(c, d, p)] for p in periods)
        dev = model.NewIntVar(0, lessons_needed, f"spread_dev[{c},{d}]")
        violations_spread.append(dev)
        # dev >= overshoot above the rounded-up average ...
        model.Add(day_lessons - upper_target <= dev)
        # ... and dev >= shortfall below the rounded-down average.
        model.Add(lower_target - day_lessons <= dev)

In [12]:
# Double-period rule: a class meets at most twice on any day, and when it
# meets twice the two lessons must occupy consecutive periods.
for c in classes:
    for d in days:
        vars_day = [x[(c, d, p)] for p in periods]
        lessons_today = sum(vars_day)

        # is_two is true exactly when the class has two lessons on this day;
        # otherwise the day holds at most one.
        is_two = model.NewBoolVar(f"{c}_{d}_is_two")
        model.Add(lessons_today == 2).OnlyEnforceIf(is_two)
        model.Add(lessons_today <= 1).OnlyEnforceIf(is_two.Not())

        if len(periods) < 2:
            continue  # no adjacent periods exist to pair up

        pair_bools = []
        for p_idx, (first, second) in enumerate(zip(vars_day, vars_day[1:])):
            pair = model.NewBoolVar(f"{c}_{d}_pair_{p_idx}")
            pair_bools.append(pair)
            # pair -> both neighbouring slots are used
            model.Add(first == 1).OnlyEnforceIf(pair)
            model.Add(second == 1).OnlyEnforceIf(pair)
        # Two lessons on a day require at least one adjacent pair.
        model.Add(sum(pair_bools) >= 1).OnlyEnforceIf(is_two)

In [13]:
# Objective: minimize both total clashes and uneven spread
clash_penalty = sum(violations)           # student clashes
spread_penalty = sum(violations_spread)   # uneven spread
model.Minimize(clash_penalty + spread_penalty)

## Solver & Output

In [14]:
# Solve the model, giving the solver at most this many seconds.
SOLVE_TIME_LIMIT_SECONDS = 20

solver = cp_model.CpSolver()
solver.parameters.max_time_in_seconds = SOLVE_TIME_LIMIT_SECONDS
status = solver.Solve(model)

In [15]:
# Collect every (weekday, period, class) slot the solver switched on and save
# it. Raises RuntimeError when the solver returned neither an optimal nor a
# feasible solution.
if status in (cp_model.OPTIMAL, cp_model.FEASIBLE):
    rows = [
        [d, p, c]
        for (c, d, p), var in x.items()
        if solver.Value(var) == 1
    ]
    output = pd.DataFrame(rows, columns=['weekday', 'period', 'class'])
    # Actually write the file that the message below announces; previously
    # the message was printed but nothing was saved.
    output.to_csv("timetable_solution.csv", index=False)
    print("Solution written to timetable_solution.csv")
else:
    # Include the status name so a timeout (UNKNOWN) can be told apart from a
    # model that is proven INFEASIBLE.
    raise RuntimeError(
        f"No feasible solution found (solver status: {solver.StatusName(status)}). "
        "Stopping execution."
    )

Solution written to timetable_solution.csv


In [16]:
# Build a lookup from each timeslot label to a period index.

# Split each label once, on its last ' - '; column 0 is the part before it.
split_cols = timeslots_days['timeslot'].str.rsplit(' - ', n=1, expand=True)

# Clean the leading part into a start time, one step at a time.
start_times = split_cols[0]
start_times = start_times.str.replace(r'^[A-Z]+\s*', '', regex=True)  # strip a leading run of capitals, e.g. 'FRI'
start_times = start_times.str.replace('_', ':', regex=False)          # 07_45 -> 07:45
start_times = start_times.str.strip()

# New frame: the timeslot table plus the derived start_time column.
period_interval = timeslots_days.assign(start_time=start_times)

# Distinct start times, in order of first appearance; the position in this
# list becomes the period index.
unique_times = period_interval['start_time'].unique().tolist()

period_start_times = pd.DataFrame({
    "period": range(len(unique_times)),
    "start_time": unique_times,
})

# Attach the period index to every timeslot row.
period_interval = pd.merge(
    period_interval,
    period_start_times,
    on="start_time",
    how="left",
)

In [17]:
# Translate each scheduled (weekday, period) back to its timeslot label and
# keep only the label and the class.
# NOTE(review): assumes the timeslot table carries a 'weekday' column; slots
# without a match keep a missing timeslot because this is a left join - confirm.
final = pd.merge(
    output,
    period_interval,
    on=['period', 'weekday'],
    how='left',
).loc[:, ['timeslot', 'class']]

In [18]:
# Save the timetable to timetable.csv, leaving out the DataFrame index column.
final.to_csv("timetable.csv", index=False)