In [1]:
from ortools.sat.python import cp_model
from collections import defaultdict, Counter
import random
import pandas as pd
import itertools
import math

In [2]:
# Load the four input tables from the working directory.
# Later cells read these columns: lessons_required['class', 'num_lessons'],
# student_classes['class', 'student'], teacher_classes['class', 'teacher'],
# timeslots['timeslot', 'weekday'].
(
    lessons_required,
    student_classes,
    teacher_classes,
    timeslots,
) = map(
    pd.read_csv,
    (
        "lessons_required.csv",
        "student_classes.csv",
        "teacher_classes.csv",
        "timeslots.csv",
    ),
)

In [3]:
# One row per individual lesson: every class label is repeated `num_lessons`
# times, and the result gets a fresh 0..n-1 index.
lessons = (
    lessons_required[['class']]
    .loc[lessons_required.index.repeat(lessons_required['num_lessons'])]
    .reset_index(drop=True)
)

In [4]:
def _members_per_class(frame, member_col, names_col, count_col):
    """Summarise the distinct members of each class.

    Groups ``frame`` by its 'class' column and returns one row per class with
    two extra columns: ``names_col`` (the sorted distinct values of
    ``member_col`` joined with ', ') and ``count_col`` (how many distinct
    values there are).
    """
    def joined_names(values):
        return ', '.join(sorted(values.unique()))

    def distinct_count(values):
        return values.nunique()

    per_class = frame.groupby('class')[member_col].agg([
        (names_col, joined_names),
        (count_col, distinct_count),
    ])
    return per_class.reset_index()


# Students enrolled in each class.
classes_student_list = _members_per_class(
    student_classes, 'student', 'students', 'total_students'
)

# Teachers assigned to each class.
teacher_classes_list = _members_per_class(
    teacher_classes, 'teacher', 'teachers', 'total_teachers'
)

In [5]:
# Attach the student and teacher summaries to every lesson row. Left joins
# keep lessons whose class has no students or no teachers on record; their
# summary columns are then NaN.
timetable = pd.merge(lessons, classes_student_list, how='left', on='class')
timetable = pd.merge(timetable, teacher_classes_list, how='left', on='class')

In [6]:
# Fix the column order handed to the scheduler.
timetable = timetable.loc[
    :,
    ['class', 'students', 'total_students', 'teachers', 'total_teachers'],
]

In [7]:
def greedy_schedule(timetable_df, weekdays=None, periods_per_day=9, random_seed=42):
    """Greedily assign every lesson row to a (weekday, period) slot.

    Lessons are placed one at a time, largest student group first (ties
    broken by class label). For each lesson every slot is scored and the
    lowest score wins:

    * a slot where any of the lesson's teachers is already busy is rejected
      outright (hard constraint);
    * each student already busy in the slot costs 1000 (soft constraint);
    * each lesson of the same class already on that day costs 10;
    * the distance in periods to the nearest same-class lesson on that day is
      added, so lessons that must share a day are kept close together;
    * remaining ties are broken by a seeded random number.

    Parameters
    ----------
    timetable_df : pandas.DataFrame
        One row per lesson with columns 'class', 'teachers' and 'students'.
        'teachers' / 'students' may hold a comma-separated string, a
        list/tuple/set, a single value, or NaN/None (treated as empty).
    weekdays : list of str, optional
        Day labels, in order. Defaults to MON..FRI.
    periods_per_day : int
        Number of periods per day; periods are numbered from 0.
    random_seed : int
        Seed for the tie-breaking random numbers. A private generator is
        used, so the global ``random`` module state is left untouched.

    Returns
    -------
    pandas.DataFrame
        A copy of ``timetable_df`` (index reset) with the extra columns
        'teacher_list', 'student_list', 'n_students', 'weekday' and 'period'.

    Raises
    ------
    RuntimeError
        If some lesson has no slot free of teacher clashes (this includes
        the case where there are no slots at all).
    """
    if weekdays is None:
        weekdays = ['MON','TUE','WED','THU','FRI']
    timeslot_list = [
        (d, p) for d in range(len(weekdays)) for p in range(periods_per_day)
    ]

    df = timetable_df.copy().reset_index(drop=True)

    def to_list(v):
        # Containers are checked first: pd.isna() on a list returns an array,
        # and testing that array in an `if` raises for multi-element lists.
        if isinstance(v, (list, tuple, set)):
            return list(v)
        if isinstance(v, str):
            return [s.strip() for s in v.split(',') if s.strip()]
        if pd.isna(v):
            return []
        return [v]

    df['teacher_list'] = df['teachers'].apply(to_list)
    df['student_list'] = df['students'].apply(to_list)
    df['n_students'] = df['student_list'].apply(len)

    # Plain Python lists avoid repeated DataFrame lookups in the hot loop.
    teacher_lists = df['teacher_list'].tolist()
    student_lists = df['student_list'].tolist()
    class_labels = df['class'].tolist()

    # structures to quickly check conflicts
    teacher_at_slot = defaultdict(set)  # (day,period) -> set(teachers)
    student_at_slot = defaultdict(set)  # (day,period) -> set(students)
    class_days = defaultdict(lambda: defaultdict(list))  # class -> day -> list(periods assigned)

    # result arrays
    assigned_weekday = [None]*len(df)
    assigned_period = [None]*len(df)

    # Private generator: same sequence as random.seed(random_seed) followed by
    # random.random(), but without reseeding the process-wide RNG.
    rng = random.Random(random_seed)

    # heuristic: schedule the lessons with the largest student counts first
    lessons_order = sorted(
        range(len(df)),
        key=lambda l: (-len(student_lists[l]), class_labels[l]),
    )

    for l in lessons_order:
        teachers = teacher_lists[l]
        students = student_lists[l]
        periods_by_day = class_days[class_labels[l]]

        best_score = None
        best_slot = None
        for slot in timeslot_list:
            d, p = slot

            # hard teacher clash check
            busy_teachers = teacher_at_slot[slot]
            if any(t in busy_teachers for t in teachers):
                continue

            # student clashes are allowed but heavily penalised
            busy_students = student_at_slot[slot]
            student_conflicts = sum(1 for s in students if s in busy_students)

            # spread: prefer days where this class has fewer lessons so far
            same_day_periods = periods_by_day[d]
            num_class_on_day = len(same_day_periods)

            # if the class already meets that day, prefer a nearby period
            if same_day_periods:
                nearest_dist = min(abs(p - q) for q in same_day_periods)
            else:
                nearest_dist = 0  # first lesson of this class on that day

            # scoring: lower is better; the random second element breaks ties
            score = (
                (student_conflicts * 1000) + (num_class_on_day * 10) + nearest_dist,
                rng.random(),
            )

            if best_score is None or score < best_score:
                best_score = score
                best_slot = slot

        if best_slot is None:
            # every slot clashes with one of the lesson's teachers
            raise RuntimeError(f"No feasible slot found for lesson {l} (teacher clash everywhere).")

        d, p = best_slot
        assigned_weekday[l] = weekdays[d]
        assigned_period[l] = p
        teacher_at_slot[best_slot].update(teachers)
        student_at_slot[best_slot].update(students)
        periods_by_day[d].append(p)

    out = df.copy()
    out['weekday'] = assigned_weekday
    out['period'] = assigned_period
    return out

In [8]:
# Run the greedy scheduler over a five-day week with nine periods per day
# (periods are numbered 0-8 in the result).
schedule = greedy_schedule(
    timetable,
    weekdays=['MON', 'TUE', 'WED', 'THU', 'FRI'],
    periods_per_day=9,
)

In [9]:
# Map every timeslot row to a zero-based period index.

# The part of `timeslot` before ' - ' holds the weekday prefix and start time.
split_cols = timeslots['timeslot'].str.split(' - ', n=1, expand=True)

period_interval = timeslots.assign(
    start_time=(
        split_cols[0]
        .str.replace(r'^[A-Z]+\s*', '', regex=True)  # strip leading capitals, e.g. 'FRI'
        .str.replace('_', ':', regex=False)          # 07_45 -> 07:45
        .str.strip()
    )
)

# Distinct start times, numbered in the order they first appear in the file.
# NOTE(review): this assumes timeslots.csv lists times chronologically - confirm.
unique_times = list(period_interval['start_time'].drop_duplicates())

period_start_times = pd.DataFrame({
    "period": range(len(unique_times)),
    "start_time": unique_times
})

# Attach the period number to every timeslot row via its start time.
period_interval = pd.merge(
    period_interval, period_start_times, how="left", on="start_time"
)

In [10]:
# Translate each scheduled (period, weekday) pair back to its timeslot label.
# The join is a left join, so a lesson whose pair has no matching timeslot row
# is kept with a NaN `timeslot`.
final = pd.merge(schedule, period_interval, how='left', on=['period', 'weekday'])
final = final[['timeslot', 'class']]

In [11]:
# Write the timeslot/class pairs to disk without the DataFrame index column.
final.to_csv(path_or_buf="timetable1.csv", index=False)

In [12]:
# Shell command (not executed by this notebook), run against the file written above:
#   python opt_test.py timetable1.csv