In [1]:
import pandas as pd
from datetime import datetime
import re

In [None]:
df = pd.read_csv("/home/gokul/gigs/hacktheburgh/data/uoe/tt_child_sem1_infr.csv")
df.head()

Unnamed: 0,Name of Module,Name,Day,Start Time,End Time,Weeks,course_code,fixed_event
0,Informatics Research Review,Informatics Research Review - Q&A Session,Wed,14:00:00,15:00:00,Sem1 wk1-Sem1 wk10,INFR11136,False
1,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,Mon,14:00:00,15:00:00,Sem1 wk1-Sem1 wk11,INFR08031,False
2,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,Thu,14:00:00,15:00:00,Sem1 wk1-Sem1 wk11,INFR08031,False
3,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,Tue,09:00:00,10:00:00,Sem1 wk1-Sem1 wk10,INFR11130,False
4,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,Thu,09:00:00,10:00:00,Sem1 wk1-Sem1 wk10,INFR11130,False


In [3]:
# Convert time strings to numerical format for easier comparison
def time_to_float(time_str):
    """Converts time from HH:MM:SS format to float (e.g., 14:30:00 -> 14.5)."""
    time_obj = datetime.strptime(time_str, "%H:%M:%S")
    return time_obj.hour + time_obj.minute / 60


# Preprocess the DataFrame
df["Start Time"] = df["Start Time"].apply(time_to_float)
df["End Time"] = df["End Time"].apply(time_to_float)

# Convert fixed_event column to boolean
df["fixed_event"] = df["fixed_event"].astype(bool)

# Convert day names to numerical values for easier sorting (Monday=0, ..., Sunday=6)
day_mapping = {"Mon": 0, "Tue": 1, "Wed": 2, "Thu": 3, "Fri": 4, "Sat": 5, "Sun": 6}
df["Day"] = df["Day"].map(day_mapping)


def parse_weeks(s: str) -> set:
    # Split the string by comma to handle multiple intervals
    intervals = s.split(",")
    result = set()

    # Process each interval
    for interval in intervals:
        # Remove extra whitespace
        interval = interval.strip()
        # Find week numbers using regex: this finds all occurrences of "wk<number>"
        numbers = re.findall(r"wk(\d+)", interval)
        if len(numbers) == 2:
            start, end = int(numbers[0]), int(numbers[1])
            # Add all week numbers from start to end (inclusive)
            result.update(range(start, end + 1))

    return result


# Apply parsing to the dataset
df["Weeks"] = df["Weeks"].apply(parse_weeks)

print(df.shape)
df.head()

(364, 8)


Unnamed: 0,Name of Module,Name,Day,Start Time,End Time,Weeks,course_code,fixed_event
0,Informatics Research Review,Informatics Research Review - Q&A Session,2,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11136,False
1,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,0,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}",INFR08031,False
2,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,3,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}",INFR08031,False
3,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,1,9.0,10.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11130,False
4,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,3,9.0,10.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11130,False


In [13]:
import pandas as pd
from itertools import product
from collections import defaultdict


def weeks_overlap(weeks1, weeks2):
    """
    Check if there is any common week between two sets/lists of weeks.
    """
    w1 = set(weeks1)
    w2 = set(weeks2)
    return len(w1.intersection(w2)) > 0


def events_clash(e1, e2):
    """
    Returns True if e1 and e2 clash.
    They clash if:
      1. Same day
      2. Overlapping time
      3. Overlapping weeks
    """
    if e1["Day"] != e2["Day"]:
        return False

    # Check time overlap
    time_overlap = not (
        e1["End Time"] <= e2["Start Time"] or e2["End Time"] <= e1["Start Time"]
    )
    # Check week overlap
    week_overlap = weeks_overlap(e1["Weeks"], e2["Weeks"])

    return time_overlap and week_overlap


def schedule_clashes(schedule):
    """
    Given a list of events (dicts), returns:
      - (True, list_of_clashes) if there is any clash,
      - (False, []) otherwise.
    """
    n = len(schedule)
    clashes = []
    for i in range(n):
        for j in range(i + 1, n):
            if events_clash(schedule[i], schedule[j]):
                clashes.append((schedule[i], schedule[j]))
    return (len(clashes) > 0, clashes)


def compute_free_time(schedule):
    """
    Computes the total 'gap' time in the schedule (sum of gaps between events).
    We do this by day, ignoring weeks in the gap calculation (since the times repeat each week).
    """
    day_to_events = defaultdict(list)
    for e in schedule:
        day_to_events[e["Day"]].append(e)

    total_gap = 0.0
    for day, evts in day_to_events.items():
        # Sort by start time
        evts_sorted = sorted(evts, key=lambda x: x["Start Time"])
        # Sum up the gaps between consecutive events
        for i in range(len(evts_sorted) - 1):
            current_end = evts_sorted[i]["End Time"]
            next_start = evts_sorted[i + 1]["Start Time"]
            gap = next_start - current_end
            if gap > 0:
                total_gap += gap
    return total_gap


def build_schedule_from_df(df, course_codes, preference="tight"):
    """
    Build a schedule for the given course_codes, using the provided DataFrame.
    'preference' can be 'tight' (minimize free time) or 'spaced' (maximize free time).

    Returns:
      - (best_schedule, best_free_time) if a valid schedule is found
      - (None, None) if no valid schedule is possible

    Additionally, collects clash info for each failed combination, which
    can be used to report what is clashing if no schedule is found.
    """

    # Filter the dataframe rows to only those that match the requested courses
    df_relevant = df[df["course_code"].isin(course_codes)].copy()

    # Convert each row into a dictionary
    # (Make sure 'Weeks' is already a list of ints; if it's a string, parse it.)
    all_events = df_relevant.to_dict("records")

    # Separate fixed events from flexible events
    fixed_events = []
    # We'll group flexible events by (course_code, Name)
    event_groups_dict = defaultdict(list)

    for evt in all_events:
        if evt["fixed_event"] == True:
            fixed_events.append(evt)
        else:
            key = (evt["course_code"], evt["Name"])
            event_groups_dict[key].append(evt)

    # Convert event_groups_dict to a list of lists
    # Each sub-list is a group of possible slots for one "event"
    event_groups = list(event_groups_dict.values())

    best_schedule = None
    best_free_time = None

    # We'll do a simple cartesian product over all groups:
    # For each group, pick exactly one event from that group.
    all_combinations = product(*event_groups)

    # Store all clashes if we want to report them later
    all_clash_info = []

    for combo in all_combinations:
        # Combine the chosen slots with the fixed events
        trial_schedule = fixed_events + list(combo)

        # Check for clashes
        clash_found, clash_pairs = schedule_clashes(trial_schedule)
        if clash_found:
            # Store them if we want to show user later
            all_clash_info.append(clash_pairs)
            continue

        # If no clash, compute free time
        gap_time = compute_free_time(trial_schedule)

        if best_schedule is None:
            # First valid schedule
            best_schedule = trial_schedule
            best_free_time = gap_time
        else:
            # Check preference
            if preference == "tight":
                # We want to minimize gap_time
                if gap_time < best_free_time:
                    best_schedule = trial_schedule
                    best_free_time = gap_time
            else:  # preference == 'spaced'
                # We want to maximize gap_time
                if gap_time > best_free_time:
                    best_schedule = trial_schedule
                    best_free_time = gap_time

    # If we found no valid schedule, return None plus the clash info
    if best_schedule is None:
        return None, None, all_clash_info
    else:
        return best_schedule, best_free_time, all_clash_info


# Let's say the user wants to take these courses:
user_courses = ["INFR11135", "INFR08031", "INFR11136"]
preference = "tight"  # or "spaced"

schedule, free_time, clash_info = build_schedule_from_df(
    df, user_courses, preference=preference
)

if schedule is None:
    print("No clash-free schedule possible for these courses!\n")
    # Show some clash details to help the user understand what's happening
    if clash_info:
        # We'll just show the first set of clashes found (for brevity),
        # but you could iterate over all sets if you want more detail.
        print("Here are some clashing events from one of the failed combinations:")
        for e1, e2 in clash_info[0]:
            print(" - Clash between:")
            print(
                f"     {e1['course_code']} | {e1['Name']} | "
                f"Day {e1['Day']} {e1['Start Time']}-{e1['End Time']} | Weeks={e1['Weeks']}"
            )
            print(
                f"     {e2['course_code']} | {e2['Name']} | "
                f"Day {e2['Day']} {e2['Start Time']}-{e2['End Time']} | Weeks={e2['Weeks']}"
            )
else:
    print(f"Found a schedule with free_time = {free_time} (preference: {preference})\n")
    print("Schedule details:")
    # Sort the final schedule by (Day, Start Time) for nicer printing
    schedule_sorted = sorted(schedule, key=lambda x: (x["Day"], x["Start Time"]))
    for evt in schedule_sorted:
        print(
            f"  {evt['course_code']} | {evt['Name']} | Day {evt['Day']} "
            f"| {evt['Start Time']}-{evt['End Time']} | Weeks={evt['Weeks']}"
        )

No clash-free schedule possible for these courses!

Here are some clashing events from one of the failed combinations:
 - Clash between:
     INFR11136 | Informatics Research Review - Q&A Session | Day 2 14.0-15.0 | Weeks={1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
     INFR08031 | Discrete Mathematics and Probability - Tutorial/05 | Day 2 14.0-15.0 | Weeks={2, 3, 4, 5, 7, 8, 9, 10}
 - Clash between:
     INFR08031 | Discrete Mathematics and Probability - Tutorial/03 | Day 2 13.0-14.0 | Weeks={2, 3, 4, 5, 7, 8, 9, 10}
     INFR08031 | Discrete Mathematics and Probability - Tutorial/04 | Day 2 13.0-14.0 | Weeks={2, 3, 4, 5, 7, 8, 9, 10}
 - Clash between:
     INFR11136 | Informatics Research Review - Tutorial/14 | Day 3 13.0-14.0 | Weeks={3, 4, 5, 6, 7, 8, 9, 10}
     INFR08031 | Discrete Mathematics and Probability - Tutorial/07 | Day 3 13.0-14.0 | Weeks={2, 3, 4, 5, 7, 8, 9, 10}


In [None]:
df = pd.read_excel(
    "/home/gokul/gigs/hacktheburgh/data/uoe/tt_child_sem1.xlsx", sheet_name="Sem1 TT"
)
print(df.shape)
df.head()

(15907, 18)


Unnamed: 0,Name of Department,Module ID,Name of Module,Name of Type,Activity ID,Name,Day,Start Time,End Time,Whole-Class/Subgroup,Size,Real Size,Duration,Weeks,Number of weeks,Room,Building,Campus
0,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0D8,Medical Sciences 1 - Workshop/A,Tue,10:00:00,13:00:00,,20,18,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,OC_Teaching Room 05,Old College,*Central
1,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0DA,Medical Sciences 1 - Workshop/B,Tue,10:00:00,13:00:00,,20,16,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,7-8CS_1.03,7-8 Chambers Street,*Central
2,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 002,Medical Sciences 4 - Tutorial/01,Wed,11:00:00,13:00:00,,15,14,4,Sem1 wk11,1,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central
3,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 001,Medical Sciences 4 - Tutorial/01,Wed,09:00:00,11:00:00,,15,14,4,Sem1 wk2-Sem1 wk10,9,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central
4,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0EA 002,Medical Sciences 4 - Tutorial/02,Wed,11:00:00,13:00:00,,15,13,4,Sem1 wk11,1,MST_Teaching Room 13 (01M.473) - Doorway 3,"Medical School, Teviot",*Central


In [3]:
df["course_code"] = df["Module ID"].str.split("_").str[0]
print(df.shape)
df.head()

(15907, 19)


Unnamed: 0,Name of Department,Module ID,Name of Module,Name of Type,Activity ID,Name,Day,Start Time,End Time,Whole-Class/Subgroup,Size,Real Size,Duration,Weeks,Number of weeks,Room,Building,Campus,course_code
0,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0D8,Medical Sciences 1 - Workshop/A,Tue,10:00:00,13:00:00,,20,18,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,OC_Teaching Room 05,Old College,*Central,MSBM08002
1,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0DA,Medical Sciences 1 - Workshop/B,Tue,10:00:00,13:00:00,,20,16,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,7-8CS_1.03,7-8 Chambers Street,*Central,MSBM08002
2,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 002,Medical Sciences 4 - Tutorial/01,Wed,11:00:00,13:00:00,,15,14,4,Sem1 wk11,1,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011
3,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 001,Medical Sciences 4 - Tutorial/01,Wed,09:00:00,11:00:00,,15,14,4,Sem1 wk2-Sem1 wk10,9,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011
4,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0EA 002,Medical Sciences 4 - Tutorial/02,Wed,11:00:00,13:00:00,,15,13,4,Sem1 wk11,1,MST_Teaching Room 13 (01M.473) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011


In [4]:
df = df[~df["course_code"].isna()]
print(df.shape)
df.head()

(14578, 19)


Unnamed: 0,Name of Department,Module ID,Name of Module,Name of Type,Activity ID,Name,Day,Start Time,End Time,Whole-Class/Subgroup,Size,Real Size,Duration,Weeks,Number of weeks,Room,Building,Campus,course_code
0,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0D8,Medical Sciences 1 - Workshop/A,Tue,10:00:00,13:00:00,,20,18,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,OC_Teaching Room 05,Old College,*Central,MSBM08002
1,Deanery of Biomedical Sciences,MSBM08002_SS1_SEM1,Medical Sciences 1.,*Workshop,#SPLUS03A0DA,Medical Sciences 1 - Workshop/B,Tue,10:00:00,13:00:00,,20,16,6,"Sem1 wk1-Sem1 wk8, Sem1 wk10-Sem1 wk11",10,7-8CS_1.03,7-8 Chambers Street,*Central,MSBM08002
2,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 002,Medical Sciences 4 - Tutorial/01,Wed,11:00:00,13:00:00,,15,14,4,Sem1 wk11,1,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011
3,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0E8 001,Medical Sciences 4 - Tutorial/01,Wed,09:00:00,11:00:00,,15,14,4,Sem1 wk2-Sem1 wk10,9,MST_Teaching Room 08 (1.420) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011
4,Deanery of Biomedical Sciences,MSBM10011_SS1_YR,Medical Sciences 4,*Tutorial,#SPLUS03A0EA 002,Medical Sciences 4 - Tutorial/02,Wed,11:00:00,13:00:00,,15,13,4,Sem1 wk11,1,MST_Teaching Room 13 (01M.473) - Doorway 3,"Medical School, Teviot",*Central,MSBM10011


In [5]:
df_infr = df[df["course_code"].str.contains("INF")]
print(df_infr.shape)
df_infr.head()

(364, 19)


Unnamed: 0,Name of Department,Module ID,Name of Module,Name of Type,Activity ID,Name,Day,Start Time,End Time,Whole-Class/Subgroup,Size,Real Size,Duration,Weeks,Number of weeks,Room,Building,Campus,course_code
253,School of Informatics,INFR11136_SS1_SEM1,Informatics Research Review,Q&A Session - Online Live,#SPLUSBE734B,Informatics Research Review - Q&A Session,Wed,14:00:00,15:00:00,WC,305,221,2,Sem1 wk1-Sem1 wk10,10,0,0,0,INFR11136
254,School of Informatics,INFR08031_SV1_SEM1,Discrete Mathematics and Probability,*Lecture,#SPLUSBE7351,Discrete Mathematics and Probability - Lecture,Mon,14:00:00,15:00:00,WC,213,214,2,Sem1 wk1-Sem1 wk11,11,40GS_Lecture Theatre B,40 George Square Lecture Theatres,*George Square/Buccleuch,INFR08031
255,School of Informatics,INFR08031_SV1_SEM1,Discrete Mathematics and Probability,*Lecture,#SPLUSBE7353,Discrete Mathematics and Probability - Lecture,Thu,14:00:00,15:00:00,WC,213,214,2,Sem1 wk1-Sem1 wk11,11,40GS_Lecture Theatre B,40 George Square Lecture Theatres,*George Square/Buccleuch,INFR08031
256,School of Informatics,INFR11130_SV1_SEM1,Machine Learning and Pattern Recognition,*Lecture,#SPLUSBE73C9,Machine Learning and Pattern Recognition - Lec...,Tue,09:00:00,10:00:00,WC,156,142,2,Sem1 wk1-Sem1 wk10,10,AT_Lecture Theatre 3,Appleton Tower,*George Square/Buccleuch,INFR11130
257,School of Informatics,INFR11130_SV1_SEM1,Machine Learning and Pattern Recognition,*Lecture,#SPLUSBE73CD,Machine Learning and Pattern Recognition - Lec...,Thu,09:00:00,10:00:00,WC,156,142,2,Sem1 wk1-Sem1 wk10,10,AT_Lecture Theatre 1,Appleton Tower,*George Square/Buccleuch,INFR11130


In [6]:
df_infr["Name of Type"].value_counts()

Name of Type
*Tutorial                    184
*Lecture                      95
Computer Workshop             55
*Workshop                     12
Q&A Session                    9
*Laboratory                    4
*Practical                     2
*Lecture - Online Live         2
Q&A Session - Online Live      1
Name: count, dtype: int64

In [7]:
df_infr = df_infr.copy()
df_infr.loc[:, "fixed_event"] = False
df_infr.loc[
    df["Name of Type"].isin(["Lecture", "*Lecture - Online Live"]), "fixed_event"
] = True
print(df_infr.shape)
df_infr.head()

(364, 20)


Unnamed: 0,Name of Department,Module ID,Name of Module,Name of Type,Activity ID,Name,Day,Start Time,End Time,Whole-Class/Subgroup,Size,Real Size,Duration,Weeks,Number of weeks,Room,Building,Campus,course_code,fixed_event
253,School of Informatics,INFR11136_SS1_SEM1,Informatics Research Review,Q&A Session - Online Live,#SPLUSBE734B,Informatics Research Review - Q&A Session,Wed,14:00:00,15:00:00,WC,305,221,2,Sem1 wk1-Sem1 wk10,10,0,0,0,INFR11136,False
254,School of Informatics,INFR08031_SV1_SEM1,Discrete Mathematics and Probability,*Lecture,#SPLUSBE7351,Discrete Mathematics and Probability - Lecture,Mon,14:00:00,15:00:00,WC,213,214,2,Sem1 wk1-Sem1 wk11,11,40GS_Lecture Theatre B,40 George Square Lecture Theatres,*George Square/Buccleuch,INFR08031,False
255,School of Informatics,INFR08031_SV1_SEM1,Discrete Mathematics and Probability,*Lecture,#SPLUSBE7353,Discrete Mathematics and Probability - Lecture,Thu,14:00:00,15:00:00,WC,213,214,2,Sem1 wk1-Sem1 wk11,11,40GS_Lecture Theatre B,40 George Square Lecture Theatres,*George Square/Buccleuch,INFR08031,False
256,School of Informatics,INFR11130_SV1_SEM1,Machine Learning and Pattern Recognition,*Lecture,#SPLUSBE73C9,Machine Learning and Pattern Recognition - Lec...,Tue,09:00:00,10:00:00,WC,156,142,2,Sem1 wk1-Sem1 wk10,10,AT_Lecture Theatre 3,Appleton Tower,*George Square/Buccleuch,INFR11130,False
257,School of Informatics,INFR11130_SV1_SEM1,Machine Learning and Pattern Recognition,*Lecture,#SPLUSBE73CD,Machine Learning and Pattern Recognition - Lec...,Thu,09:00:00,10:00:00,WC,156,142,2,Sem1 wk1-Sem1 wk10,10,AT_Lecture Theatre 1,Appleton Tower,*George Square/Buccleuch,INFR11130,False


In [None]:
df_infr[
    [
        "Name of Module",
        "Name",
        "Day",
        "Start Time",
        "End Time",
        "Weeks",
        "course_code",
        "fixed_event",
    ]
].to_csv("/home/gokul/gigs/hacktheburgh/data/uoe/tt_child_sem1_infr.csv", index=False)

In [None]:
import pandas as pd
from itertools import permutations
from itertools import product


# Sample event data
events = [
    {"name": "Fixed Event 1", "start": 9, "end": 10, "fixed": True},
    {"name": "Fixed Event 2", "start": 13, "end": 14, "fixed": True},
    {"name": "Flexible Event A", "slots": [(10, 11), (11, 12)], "fixed": False},
    {"name": "Flexible Event B", "slots": [(12, 13), (14, 15)], "fixed": False},
]


# Function to schedule based on preference
def schedule_events(events, preference="tight"):
    fixed_events = [e for e in events if e["fixed"]]
    flexible_events = [e for e in events if not e["fixed"]]

    # Sort fixed events in time order
    fixed_events.sort(key=lambda x: x["start"])

    best_schedule = None
    best_gap_score = float("inf") if preference == "tight" else float("-inf")

    # Try all permutations of flexible event slots
    for perm in permutations(
        [
            {"name": e["name"], "start": s[0], "end": s[1]}
            for e in flexible_events
            for s in e["slots"]
        ],
        len(flexible_events),
    ):
        schedule = fixed_events + list(perm)
        schedule.sort(key=lambda x: x["start"])

        # Check for overlap
        valid = True
        for i in range(len(schedule) - 1):
            if schedule[i]["end"] > schedule[i + 1]["start"]:
                valid = False
                break

        if not valid:
            continue

        # Calculate gap score
        gaps = [
            schedule[i + 1]["start"] - schedule[i]["end"]
            for i in range(len(schedule) - 1)
        ]
        total_gap = sum(gaps)

        if (preference == "tight" and total_gap < best_gap_score) or (
            preference == "spaced" and total_gap > best_gap_score
        ):
            best_schedule = schedule
            best_gap_score = total_gap

    return best_schedule


# Get schedule
tight_schedule = schedule_events(events, "tight")
spaced_schedule = schedule_events(events, "spaced")

# Display results
df_tight = pd.DataFrame(tight_schedule)
df_spaced = pd.DataFrame(spaced_schedule)

In [3]:
df_tight

Unnamed: 0,name,start,end,fixed
0,Fixed Event 1,9,10,True
1,Flexible Event A,10,11,
2,Flexible Event A,11,12,
3,Fixed Event 2,13,14,True


In [4]:
df_spaced

Unnamed: 0,name,start,end,fixed
0,Fixed Event 1,9,10,True
1,Flexible Event A,10,11,
2,Fixed Event 2,13,14,True
3,Flexible Event B,14,15,


In [13]:
import pandas as pd
from itertools import product

# Sample multi-course event data
courses = {
    "Course A": [
        {"name": "Fixed A1", "start": 9, "end": 10, "fixed": True},
        {"name": "Flexible A2", "slots": [(10, 11), (11, 12)], "fixed": False},
    ],
    "Course B": [
        {"name": "Fixed B1", "start": 13, "end": 14, "fixed": True},
        {"name": "Flexible B2", "slots": [(10, 11), (11, 12)], "fixed": False},
    ],
    "Course C": [
        {"name": "Fixed C1", "start": 10, "end": 11, "fixed": True},
        {"name": "Flexible C2", "slots": [(11, 12), (15, 16)], "fixed": False},
    ],
}


def schedule_multiple_courses(courses, preference="tight"):
    fixed_events = []
    flexible_events = []

    # Collect fixed and flexible events
    for course, events in courses.items():
        for event in events:
            if event["fixed"]:
                fixed_events.append(
                    {
                        "name": f"{course} - {event['name']}",
                        "start": event["start"],
                        "end": event["end"],
                    }
                )
            else:
                flexible_events.append(
                    {"name": f"{course} - {event['name']}", "slots": event["slots"]}
                )

    # Sort fixed events by time
    fixed_events.sort(key=lambda x: x["start"])

    best_schedule = None
    best_gap_score = float("inf") if preference == "tight" else float("-inf")

    # Try all slot combinations for flexible events
    for slot_combination in product(*[event["slots"] for event in flexible_events]):
        temp_schedule = fixed_events.copy()
        flexible_assignment = [
            {
                "name": flexible_events[i]["name"],
                "start": slot_combination[i][0],
                "end": slot_combination[i][1],
            }
            for i in range(len(flexible_events))
        ]

        temp_schedule += flexible_assignment
        temp_schedule.sort(key=lambda x: x["start"])

        # Check for clashes
        valid = True
        for i in range(len(temp_schedule) - 1):
            if temp_schedule[i]["end"] > temp_schedule[i + 1]["start"]:
                valid = False
                break

        if not valid:
            continue  # Skip this combination since it causes a clash

        # Calculate gap score
        gaps = [
            temp_schedule[i + 1]["start"] - temp_schedule[i]["end"]
            for i in range(len(temp_schedule) - 1)
        ]
        total_gap = sum(gaps)

        # Select best schedule based on preference
        if (preference == "tight" and total_gap < best_gap_score) or (
            preference == "spaced" and total_gap > best_gap_score
        ):
            best_schedule = temp_schedule
            best_gap_score = total_gap

    # If no valid schedule found, report clash
    if best_schedule is None:
        return "Unavoidable clash detected! Some events cannot be scheduled without overlapping."

    return pd.DataFrame(best_schedule)


# Get tight and spaced schedules
tight_schedule_df = schedule_multiple_courses(courses, "tight")
spaced_schedule_df = schedule_multiple_courses(courses, "spaced")

if isinstance(tight_schedule_df, str):
    print(tight_schedule_df)  # Print clash message
else:
    tight_schedule_df

Unavoidable clash detected! Some events cannot be scheduled without overlapping.


In [14]:
if isinstance(spaced_schedule_df, str):
    print(spaced_schedule_df)  # Print clash message
else:
    spaced_schedule_df

Unavoidable clash detected! Some events cannot be scheduled without overlapping.


In [15]:
tight_schedule_df

'Unavoidable clash detected! Some events cannot be scheduled without overlapping.'

In [16]:
spaced_schedule_df

'Unavoidable clash detected! Some events cannot be scheduled without overlapping.'

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("/home/gokul/gigs/hacktheburgh/data/uoe/tt_child_sem1_infr.csv")
df.head()

Unnamed: 0,Name of Module,Name,Day,Start Time,End Time,Weeks,course_code,fixed_event
0,Informatics Research Review,Informatics Research Review - Q&A Session,Wed,14:00:00,15:00:00,Sem1 wk1-Sem1 wk10,INFR11136,False
1,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,Mon,14:00:00,15:00:00,Sem1 wk1-Sem1 wk11,INFR08031,False
2,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,Thu,14:00:00,15:00:00,Sem1 wk1-Sem1 wk11,INFR08031,False
3,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,Tue,09:00:00,10:00:00,Sem1 wk1-Sem1 wk10,INFR11130,False
4,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,Thu,09:00:00,10:00:00,Sem1 wk1-Sem1 wk10,INFR11130,False


In [3]:
df["Weeks"].value_counts()

Weeks
Sem1 wk3-Sem1 wk10                                          60
Sem1 wk1-Sem1 wk11                                          40
Sem1 wk2-Sem1 wk11                                          38
Sem1 wk1-Sem1 wk10                                          27
Sem1 wk4, Sem1 wk6, Sem1 wk8, Sem1 wk10                     18
                                                            ..
Sem1 wk1-Sem1 wk2, Sem1 wk4-Sem1 wk7, Sem1 wk9-Sem1 wk11     1
Sem1 wk6, Sem1 wk8-Sem1 wk11                                 1
Sem1 wk1-Sem1 wk5                                            1
Sem1 wk2, Sem1 wk4, Sem1 wk6, Sem1 wk8                       1
Sem1 wk2-Sem1 wk6                                            1
Name: count, Length: 61, dtype: int64

In [4]:
from datetime import datetime
import re


# Convert time strings to numerical format for easier comparison
def time_to_float(time_str):
    """Converts time from HH:MM:SS format to float (e.g., 14:30:00 -> 14.5)."""
    time_obj = datetime.strptime(time_str, "%H:%M:%S")
    return time_obj.hour + time_obj.minute / 60


# Preprocess the DataFrame
df["Start Time"] = df["Start Time"].apply(time_to_float)
df["End Time"] = df["End Time"].apply(time_to_float)

# Convert fixed_event column to boolean
df["fixed_event"] = df["fixed_event"].astype(bool)

# Convert day names to numerical values for easier sorting (Monday=0, ..., Sunday=6)
day_mapping = {"Mon": 0, "Tue": 1, "Wed": 2, "Thu": 3, "Fri": 4, "Sat": 5, "Sun": 6}
df["Day"] = df["Day"].map(day_mapping)


def parse_weeks(s: str) -> set:
    # Split the string by comma to handle multiple intervals
    intervals = s.split(",")
    result = set()

    # Process each interval
    for interval in intervals:
        # Remove extra whitespace
        interval = interval.strip()
        # Find week numbers using regex: this finds all occurrences of "wk<number>"
        numbers = re.findall(r"wk(\d+)", interval)
        if len(numbers) == 2:
            start, end = int(numbers[0]), int(numbers[1])
            # Add all week numbers from start to end (inclusive)
            result.update(range(start, end + 1))

    return result


# Apply parsing to the dataset
df["Weeks"] = df["Weeks"].apply(parse_weeks)

In [5]:
print(df.shape)
df.head()

(364, 8)


Unnamed: 0,Name of Module,Name,Day,Start Time,End Time,Weeks,course_code,fixed_event
0,Informatics Research Review,Informatics Research Review - Q&A Session,2,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11136,False
1,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,0,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}",INFR08031,False
2,Discrete Mathematics and Probability,Discrete Mathematics and Probability - Lecture,3,14.0,15.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}",INFR08031,False
3,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,1,9.0,10.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11130,False
4,Machine Learning and Pattern Recognition,Machine Learning and Pattern Recognition - Lec...,3,9.0,10.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}",INFR11130,False


In [9]:
from itertools import product


def schedule_courses_check_conflict(course_codes, preference="tight"):
    """
    Schedules the given courses while avoiding clashes, considering week information.

    Parameters:
    - course_codes (list): List of course codes to schedule.
    - preference (str): "tight" for minimum gaps, "spaced" for maximum gaps.

    Returns:
    - DataFrame containing the optimal schedule or a message if no valid schedule exists.
    """
    selected_df = df[df["course_code"].isin(course_codes)].copy()

    fixed_events = []
    flexible_events = {}

    # Separate fixed and flexible events
    for _, row in selected_df.iterrows():
        event = {
            "name": f"{row['course_code']} - {row['Name']}",
            "day": row["Day"],
            "start": row["Start Time"],
            "end": row["End Time"],
            "weeks": row["Weeks"],
        }

        if row["fixed_event"]:
            fixed_events.append(event)
        else:
            if event["name"] not in flexible_events:
                flexible_events[event["name"]] = []
            flexible_events[event["name"]].append(
                (event["day"], event["start"], event["end"], event["weeks"])
            )

    # Sort fixed events
    fixed_events.sort(key=lambda x: (x["day"], x["start"]))

    best_schedule = None
    best_gap_score = float("inf") if preference == "tight" else float("-inf")

    # Generate all combinations for flexible events
    all_flexible_slots = list(product(*flexible_events.values()))

    for slot_combination in all_flexible_slots:
        temp_schedule = fixed_events.copy()

        # Add selected flexible slots
        flexible_assignment = [
            {
                "name": list(flexible_events.keys())[i],
                "day": slot[0],
                "start": slot[1],
                "end": slot[2],
                "weeks": slot[3],
            }
            for i, slot in enumerate(slot_combination)
        ]

        temp_schedule += flexible_assignment
        temp_schedule.sort(key=lambda x: (x["day"], x["start"]))

        # Check for clashes (considering weeks)
        valid = True
        for i in range(len(temp_schedule) - 1):
            event1, event2 = temp_schedule[i], temp_schedule[i + 1]

            # Check if the events are on the same day and time overlaps
            time_conflict = (event1["day"] == event2["day"]) and (
                event1["end"] > event2["start"]
            )

            # Check if the events occur in overlapping weeks
            week_conflict = bool(event1["weeks"] & event2["weeks"])

            if time_conflict and week_conflict:
                valid = False
                break

        if not valid:
            continue  # Skip this combination since it causes a clash

        # Calculate gap score
        gaps = [
            temp_schedule[i + 1]["start"] - temp_schedule[i]["end"]
            for i in range(len(temp_schedule) - 1)
            if temp_schedule[i]["day"] == temp_schedule[i + 1]["day"]
        ]
        total_gap = sum(gaps)

        # Select best schedule based on preference
        if (preference == "tight" and total_gap < best_gap_score) or (
            preference == "spaced" and total_gap > best_gap_score
        ):
            best_schedule = temp_schedule
            best_gap_score = total_gap

    # If no valid schedule found, report clash
    if best_schedule is None:
        return "Unavoidable clash detected! Some events cannot be scheduled without overlapping."

    return pd.DataFrame(best_schedule)


# Example usage (Modify as needed)
selected_courses = ["INFR11246", "INFR11262"]  # Example course codes
preference_type = "tight"  # Choose between "tight" or "spaced"

# Generate schedule considering corrected week logic
optimal_schedule_corrected = schedule_courses_check_conflict(
    selected_courses, preference_type
)

# Display results
if isinstance(optimal_schedule_corrected, str):
    print(optimal_schedule_corrected)  # Print clash message
else:
    optimal_schedule_corrected

In [10]:
optimal_schedule_corrected

Unnamed: 0,name,day,start,end,weeks
0,INFR11246 - Computer Graphics: Rendering - Tut...,0,10.0,11.0,"{2, 3, 4}"
1,INFR11246 - Computer Graphics: Rendering - Tut...,0,11.0,12.0,"{2, 3, 4}"
2,INFR11246 - Computer Graphics: Rendering - Tut...,0,12.0,13.0,"{2, 3, 4}"
3,INFR11246 - Computer Graphics: Rendering - Lec...,3,13.0,14.0,"{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}"
