In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
from collections import defaultdict # For easier subject counts
import operator
from tqdm.notebook import tqdm
import datetime
import re
import pickle
import random
# Optional: for better display in Jupyter
from IPython.display import display
from scipy.stats import truncnorm

In [2]:

def generate_master_timetables():
    """
    Generates and saves master academic timetables for all subject/year groups.
    """
    print("Step 1: Loading student data to identify all groups...")
    try:
        students_df = pd.read_csv('students_groups_social.csv')
    except FileNotFoundError:
        print("Error: 'students_groups_social.csv' not found. Please ensure the file is available.")
        return

    # Find all unique (Subject, Year) combinations from the student data
    unique_subject_years = students_df[['Subject', 'Year']].drop_duplicates().to_records(index=False)
    unique_subjects = students_df['Subject'].unique().tolist()
    
    print(f"Found {len(unique_subject_years)} unique subject/year combinations.")

    # --- Rule Generation ---
    # Categorize subjects to apply different rule templates
    stem_subjects = [s for s in unique_subjects if 'Eng' in s or 'Sci' in s or 'Math' in s or 'Comp' in s or 'Med' in s or 'Vet' in s]
    
    academic_rules = {}
    for subject, year in unique_subject_years:
        rule_key = (subject, year)
        # Default to Arts-style rules
        hours_per_day = 2
        days = [0, 2, 4] # Mon, Wed, Fri
        
        # STEM subjects have more contact hours
        if subject in stem_subjects:
            hours_per_day = 3
            days = [0, 1, 2, 3, 4] # Mon-Fri
            
        # Adjust hours based on year
        if year == 1:
            hours_per_day += 1 # More hours for first years
        elif year >= 3:
            hours_per_day -= 1 # Fewer formal lectures for later years
        
        academic_rules[rule_key] = {'hours': hours_per_day, 'days': days}

    print("Step 2: Generating a master timetable for each group...")
    NUM_TIMESTEPS = 4 * 7 * 24 #672
    master_timetables = {}
    lecture_hours_of_day = [9, 10, 11, 12, 14, 15, 16, 17] # Potential lecture slots

    for subject, year in unique_subject_years:
        rule_key = (subject, year)
        rule_details = academic_rules[rule_key]
        
        # Create a blank 672-hour schedule for this group
        schedule = ['Free'] * NUM_TIMESTEPS
        
        # Iterate through the 28 days of the simulation
        for day_num in range(28):
            current_datetime = datetime.datetime(2024, 10, 1) + datetime.timedelta(days=day_num)
            
            # Term starts Oct 9th, check if it's a valid lecture day
            if current_datetime.day >= 9 and current_datetime.weekday() in rule_details['days']:
                # Choose random available slots for the lectures on this day
                lecture_slots = np.random.choice(lecture_hours_of_day, rule_details['hours'], replace=False)
                
                for hour_slot in lecture_slots:
                    timestep_to_update = day_num * 24 + hour_slot
                    if timestep_to_update < NUM_TIMESTEPS:
                        schedule[timestep_to_update] = f"{subject} Lectures"
                        
        master_timetables[rule_key] = schedule
        
    print(f"Step 3: Saving {len(master_timetables)} master timetables to 'master_timetables.pkl'...")
    with open('master_timetables.pkl', 'wb') as f:
        pickle.dump(master_timetables, f)
        
    print("Done.")

def apply_master_timetables_efficiently():
    """
    Loads master timetables and applies them to students, creating the 
    final DataFrame in a single, high-performance operation.
    """
    print("Step 1: Loading prerequisite files...")
    try:
        with open('master_timetables.pkl', 'rb') as f:
            master_timetables = pickle.load(f)
        students_df = pd.read_csv('students_groups_social.csv')
    except FileNotFoundError as e:
        print(f"Error: A required file was not found. Please run the Part 1 script first. Missing: {e}")
        return

    print(f"Loaded {len(master_timetables)} master timetables and {len(students_df)} students.")
    
    NUM_TIMESTEPS = 4 * 7 * 24
    
    print("Step 2: Generating schedule data for all students in memory...")
    start_time = time.time()
    
    # This list will hold the schedule (as a list) for every student
    all_student_schedules = []

    # Iterate through each student to prepare their final schedule list
    for student_id, student_row in students_df.iterrows():
        # The default schedule is the student's college for all timesteps
        base_schedule = ['Free'] * NUM_TIMESTEPS
        
        rule_key = (student_row['Subject'], student_row['Year'])
        
        # If an academic timetable exists for them, apply it
        if rule_key in master_timetables:
            master_schedule = np.array(master_timetables[rule_key])
            # Use np.where to efficiently combine the base schedule with academic events
            final_schedule = np.where(master_schedule != 'Free', master_schedule, base_schedule)
            all_student_schedules.append(final_schedule.tolist())
        else:
            # Otherwise, their schedule is just the base schedule
            all_student_schedules.append(base_schedule)

    generation_time = time.time() - start_time
    print(f"Data generation complete in {generation_time:.2f} seconds.")

    print("Step 3: Creating the final DataFrame from the generated data (one operation)...")
    start_time = time.time()
    
    # Create the DataFrame in a single, efficient call
    timestep_columns = [f'T_{t}' for t in range(NUM_TIMESTEPS)]
    student_schedules_df = pd.DataFrame(
        data=all_student_schedules, 
        index=students_df.index, 
        columns=timestep_columns
    )
    student_schedules_df.index.name = 'StudentID'
    
    df_creation_time = time.time() - start_time
    print(f"DataFrame created in {df_creation_time:.2f} seconds.")
    
    output_filename = 'student_schedules_with_lectures.csv'
    print(f"Step 4: Saving the final populated schedules to '{output_filename}'...")
    student_schedules_df.to_csv(output_filename)
    
    print("Done.")

def schedule_meals():
    """
    Loads the academic schedules and populates them with lunch and dinner
    events based on group-shared timings and year-based probabilities.
    """
    print("Step 1: Loading prerequisite files...")
    try:
        # Load the schedules that already have academic events
        schedules_df = pd.read_csv('student_schedules_with_lectures.csv', index_col='StudentID')
        # Load student data to get College, Year, and Group ID
        students_df = pd.read_csv('students_groups_social.csv')
    except FileNotFoundError as e:
        print(f"Error: A required file was not found. Please ensure previous steps were run. Missing: {e}")
        return

    print(f"Loaded schedules for {len(schedules_df)} students.")

    # --- Step 2: Generate Meal Times for Each Group ---
    # We will create a master meal schedule for every group to ensure they eat together.
    unique_group_ids = students_df['Group ID'].dropna().unique()
    group_meal_times = {}
    
    LUNCH_HOURS = [12, 13]
    DINNER_HOURS = [17, 18, 19]
    NUM_DAYS = 4 * 7

    print(f"Generating shared meal timetables for {len(unique_group_ids)} groups...")
    for group_id in unique_group_ids:
        daily_schedule = []
        for day in range(NUM_DAYS):
            daily_schedule.append({
                'lunch_time': np.random.choice(LUNCH_HOURS),
                'dinner_time': np.random.choice(DINNER_HOURS)
            })
        group_meal_times[group_id] = daily_schedule
    
    # --- Step 3: Define Probability and Apply Schedules ---
    # Define the probability of eating in canteen by year
    canteen_probability_by_year = {
        1: 0.80,  # 80% for 1st years
        2: 0.70,  # 70% for 2nd years
        3: 0.60,  # 60% for 3rd years
        4: 0.40   # 40% for 4th years
    }
    # A default for any years outside 1-4
    default_prob = 0.50

    print("Step 3: Populating schedules with meal events...")
    # Iterate through each student in the main dataframe
    for student_id, student_row in students_df.iterrows():
        group_id = student_row['Group ID']
        # Skip students not in a group
        if pd.isna(group_id):
            continue

        student_year = student_row['Year']
        student_college = student_row['College']
        canteen_location = f"{student_college} Canteen" # e.g., "Christ's College Canteen"
        
        # Get the probability for this student's year
        prob = canteen_probability_by_year.get(student_year, default_prob)
        
        # Get the meal schedule for this student's group
        meal_schedule = group_meal_times[group_id]
        
        # Check each day of the simulation
        for day_num in range(NUM_DAYS):
            # --- Schedule Lunch ---
            lunch_hour = meal_schedule[day_num]['lunch_time']
            lunch_timestep = f"T_{day_num * 24 + lunch_hour}"
            
            # Check if the student is free for lunch
            is_free_for_lunch = "Lectures" not in str(schedules_df.at[student_id, lunch_timestep])
            
            if is_free_for_lunch:
                # Decide if they eat in the canteen based on probability
                if np.random.rand() < prob:
                    schedules_df.at[student_id, lunch_timestep] = canteen_location
            
            # --- Schedule Dinner ---
            dinner_hour = meal_schedule[day_num]['dinner_time']
            dinner_timestep = f"T_{day_num * 24 + dinner_hour}"

            # Check if the student is free for dinner
            is_free_for_dinner = "Lectures" not in str(schedules_df.at[student_id, dinner_timestep])
            
            if is_free_for_dinner:
                if np.random.rand() < prob:
                    schedules_df.at[student_id, dinner_timestep] = canteen_location

    output_filename = 'student_schedules_with_meals.csv'
    print(f"Step 4: Saving updated schedules to '{output_filename}'...")
    schedules_df.to_csv(output_filename)

    print("Done.")


In [3]:
generate_master_timetables()
apply_master_timetables_efficiently()

Step 1: Loading student data to identify all groups...
Found 107 unique subject/year combinations.
Step 2: Generating a master timetable for each group...
Step 3: Saving 107 master timetables to 'master_timetables.pkl'...
Done.
Step 1: Loading prerequisite files...
Loaded 107 master timetables and 11411 students.
Step 2: Generating schedule data for all students in memory...
Data generation complete in 3.99 seconds.
Step 3: Creating the final DataFrame from the generated data (one operation)...
DataFrame created in 0.51 seconds.
Step 4: Saving the final populated schedules to 'student_schedules_with_lectures.csv'...
Done.


In [4]:
schedule_meals()

Step 1: Loading prerequisite files...
Loaded schedules for 11411 students.
Generating shared meal timetables for 1837 groups...
Step 3: Populating schedules with meal events...
Step 4: Saving updated schedules to 'student_schedules_with_meals.csv'...
Done.


END OF THIS NOTEBOOK