In [1]:
!pip install --user gurobipy
import sys
print(sys.executable)

/opt/anaconda3/bin/python


In [2]:
# Import necessary libraries
import pandas as pd  # For data manipulation and analysis
from gurobipy import Model, GRB  # For optimization with Gurobi
import math  # For mathematical operations
import copy  # For creating deep copies of objects
import time  # For measuring execution time
from collections import defaultdict  # For default dictionary structures

# Record the start time of the script
start_time = time.time()

# Define the Excel file containing the scheduling data
excel_file = "Scheduling Project Pilot.xlsx"

# Load the data from the 'Assignments' sheet of the Excel file into a DataFrame
df = pd.read_excel(excel_file, sheet_name='Assignments')

# Rename columns for consistency and clarity
df = df.rename(columns={
    df.columns[0]: 'Course',       # Rename the first column to 'Course'
    df.columns[1]: 'Instructor',  # Rename the second column to 'Instructor'
    df.columns[2]: 'Capacity',    # Rename the third column to 'Capacity'
    df.columns[3]: '# Sections'   # Rename the fourth column to '# Sections'
})

# Drop unnecessary columns from the DataFrame (removes the last 4 columns)
df = df.iloc[:, :-4]

# Display the initial filtered DataFrame and the total number of sections
print("Initial DataFrame (df1):", df)
print("Total number of sections:", df["# Sections"].sum())

# Drop rows where the 'Course' column is empty or NaN
df = df.dropna(subset=['Course'])

# Extract numeric course numbers from the 'Course' column
# This assumes course names are in the format "CS113", "CS115", etc.
df['Course_Number'] = df['Course'].str.extract(r'(\d+)')

# Rename the 'Section Size' column to 'Capacity' for consistency
# This assumes there is a column named 'Section Size' that needs renaming
df.rename(columns={'Section Size': 'Capacity'}, inplace=True)

# At this point, the DataFrame is cleaned and prepared for further processing.


Initial DataFrame (df1):     Course          Instructor  Capacity  # Sections   70  Newark
0    CS114    Schieber, Baruch        20           1   20  Honors
1    CS114       Calvin, James        20           1   20  Honors
2    CS114  Zaidenberg, Ayelet        80           2  160  Newark
3    CS114   Kapleau, Jonathan        80           1   80  Newark
4    CS116             Wu, Jun        35           2   70  Newark
..     ...                 ...       ...         ...  ...     ...
132  IT340    Giannoglou, Karl        30           1   30  Newark
133  IT342    Giannoglou, Karl        30           1   30  Newark
134  IT420    Grayson, Tanisha        30           1   30  Newark
135  IT430     Senesy, Stanley        30           1   30  Newark
136  IT490       Patel, Dipesh        36           2   36  Newark

[137 rows x 6 columns]
Total number of sections: 167


In [3]:
def create_section_capacity_map(df):
    """
    Creates a dictionary mapping (course, instructor, section_number) to capacity.

    Args:
        df (DataFrame): DataFrame containing course, instructor, and capacity information.
                        Must include the following columns: 'Course', 'Instructor', '# Sections', and 'Capacity'.

    Returns:
        dict: A dictionary where keys are tuples (course, instructor, section_number)
              and values are the capacity of the sections.
    """
    # Step 1: Sort the DataFrame by 'Course', 'Instructor', and 'Capacity' for consistent section numbering
    df_sorted = df.sort_values(by=['Course', 'Instructor', 'Capacity'])

    # Initialize an empty dictionary to store the section-to-capacity mapping
    section_capacity_map = {}

    # Step 2: Group rows by (Course, Instructor)
    for (course, instructor), group in df_sorted.groupby(['Course', 'Instructor']):
        # Initialize section numbering for each (Course, Instructor) pair
        section_number = 1

        # Iterate over the grouped rows
        for _, row in group.iterrows():
            # Extract the number of sections and capacity for the current row
            num_sections = int(row['# Sections'])  # Ensure the value is an integer
            capacity = row['Capacity']  # Get the capacity value
            course = course.strip()  # Strip any leading/trailing whitespace from the course name
            instructor = instructor.strip()  # Strip whitespace from the instructor name

            # Step 3: Assign section numbers and map them to their capacity
            for _ in range(num_sections):
                # Add an entry to the dictionary for each section
                section_capacity_map[(course, instructor, section_number)] = capacity
                section_number += 1  # Increment the section number

    return section_capacity_map


In [4]:
def create_aggregated_dataframe(df):
    """
    Aggregates the DataFrame by grouping rows based on Course, Instructor, Course_Number, and Email,
    and summing the number of sections.

    Args:
        df (DataFrame): Original DataFrame with columns 'Course', 'Instructor', 'Course_Number', 'Email', and '# Sections'.

    Returns:
        DataFrame: Aggregated DataFrame with the total number of sections for each (Course, Instructor, Course_Number, Email) combination.
    """
    # Group by 'Course', 'Instructor', 'Course_Number', and 'Email', summing the '# Sections'
    aggregated_df = df.groupby(['Course', 'Instructor', 'Course_Number', 'Email']).agg(
        {'# Sections': 'sum'}  # Aggregate by summing the '# Sections'
    ).reset_index()

    return aggregated_df


# Load the 'pre-scheduled' sheet from the Excel file into a DataFrame
df_pre_scheduled = pd.read_excel(excel_file, sheet_name='pre-scheduled')

# Load the 'Faculty' sheet from the Excel file into a DataFrame
new_df = pd.read_excel(excel_file, sheet_name='Faculty')

# Rename the first column of new_df to 'Instructor' for consistency
new_df = new_df.rename(columns={new_df.columns[0]: 'Instructor'})
print("Faculty DataFrame (new_df):", new_df)

# Merge the main DataFrame (df) with the new DataFrame (new_df) on the 'Instructor' column
# Using a left join to retain all rows from df and only match data from new_df
df = pd.merge(df, new_df, on='Instructor', how='left')
print("Merged DataFrame:", df)


Faculty DataFrame (new_df):             Instructor              Email  8 Digit ID  \
0    Abduallah, Yasser      ya54@njit.edu    31234231   
1        Amin, Nadyrah     nma46@njit.edu    21280977   
2    Basu Roy, Senjuti  senjutib@njit.edu    31405694   
3    Apostolyuk, Vadym      va58@njit.edu    31005195   
4       Arafeh, Bassel      ba62@njit.edu    31544114   
..                 ...                ...         ...   
178      Zhang, Genwei       gz6@njit.edu    31521469   
179       Yuan, Chenxi     cy324@njit.edu    31702048   
180         Zhang, Lei     lz392@njit.edu    31702050   
181       Zhang, Yijie     yz829@njit.edu    31481772   
182   Zunnurhain, Kazi  kzunnurh@njit.edu    31558650   

                       Job Title  
0                Adjunct, Tier I  
1                Adjunct, Tier I  
2           Professor, Associate  
3              Adjunct, Tier III  
4    University Lecturer, Senior  
..                           ...  
178                  PhD student  
179    

In [5]:
# Step 1: Generate the section capacity map
section_capacity_map = create_section_capacity_map(df)

# Optional: Print the section capacity map for debugging (commented out for now)
# print("Section Capacity Map:")
# for key, value in section_capacity_map.items():
#     print(f"{key}: Capacity {value}")

# Step 2: Create an aggregated DataFrame to sum up sections for each (Course, Instructor)
aggregated_df = create_aggregated_dataframe(df)

# Debugging: Print the aggregated DataFrame
print("\nAggregated DataFrame:")
print(aggregated_df)

# Deep copy the aggregated DataFrame for further use
df = copy.deepcopy(aggregated_df)

# Time slots and days definition
time_slots = [
    "8:30-10:00 AM", "10:00-11:30 AM", "11:30-1:00 PM",
    "1:00-2:30 PM", "2:30-4:00 PM", "4:00-5:30 PM",
    "6:00-7:30 PM", "7:30-9:00 PM"
]
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

# Define balance percentages for each (day, time slot) combination
# Values are derived from a provided table or predefined distribution
slot_percentages = {
    ("Monday", "8:30-10:00 AM"): 0.20,
    ("Tuesday", "8:30-10:00 AM"): 0.20,
    ("Wednesday", "8:30-10:00 AM"): 0.25,
    ("Thursday", "8:30-10:00 AM"): 0.20,
    ("Friday", "8:30-10:00 AM"): 0.20,
    # (Add other time slot-day combinations as needed)
}

# Initialize Gurobi model for scheduling
model = Model("Scheduling")

# Step 3: Define binary decision variables for each (course, instructor, section, part, day, slot)
variables = {}
slack_vars = {}  # Dictionary to hold slack variables for potential constraints
total_slack = 0  # Initialize total slack for minimization

# Iterate over rows of the DataFrame to create variables
for _, row in df.iterrows():
    course = row['Course']
    instructor = row['Instructor']
    num_sections = int(row['# Sections'])  # Number of sections for the course

    # Skip courses with zero sections
    if num_sections == 0:
        continue

    # Special adjustment for "CS435": Divide it into 3 parts instead of 2
    if course == "CS435":
        parts = [1, 2, 3]
    else:
        parts = [1, 2]

    # Iterate through sections, parts, days, and time slots to create binary variables
    for section_id in range(1, num_sections + 1):
        for part in parts:  # Adjust based on the number of parts
            for day in days:
                for slot in time_slots:
                    # Create a unique variable name for this configuration
                    var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                    # Add the binary variable to the model
                    variables[var_name] = model.addVar(vtype=GRB.BINARY, name=var_name)

# At this point, the binary variables have been defined for all relevant combinations.



Aggregated DataFrame:
    Course          Instructor Course_Number              Email  # Sections
0    CS114       Calvin, James           114    calvin@njit.edu           1
1    CS114   Kapleau, Jonathan           114   kapleau@njit.edu           1
2    CS114    Schieber, Baruch           114      sbar@njit.edu           1
3    CS114  Zaidenberg, Ayelet           114      acz6@njit.edu           2
4    CS116             Wu, Jun           116      jw65@njit.edu           2
..     ...                 ...           ...                ...         ...
124  IT340     Senesy, Stanley           340    senesy@njit.edu           2
125  IT342    Giannoglou, Karl           342     kg338@njit.edu           1
126  IT420    Grayson, Tanisha           420  tgrayson@njit.edu           1
127  IT430     Senesy, Stanley           430    senesy@njit.edu           1
128  IT490       Patel, Dipesh           490   pateldi@njit.edu           2

[129 rows x 5 columns]
Set parameter Username
Set parameter Lice

In [6]:
# Step 1: Calculate the total number of section parts (2N for most courses, with an adjustment for "CS435")
total_section_parts = (
    2 * sum(int(row['# Sections']) for _, row in df.iterrows()) + 
    df[df["Course"] == "CS435"]["# Sections"].sum()  # Add an extra section part for CS435
)
print("Total section parts:", total_section_parts)

# Step 2: Group the pre-scheduled DataFrame to count the number of pre-scheduled courses by (Day, Time)
pre_scheduled_counts = df_pre_scheduled.groupby(['Day', 'Time']).size().to_dict()

# Initialize a counter to keep track of the sum of maximum section parts across all slots
s = 0

# Step 3: Add constraints to balance the number of section parts per (Day, Slot)
for (day, slot), percentage in slot_percentages.items():
    # Filter variables related to the current (Day, Slot)
    slot_vars = [
        variables[var_name] for var_name in variables 
        if var_name.split('_')[5] == day and var_name.split('_')[6] == slot
    ]

    # Calculate the maximum allowable section parts for this slot
    # The formula is based on the percentage and total section parts
    max_section_parts_slot = math.ceil((percentage / 6) * total_section_parts)
    s += max_section_parts_slot  # Add to the overall balance sum

    # Add constraints to the Gurobi model
    if percentage > 0:  # Only include slack variables if the percentage is non-zero
        model.addConstr(
            sum(slot_vars) <= max_section_parts_slot,  # Constrain the sum of variables for this slot
            name=f"balance_slot_with_slack_{day}_{slot}"  # Name the constraint
        )
    else:  # For zero percentage slots, still ensure the constraint
        model.addConstr(
            sum(slot_vars) <= max_section_parts_slot,
            name=f"balance_slot_no_slack_{day}_{slot}"  # Name the constraint
        )

# Debugging: Print the sum of the balances across all slots
print("Sum of the balances is:", s)


Total section parts: 338
Sum of the balances is: 63


In [7]:
# Define the percentage allocation for evening slots
evening_percentage = 0.20  
evening_slots = ["6:00-7:30 PM", "7:30-9:00 PM"]  # Evening slots

# Add constraints for evening slots
for slot in evening_slots:
    # Filter variables corresponding to the current evening slot
    evening_vars = [
        variables[var_name] for var_name in variables 
        if var_name.split('_')[6] == slot
    ]

    # Adjust for pre-scheduled courses in the evening slot (e.g., Friday evening)
    pre_scheduled_count = pre_scheduled_counts.get(("Friday", slot), 0)  # Default to 0 if no pre-scheduled courses

    # Calculate the maximum allowable section parts for the slot
    max_section_parts_evening = (evening_percentage / 6) * total_section_parts + pre_scheduled_count

    # Add a constraint to enforce the balance for the current evening slot
    model.addConstr(
        sum(evening_vars) <= max_section_parts_evening,
        name=f"balance_evening_{slot}"  # Descriptive constraint name for debugging
    )



In [8]:
# Step 1: Read the "pre-scheduled" sheet
df_pre_scheduled = pd.read_excel(excel_file, sheet_name='pre-scheduled')

# Debugging: Print the column names to confirm the structure of the pre-scheduled DataFrame
print("Pre-scheduled DataFrame columns:", df_pre_scheduled.columns)

# Step 2: Enforce pre-scheduled constraints
# Ensure no other courses for this instructor can be scheduled at the specified (day, time)

for _, row in df_pre_scheduled.iterrows():
    instructor = row['Instructor']  # Instructor for the pre-scheduled course
    day = row['Day']  # Day of the pre-scheduled course
    times = row['Time']  # Time slot of the pre-scheduled course

    # Filter variables for the given (instructor, day, time)
    conflicting_vars = [
        var_name for var_name in variables 
        if var_name.split('_')[2] == instructor and  # Match instructor
           var_name.split('_')[5] == day and         # Match day
           var_name.split('_')[6] == times          # Match time slot
    ]

    # Add constraints to set conflicting variables to 0
    for var_name in conflicting_vars:
        model.addConstr(
            variables[var_name] == 0, 
            name=f"block_{instructor}_{day}_{times}"  # Descriptive constraint name
        )


Pre-scheduled DataFrame columns: Index(['Course', 'Section ', 'Instructor', 'Capacity', 'Day', 'Time'], dtype='object')


In [9]:
# Step 1: Initialize a counter to keep track of constraint numbering
constraint_counter = 0

# Step 2: Iterate through each row in the DataFrame to add constraints
for _, row in df.iterrows():
    course = row['Course']  # Course name
    instructor = row['Instructor']  # Instructor's name
    num_sections = int(row['# Sections'])  # Number of sections for the course

    # Skip courses with no sections
    if num_sections == 0:
        continue

    # Step 3: Adjust for specific courses like "CS435" which requires 3 parts instead of 2
    if course == "CS435":
        parts = [1, 2, 3]  # Define parts for CS435
    else:
        parts = [1, 2]  # Default case with 2 parts

    # Step 4: Add constraints for each section, part, day, and time slot
    for section_id in range(1, num_sections + 1):
        for part in parts:
            # Collect variables for the current section and part
            section_vars = []
            for day in days:
                for slot in time_slots:
                    var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                    
                    # Check if the variable exists in the model
                    if var_name in variables:
                        var = variables[var_name]
                        section_vars.append(var)

            # Step 5: If there are valid variables, add a constraint to ensure one and only one slot is selected
            if section_vars:
                # Name the constraint for easier debugging
                constraint_name = f"unique_slot_{course}_{instructor}_{section_id}_{part}"
                
                # Add the constraint to the model
                model.addConstr(sum(section_vars) == 1, name=constraint_name)
                
                # Increment the constraint counter
                constraint_counter += 1

# Final step: Print the total number of constraints added (optional)
print(f"Total constraints added: {constraint_counter}")


Total constraints added: 338


In [10]:
# Step: Add constraints to ensure that an instructor has at most one section per (day, slot)
for instructor in df['Instructor'].unique():  # Iterate through each unique instructor
    for day in days:  # Iterate through each day
        for slot in time_slots:  # Iterate through each time slot
            instructor_vars = []  # List to collect variables for this instructor, day, and slot

            # Filter the DataFrame for the current instructor
            for _, row in df[df['Instructor'] == instructor].iterrows():
                course = row['Course']  # Course name
                num_sections = int(row['# Sections'])  # Number of sections for the course

                # Adjust the parts for specific courses like "CS435"
                parts = [1, 2, 3] if course == "CS435" else [1, 2]

                # Iterate through sections and parts to build variable names
                for section_id in range(1, num_sections + 1):
                    for part in parts:
                        # Construct the variable name
                        var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                        
                        # Check if the variable exists in the model and append it
                        if var_name in variables:
                            instructor_vars.append(variables[var_name])

            # Add a constraint to ensure at most one section is assigned to this (instructor, day, slot)
            if instructor_vars:
                constraint_name = f"one_section_per_slot_{instructor}_{day}_{slot}"  # Descriptive name
                model.addConstr(
                    sum(instructor_vars) <= 1, 
                    name=constraint_name  # Add the constraint to the model
                )


In [11]:
import gurobipy as gp

# Step 1: Define restricted time slots for the constraint
restricted_time_slots = ["8:30-10:00 AM", "10:00-11:30 AM", "6:00-7:30 PM", "7:30-9:00 PM"]

# Step 2: Add constraints for each instructor on each day
for instructor in df['Instructor'].unique():  # Iterate through all unique instructors
    for day in days:  # Iterate through all days
        # Collect binary variables corresponding to the restricted time slots for this instructor and day
        restricted_vars = []
        
        for course in df['Course'].unique():  # Iterate through all unique courses
            # Filter the DataFrame for the specific (instructor, course) combination
            instructor_courses = df[(df['Instructor'] == instructor) & (df['Course'] == course)]
            
            for _, course_row in instructor_courses.iterrows():
                num_sections = int(course_row['# Sections'])  # Number of sections for this course
                parts = [1, 2] if course != "CS435" else [1, 2, 3]  # Adjust parts for CS435
                
                for section_id in range(1, num_sections + 1):  # Iterate through sections
                    for part in parts:  # Iterate through parts
                        for slot in restricted_time_slots:  # Iterate through restricted slots
                            # Construct the variable name
                            var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                            
                            # Add the variable to the restricted_vars list if it exists
                            if var_name in variables:
                                restricted_vars.append(variables[var_name])
        
        # Step 3: Add the constraint to ensure at most 3 of the 4 restricted slots are assigned
        if restricted_vars:
            model.addConstr(
                gp.quicksum(restricted_vars) <= 3,  # Ensure the sum of the variables does not exceed 3
                name=f"restricted_time_slots_{instructor}_{day}"  # Name the constraint for debugging
            )


In [12]:
# Step 1: Define valid start times for graduate-style patterns
valid_start_times = ["8:30-10:00 AM", "6:00-7:30 PM"]

# Step 2: Define additional valid start times for Fridays
friday_start_times = [
    "8:30-10:00 AM", "1:00-2:30 PM", "2:30-4:00 PM", 
    "4:00-5:30 PM", "6:00-7:30 PM", "7:30-9:00 PM"
]

# Step 3: Add constraints for graduate and undergraduate patterns
for course in df['Course'].unique():
    for instructor in df['Instructor'].unique():
        # Filter rows for the specific (course, instructor) combination
        course_instructor_rows = df[(df['Course'] == course) & (df['Instructor'] == instructor)]
        if course_instructor_rows.empty:
            continue

        # Iterate through each section
        for section_id in range(1, course_instructor_rows.iloc[0]['# Sections'] + 1):
            # Initialize binary variables for graduate and undergraduate patterns
            grad_var = model.addVar(vtype=GRB.BINARY, name=f"Grad_{course}_{instructor}_{section_id}")
            undergrad_var = model.addVar(vtype=GRB.BINARY, name=f"Undergrad_{course}_{instructor}_{section_id}")
            
            # Dictionary to store Y variables for each (day1, slot1) -> (day2, slot2) pair
            y_var_dict = {}
            compatible_pairs = []  # Track all Y variables for the section

            # Step 4: Generate Y variables for all (day1, slot1), (day2, slot2) combinations
            for day1 in days:
                for slot1 in time_slots:
                    for day2 in days:
                        for slot2 in time_slots:
                            y_var_name = f"Y_{course}_{instructor}_{section_id}_{day1}_{slot1}_{day2}_{slot2}"
                            y_var = model.addVar(vtype=GRB.BINARY, name=y_var_name)
                            y_var_dict[(day1, slot1, day2, slot2)] = y_var
                            compatible_pairs.append(y_var)

                            # Graduate pattern: Consecutive slots on the same day
                            if day1 == day2:
                                if day1 == "Friday":
                                    if slot1 in friday_start_times and time_slots.index(slot2) == time_slots.index(slot1) + 1:
                                        model.addConstr(y_var <= grad_var, name=f"grad_pair_friday_{course}_{section_id}_{day1}_{slot1}_{slot2}")
                                    else:
                                        model.addConstr(y_var == 0, name=f"invalid_grad_friday_{course}_{section_id}_{day1}_{slot1}_{slot2}")
                                else:
                                    if slot1 in valid_start_times and time_slots.index(slot2) == time_slots.index(slot1) + 1:
                                        model.addConstr(y_var <= grad_var, name=f"grad_pair_{course}_{section_id}_{day1}_{slot1}_{slot2}")
                                    else:
                                        model.addConstr(y_var == 0, name=f"invalid_grad_non_friday_{course}_{section_id}_{day1}_{slot1}_{slot2}")

                            # Undergraduate pattern: Same slot, valid day pairs
                            elif day1 != day2 and slot1 == slot2:
                                valid_day_pairs = {
                                    "Monday": ["Wednesday", "Thursday"],
                                    "Tuesday": ["Thursday", "Friday"],
                                    "Wednesday": ["Friday"]
                                }
                                if day2 in valid_day_pairs.get(day1, []):
                                    model.addConstr(y_var <= undergrad_var, name=f"undergrad_pair_{course}_{section_id}_{day1}_{slot1}_{day2}_{slot2}")
                                else:
                                    model.addConstr(y_var == 0, name=f"invalid_undergrad_{course}_{section_id}_{day1}_{slot1}_{day2}_{slot2}")
                            else:
                                model.addConstr(y_var == 0, name=f"invalid_pair_{course}_{section_id}_{day1}_{slot1}_{day2}_{slot2}")

            # Step 5: Enforce additional constraints
            # Restrict courses where the third character is "7" from being scheduled at 8:30-10:00 AM
            if course[2] == "7":
                for day1 in days:
                    if "8:30-10:00 AM" in time_slots:
                        var_part1 = f"X_{course}_{instructor}_{section_id}_1_{day1}_8:30-10:00 AM"
                        if var_part1 in variables:
                            model.addConstr(variables[var_part1] == 0, name=f"no_8_30_to_10_CS7XX_{course}_{section_id}_{day1}")

            # Link Part 1 variables to their corresponding Y variables
            for day1 in days:
                for slot1 in time_slots:
                    var_part1 = f"X_{course}_{instructor}_{section_id}_1_{day1}_{slot1}"
                    if var_part1 in variables:
                        y_vars_for_part1 = [y_var_dict[(day1, slot1, day2, slot2)] for day2 in days for slot2 in time_slots]
                        model.addConstr(sum(y_vars_for_part1) == variables[var_part1], name=f"part1_link_{course}_{section_id}_{day1}_{slot1}")

            # Link Part 2 variables to their corresponding Y variables
            for day2 in days:
                for slot2 in time_slots:
                    var_part2 = f"X_{course}_{instructor}_{section_id}_2_{day2}_{slot2}"
                    if var_part2 in variables:
                        y_vars_for_part2 = [y_var_dict[(day1, slot1, day2, slot2)] for day1 in days for slot1 in time_slots]
                        model.addConstr(sum(y_vars_for_part2) == variables[var_part2], name=f"part2_link_{course}_{section_id}_{day2}_{slot2}")

            # Ensure exactly one (day1, slot1), (day2, slot2) pair is selected
            model.addConstr(sum(compatible_pairs) == 1, name=f"select_one_pair_{course}_{section_id}")

            # Ensure only one pattern (graduate or undergraduate) is chosen
            model.addConstr(grad_var + undergrad_var == 1, name=f"select_one_pattern_{course}_{section_id}")


In [13]:
# Define the restricted day and time slot
restricted_day = "Monday"
restricted_time_slot = "4:00-5:30 PM"

# Add the constraint: Only courses with course number > 199 and capacity < 35 can be scheduled in the restricted slot
for _, row in df.iterrows():
    course = row['Course']  # Course name
    instructor = row['Instructor']  # Instructor's name
    course_number = int(row['Course_Number'])  # Extract course number
    num_sections = int(row['# Sections'])  # Number of sections for this course

    # Iterate through each section of the course
    for section_id in range(1, num_sections + 1):
        # Get the capacity of the current section
        capacity = section_capacity_map.get((course, instructor, section_id))

        # Check if the course meets the scheduling conditions for the restricted slot
        if course_number > 199 and capacity < 35:
            # The course meets the conditions, no constraint is needed
            continue

        # If the course does not meet the conditions, restrict its scheduling in the restricted slot
        if course == "CS435":
            parts = [1, 2, 3]  # Special case for CS435 with 3 parts
        else:
            parts = [1, 2]  # Default case with 2 parts

        for part in parts:
            # Construct the variable name for this section, part, day, and time slot
            var_name = f"X_{course}_{instructor}_{section_id}_{part}_{restricted_day}_{restricted_time_slot}"

            # Add a constraint to prevent scheduling in the restricted slot if the variable exists
            if var_name in variables:
                model.addConstr(
                    variables[var_name] == 0,
                    name=f"restricted_slot_{course}_{instructor}_{section_id}_{part}_{restricted_day}_{restricted_time_slot}"
                )


In [14]:
# Define course blocks and special blocks with constraints
course_blocks = [
    ['CS114', 'IS210', 'CS450', 'CS337'],
    ['CS241', 'CS280', 'IS350'],
    ['CS288', 'CS332', 'CS301', 'CS356'],  # Special block (<= 2)
    ['CS341', 'CS350', 'CS351', 'CS331', 'CS375'],  # Special block (<= 2)
    ['CS435', 'CS490', 'CS485', 'CS370', 'CS375'],
    ['CS485', 'CS491', 'CS450', 'CS482'],
    ['CS610', 'CS630', 'CS631', 'CS656', 'DS675', 'CS675', 'CS670'],  # Grad-core block
    ['DS677', 'DS669', 'DS650', 'CS670', 'CS610', 'CS665', 'CS667', 'CS732', 'DS680'],  # Grad-DS+Alg block
    ['CS608', 'CS645', 'CS646', 'CS647', 'CS648', 'CS678', 'CS696'],  # Grad-cyber block
    ['IS455', 'IS645'],
    ['IT220', 'IT230', 'IT240', 'IT302'],
    ['IT256', 'IT266', 'IT286', 'IT360', 'IT380', 'IT383', 'IT386'],
    ['IT120', 'IT240']
]

# List of special blocks with <= 2 constraints
special_blocks = [
    ['CS288', 'CS332', 'CS301', 'CS356'],
    ['CS341', 'CS350', 'CS351', 'CS331', 'CS375']
]

# Add constraints to ensure courses in the same block are not scheduled in the same day and time slot
for block in course_blocks:
    # Determine the max constraint for the current block (1 for regular blocks, 2 for special blocks)
    max_constraint = 2 if block in special_blocks else 1

    for course1 in block:
        for course2 in block:
            # Avoid self-comparison
            if course1 != course2:
                # Iterate over instructors for both courses
                instructors1 = df[df['Course'] == course1]['Instructor'].unique()
                instructors2 = df[df['Course'] == course2]['Instructor'].unique()

                for instructor1 in instructors1:
                    for instructor2 in instructors2:
                        # Iterate over all days and time slots
                        for day in days:
                            for slot in time_slots:
                                # Construct variable names for both courses and both parts
                                var_course1_part1 = f"X_{course1}_{instructor1}_1_{day}_{slot}"
                                var_course1_part2 = f"X_{course1}_{instructor1}_2_{day}_{slot}"
                                var_course2_part1 = f"X_{course2}_{instructor2}_1_{day}_{slot}"
                                var_course2_part2 = f"X_{course2}_{instructor2}_2_{day}_{slot}"

                                # Add constraints for part 1
                                if var_course1_part1 in variables and var_course2_part1 in variables:
                                    model.addConstr(
                                        variables[var_course1_part1] + variables[var_course2_part1] <= max_constraint,
                                        name=f"block_constraint_{course1}_{instructor1}_{course2}_{instructor2}_{day}_{slot}_part1"
                                    )

                                # Add constraints for part 2
                                if var_course1_part2 in variables and var_course2_part2 in variables:
                                    model.addConstr(
                                        variables[var_course1_part2] + variables[var_course2_part2] <= max_constraint,
                                        name=f"block_constraint_{course1}_{instructor1}_{course2}_{instructor2}_{day}_{slot}_part2"
                                    )


In [15]:
# Define the specific time slots for part 1 and part 2
part1_slot = "6:00-7:30 PM"
part2_slot = "7:30-9:00 PM"

# Add constraints to ensure part 1 and part 2 are scheduled consecutively on the same day
for _, row in df.iterrows():
    course = row['Course']  # Course name
    instructor = row['Instructor']  # Instructor's name
    num_sections = int(row['# Sections'])  # Number of sections for the course

    # Skip courses with no sections
    if num_sections == 0:
        continue

    # Iterate through each section of the course
    for section_id in range(1, num_sections + 1):
        for day in days:  # Loop through each day
            # Construct variable names for part 1 and part 2 for the given time slots
            part1_var_name = f"X_{course}_{instructor}_{section_id}_1_{day}_{part1_slot}"
            part2_var_name = f"X_{course}_{instructor}_{section_id}_2_{day}_{part2_slot}"

            # Check if both variables exist in the model
            if part1_var_name in variables and part2_var_name in variables:
                # Add a constraint to ensure that part 1 at 6:00-7:30 PM matches part 2 at 7:30-9:00 PM on the same day
                model.addConstr(
                    variables[part1_var_name] == variables[part2_var_name],
                    name=f"timing_constraint_{course}_{instructor}_{section_id}_{day}"  # Descriptive constraint name
                )


In [16]:
# Iterate over each instructor
for instructor in df['Instructor'].unique():
    # Iterate over all days
    for day in days:
        # Iterate over all possible starting time slots (first of three consecutive slots)
        for i in range(len(time_slots) - 2):
            slot1 = time_slots[i]
            slot2 = time_slots[i + 1]
            slot3 = time_slots[i + 2]

            # Filter the DataFrame to get only the courses and sections taught by the current instructor
            instructor_df = df[df['Instructor'] == instructor]

            # Iterate over the first course, section, and part
            for _, row1 in instructor_df.iterrows():
                course1 = row1['Course']
                num_sections1 = int(row1['# Sections'])
                if num_sections1 == 0:
                    continue

                for section_id1 in range(1, num_sections1 + 1):
                    parts1 = [1, 2, 3] if course1 == "CS435" else [1, 2]
                    for part1 in parts1:
                        var_name1 = f"X_{course1}_{instructor}_{section_id1}_{part1}_{day}_{slot1}"

                        # Iterate over the second course, section, and part
                        for _, row2 in instructor_df.iterrows():
                            course2 = row2['Course']
                            num_sections2 = int(row2['# Sections'])
                            if num_sections2 == 0:
                                continue

                            for section_id2 in range(1, num_sections2 + 1):
                                parts2 = [1, 2, 3] if course2 == "CS435" else [1, 2]
                                for part2 in parts2:
                                    var_name2 = f"X_{course2}_{instructor}_{section_id2}_{part2}_{day}_{slot2}"

                                    # Check if var_name1 and var_name2 are different
                                    if course1 != course2 or section_id1 != section_id2 or part1 != part2:
                                        # Iterate over the third course, section, and part
                                        for _, row3 in instructor_df.iterrows():
                                            course3 = row3['Course']
                                            num_sections3 = int(row3['# Sections'])
                                            if num_sections3 == 0:
                                                continue

                                            for section_id3 in range(1, num_sections3 + 1):
                                                parts3 = [1, 2, 3] if course3 == "CS435" else [1, 2]
                                                for part3 in parts3:
                                                    var_name3 = f"X_{course3}_{instructor}_{section_id3}_{part3}_{day}_{slot3}"

                                                    # Ensure var_name3 is different from var_name1 and var_name2
                                                    if (course1 != course3 or section_id1 != section_id3 or part1 != part3) and \
                                                       (course2 != course3 or section_id2 != section_id3 or part2 != part3):
                                                        # Sum the decision variables for the three consecutive slots
                                                        consecutive_sum = 0
                                                        if var_name1 in variables:
                                                            consecutive_sum += variables[var_name1]
                                                        if var_name2 in variables:
                                                            consecutive_sum += variables[var_name2]
                                                        if var_name3 in variables:
                                                            consecutive_sum += variables[var_name3]

                                                        # Add the constraint that the sum of these variables must be <= 2
                                                        model.addConstr(
                                                            consecutive_sum <= 2,
                                                            name=f"consecutive_slots_constraint_{instructor}_{day}_{slot1}_{slot2}_{slot3}"
                                                        )


In [17]:
# Initialize total points for the model
total_points = 0  

# Define mappings for days and time slots
time_slot_mapping = {
    'M': 'Monday', 
    'T': 'Tuesday', 
    'W': 'Wednesday', 
    'R': 'Thursday', 
    'F': 'Friday',
    'S': 'Saturday'  # Include 'S' for completeness, even if unused
}

time_slot_index = {
    '1': "8:30-10:00 AM",
    '2': "10:00-11:30 AM",
    '3': "11:30-1:00 PM",
    '4': "1:00-2:30 PM",
    '5': "2:30-4:00 PM",
    '6': "4:00-5:30 PM",
    '7': "6:00-7:30 PM",
    '8': "7:30-9:00 PM"
}

# Read instructor constraints (Health/Religion) from the Excel file
df_constraints = pd.read_excel(excel_file, sheet_name='Constraints & Preferences')

# Process each constraint
for _, row in df_constraints.iterrows():
    instructor_info = row['Instructor UCID: Type']
    slots = row['Slots']

    # Skip rows with missing or invalid instructor information
    if isinstance(instructor_info, float):
        continue

    # Parse the instructor UCID and constraint type
    email, constraint_type = instructor_info.split(": ")

    # Only process constraints for "Health" or "Religion" types
    if constraint_type.strip() in ["Health", "Religion"]:
        # Extract the blocked time slots for this instructor
        blocked_slots = slots.split("|")[1:-1]  # Remove empty elements from the split

        for slot_code in blocked_slots:
            # Map the slot code to the full day and time slot
            day_abbrev = slot_code[0]  # First character (M, T, W, R, F)
            time_slot_num = slot_code[1]  # Second character (1-8)
            day_full = time_slot_mapping[day_abbrev]
            time_slot_full = time_slot_index[time_slot_num]

            # Find the instructor's name from the main DataFrame
            instructor_row = df[df['Email'] == email]

            if not instructor_row.empty:
                instructor_name = instructor_row['Instructor'].iloc[0]  # Extract instructor name

                # Process all courses taught by the instructor
                for course in df[df['Instructor'] == instructor_name]['Course']:
                    filtered_df = df[df['Course'] == course]

                    # Only proceed if the course exists
                    if not filtered_df.empty:
                        num_sections = int(filtered_df['# Sections'].iloc[0])

                        # Iterate through each section and part of the course
                        for section_id in range(1, num_sections + 1):
                            parts = [1, 2] if course != "CS435" else [1, 2, 3]  # CS435 has 3 parts

                            for part in parts:
                                # Construct the variable name
                                var_name = f"X_{course}_{instructor_name}_{section_id}_{part}_{day_full}_{time_slot_full}"

                                # Check if the variable exists in the model
                                if var_name in variables:
                                    # Create a slack variable for this constraint
                                    slack_var_name = f"Slack_{course}_{instructor_name}_{section_id}_{part}_{day_full}_{time_slot_full}"
                                    slack_var = model.addVar(vtype=GRB.BINARY, name=slack_var_name)

                                    # Add the soft constraint: allow the variable to be scheduled with a slack penalty
                                    model.addConstr(
                                        variables[var_name] <= slack_var,
                                        name=f"health_religion_constraint_{instructor_name}_{day_full}_{time_slot_full}"
                                    )

                                    # Apply a penalty of 2048 points for violating the constraint
                                    total_points -= 2048 * slack_var


In [18]:
# Initialize trackers for penalties and soft violations
instructor_penalty_tracker = {}
instructor_soft_violated = []

# Process each constraint in the "Constraints & Preferences" sheet
for idx, row in df_constraints.iterrows():
    instructor_info = row['Instructor UCID: Type']
    slots = row['Slots']

    # Skip rows with missing or invalid instructor information
    if isinstance(instructor_info, float):
        break

    # Parse the instructor UCID and constraint type
    email, constraint_type = instructor_info.split(": ")

    # Parse the blocked time slots for this instructor
    blocked_slots = slots.split("|")[1:-1]  # Remove empty elements from the split

    # Assign point values based on constraint type
    if constraint_type.strip() == "Pref-1":
        points = 8
    elif constraint_type.strip() == "Pref-2":
        points = 4
    elif constraint_type.strip() == "Pref-3":
        points = 2
    elif constraint_type.strip() == "Childcare":
        points = -1024
    elif constraint_type.strip() in ["Health", "Religion"]:
        continue  # Health/Religion constraints are handled as hard constraints
    else:
        points = -8  # Default negative points for other types

    # Process each blocked slot for the instructor
    for slot_code in blocked_slots:
        try:
            # Validate the slot code length
            if len(slot_code) < 2:
                print(f"Error in row {idx}, email: {email}, constraint: {constraint_type}, slot code: '{slot_code}' (invalid length)")
                continue

            # Extract day and time slot details
            day_abbrev = slot_code[0]  # M, T, W, R, F
            time_slot_num = slot_code[1]  # 1-8
            day_full = time_slot_mapping[day_abbrev]
            time_slot_full = time_slot_index[time_slot_num]

            # Find the instructor's name using their email
            instructor_row = df[df['Email'] == email]
            if instructor_row.empty:
                continue

            instructor_name = instructor_row['Instructor'].iloc[0]

            # Apply the points for all sections and parts of the instructor's courses
            for course in df[df['Instructor'] == instructor_name]['Course']:
                filtered_df = df[df['Course'] == course]
                if not filtered_df.empty:  # Proceed only if the DataFrame is not empty
                    num_sections = int(filtered_df[
                        (filtered_df['Instructor'] == instructor_name) &
                        (filtered_df['Course'] == course)
                    ]['# Sections'].iloc[0])

                    for section_id in range(1, num_sections + 1):
                        parts = [1, 2] if course != "CS435" else [1, 2, 3]
                        for part in parts:
                            # Construct the variable name
                            var_name = f"X_{course}_{instructor_name}_{section_id}_{part}_{day_full}_{time_slot_full}"

                            # Ensure the variable exists in the model
                            if var_name in variables:
                                # Update total points and log soft violations if necessary
                                total_points += points * variables[var_name]
                                if points < -1:  # Track soft violations with significant penalties
                                    instructor_soft_violated.append((var_name, points))

                                # Log the points adjustment (optional for debugging)
                                action = "Adding" if points > 0 else "Subtracting"
                                # Uncomment for detailed logging
                                # print(f"{action} {abs(points)} points for scheduling {instructor_name} (email: {email}) "
                                #       f"on {day_full}, time slot {time_slot_full} for course {course}, section {section_id}, "
                                #       f"part {part}")
        except IndexError:
            # Handle invalid slot codes and log the error
            print(f"Error in row {idx}, email: {email}, constraint: {constraint_type}, slot code: '{slot_code}'")
            continue  # Skip the problematic slot and proceed



In [19]:
from gurobipy import LinExpr

# Initialize a penalty expression for format preference violations
format_penalty_sum = LinExpr()

# Penalty value for violating format preferences
penalty_value = -8

# Read the "General Preferences" sheet from the Excel file
general_preferences_df = pd.read_excel(excel_file, sheet_name="General Preferences")

# Rename relevant columns for easier access
general_preferences_df = general_preferences_df.rename(columns={
    general_preferences_df.columns[1]: 'Email',
    general_preferences_df.columns[2]: 'Preference'
})

# Process each instructor's general preferences
for _, row in general_preferences_df.iterrows():
    email = row['Email']
    preference = row['Preference']

    # Find the instructor's name from the main DataFrame using their email
    instructor_row = df[df['Email'] == email]
    if instructor_row.empty:
        continue

    instructor_name = instructor_row['Instructor'].iloc[0]

    # Filter the DataFrame for courses assigned to the instructor
    instructor_courses = df[df['Instructor'] == instructor_name]

    # Process each course taught by the instructor
    for _, course_row in instructor_courses.iterrows():
        course = course_row['Course']
        num_sections = int(course_row['# Sections'])

        # Iterate through sections and parts
        for section_id in range(1, num_sections + 1):
            parts = [1, 2] if course != "CS435" else [1, 2, 3]

            # Iterate over pairs of time slots
            for day in days:
                for i in range(len(time_slots) - 1):  # Ensure the next slot exists
                    slot1 = time_slots[i]
                    slot2 = time_slots[i + 1]

                    # Construct variable names for part 1 and part 2
                    part1_var_name = f"X_{course}_{instructor_name}_{section_id}_1_{day}_{slot1}"
                    part2_var_name = f"X_{course}_{instructor_name}_{section_id}_2_{day}_{slot2}"

                    # Check if both variables exist in the model
                    if part1_var_name in variables and part2_var_name in variables:
                        part1_var = variables[part1_var_name]
                        part2_var = variables[part2_var_name]

                        # Add a penalty variable for violations
                        penalty_var = model.addVar(vtype=GRB.BINARY, name=f"Penalty_{instructor_name}_{day}_{slot1}_{slot2}")

                        if preference == "3-hour format":
                            # Add constraints to penalize non-consecutive parts (violation of 3-hour format preference)
                            model.addConstr(
                                part1_var - part2_var <= penalty_var,
                                name=f"violation_consecutive_{course}_{instructor_name}_{section_id}_{day}_{slot1}_{slot2}"
                            )
                            model.addConstr(
                                part2_var - part1_var <= penalty_var,
                                name=f"violation_consecutive_{course}_{instructor_name}_{section_id}_{day}_{slot1}_{slot2}"
                            )

                        elif preference == "1.5+1.5 hour format":
                            # Add constraints to penalize consecutive parts (violation of non-consecutive preference)
                            model.addConstr(
                                part1_var + part2_var - penalty_var <= 1,
                                name=f"violation_non_consecutive_{course}_{instructor_name}_{section_id}_{day}_{slot1}_{slot2}"
                            )

                        # Add the penalty to the total penalty expression
                        format_penalty_sum += penalty_value * penalty_var


In [20]:
# Read the "General Preferences" sheet and rename relevant columns
general_preferences_df = pd.read_excel(excel_file, sheet_name="General Preferences")
general_preferences_df = general_preferences_df.rename(columns={
    general_preferences_df.columns[1]: 'Email',
    general_preferences_df.columns[3]: 'Day Preference'
})

# Create a dictionary mapping emails to their day preferences
day_preference_dict = general_preferences_df.set_index('Email')['Day Preference'].to_dict()

# Initialize dictionary to store Z variables (binary variables for teaching on specific days)
z_vars = {}

# Iterate over all instructors
for instructor in df['Instructor'].unique():
    for day in days:
        # Create a binary variable to indicate whether the instructor teaches on this day
        z_var = model.addVar(vtype=GRB.BINARY, name=f"Z_{instructor}_{day}")
        z_vars[(instructor, day)] = z_var

        # List to collect relevant X variables for this instructor and day
        relevant_x_vars = []

        # Iterate over all courses taught by this instructor
        for course in df['Course'].unique():
            # Filter rows for this instructor and course
            course_instructor_rows = df[(df['Course'] == course) & (df['Instructor'] == instructor)]

            if course_instructor_rows.empty:
                continue

            # Get the number of sections for this course
            num_sections = int(course_instructor_rows['# Sections'].iloc[0])

            # Iterate over all sections, parts, and time slots
            for section_id in range(1, num_sections + 1):
                for part in [1, 2]:
                    for slot in time_slots:
                        # Construct the X variable name
                        x_var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"

                        # Check if the variable exists in the model
                        if x_var_name in variables:
                            x_var = variables[x_var_name]
                            relevant_x_vars.append(x_var)

                            # Add a constraint to link X variable to the Z variable
                            model.addConstr(
                                x_var <= z_var,
                                name=f"x_var_link_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                            )

        # If no relevant X variables exist, ensure Z variable is set to 0
        if not relevant_x_vars:
            model.addConstr(
                z_var == 0,
                name=f"no_classes_{instructor}_{day}"
            )


In [21]:
from gurobipy import LinExpr

# Initialize a penalty expression for day preferences
day_penalty_sum = LinExpr()

# Iterate over instructors and their day preferences
for instructor in df['Instructor'].unique():
    # Get the email of the instructor
    email = df[df['Instructor'] == instructor]['Email'].iloc[0]

    # Check the instructor's day preference
    prefers_condensed_days = day_preference_dict.get(email, "No") == "I prefer to condense my sections into fewer days"

    # Set the penalty value based on the preference
    penalty_value = -8 if prefers_condensed_days else -3

    # Iterate over all days
    for day in days:
        # Retrieve the binary variable for whether the instructor teaches on this day
        z_var = z_vars[(instructor, day)]  # Ensure `z_vars` was defined earlier

        # Add the penalty contribution to the total penalty sum
        day_penalty_sum += penalty_value * z_var


In [22]:
from gurobipy import LinExpr

# Read the "General Preferences" sheet to get instructor preferences
preferences_df = pd.read_excel(excel_file, sheet_name="General Preferences")

# Extract the email column and the consecutive slots preference column
email_column_name = preferences_df.columns[1]  # Second column for emails
consecutive_preference_column_name = preferences_df.columns[5]  # Sixth column for consecutive slot preferences

# Create a dictionary mapping emails to their preference for consecutive slots
consecutive_preference = preferences_df.set_index(email_column_name)[consecutive_preference_column_name].to_dict()
print("Consecutive Preferences:", consecutive_preference)

# Initialize a penalty expression to accumulate penalties for consecutive slots
consecutive_penalty_sum = LinExpr()

# Penalty value for assigning consecutive slots when the instructor dislikes them
penalty_value = -2048

# Process each instructor who dislikes consecutive slots
for email, prefers_consecutive in consecutive_preference.items():
    if prefers_consecutive == "No":  # "No" indicates a preference against consecutive slots
        # Find the instructor's name using their email
        instructor_row = df[df['Email'] == email]
        if instructor_row.empty:
            continue

        instructor_name = instructor_row['Instructor'].iloc[0]

        # Iterate over all days
        for day in days:
            # Iterate over consecutive time slot pairs
            for slot_idx in range(len(time_slots) - 1):
                slot1 = time_slots[slot_idx]
                slot2 = time_slots[slot_idx + 1]

                # Process courses taught by this instructor
                for course in df['Course'].unique():
                    relevant_rows = df[(df['Instructor'] == instructor_name) & (df['Course'] == course)]
                    if relevant_rows.empty:
                        continue

                    # Get the number of sections for this course
                    num_sections = int(relevant_rows['# Sections'].iloc[0])

                    # Iterate over sections to define variables for consecutive slots
                    for section_id in range(1, num_sections + 1):
                        x_var1_name = f"X_{course}_{instructor_name}_{section_id}_1_{day}_{slot1}"
                        x_var2_name = f"X_{course}_{instructor_name}_{section_id}_2_{day}_{slot2}"

                        # Check if the variables exist in the model
                        if x_var1_name in variables and x_var2_name in variables:
                            x_var1 = variables[x_var1_name]
                            x_var2 = variables[x_var2_name]

                            # Add a penalty variable for consecutive slots
                            penalty_var = model.addVar(vtype=GRB.BINARY, name=f"Penalty_{instructor_name}_{day}_{slot1}_{slot2}")

                            # Add a constraint to track violations of the consecutive slot preference
                            model.addConstr(
                                x_var1 + x_var2 - 2 * penalty_var <= 1,
                                name=f"consecutive_penalty_{instructor_name}_{day}_{slot1}_{slot2}"
                            )

                            # Add the penalty to the cumulative penalty sum
                            consecutive_penalty_sum += penalty_value * penalty_var

print("Done processing consecutive slot penalties.")


Consecutive Preferences: {'kapleau@njit.edu': "I don't have a preference", 'rt494@njit.edu': 'Yes', 'ejt25@njit.edu': 'Yes', 'tweiss@njit.edu': 'Yes', 'ko89@njit.edu': "I don't have a preference", 'kp759@njit.edu': "I don't have a preference", 'js9@njit.edu': 'Yes', 'wohn@njit.edu': "I don't have a preference", 'egan@njit.edu': 'Yes', 'mjk76@njit.edu': "I don't have a preference", 'marvin@njit.edu': "I don't have a preference", 'sz457@njit.edu': "I don't have a preference", 'jingli@njit.edu': 'Yes', 'cliu@njit.edu': nan, 'kehoed@njit.edu': 'Yes', 'yl935@njit.edu': 'Yes', 'daher@njit.edu': 'Yes', 'ss797@njit.edu': 'Yes', 'azi3@njit.edu': 'Yes', 'jw65@njit.edu': 'Yes', 'jq55@njit.edu': 'Yes', 'nn43@njit.edu': 'Yes', 'meh43@njit.edu': "I don't have a preference", 'dli@njit.edu': 'No', 'skumar@njit.edu': 'Yes', 'monogiou@njit.edu': 'No', 'itani@njit.edu': 'Yes', 'mx6@njit.edu': "I don't have a preference", 'usman@njit.edu': 'Yes', 'chasewu@njit.edu': 'Yes', 'geller@njit.edu': "I don't have

In [23]:
# Set the objective function to maximize the total score (points and penalties)
model.setObjective(total_points + consecutive_penalty_sum + format_penalty_sum + day_penalty_sum, GRB.MAXIMIZE)

# Update the model to include all changes
model.update()

print(f"Scheduling model setup time: {time.time() - start_time:.2f} seconds\n")

# Set a time limit for the optimization (10 minutes)
model.setParam("TimeLimit", 600)

# Solve the optimization problem
start_optimize = time.time()
model.optimize()
print(f"Optimization time: {time.time() - start_optimize:.2f} seconds\n")

# Check and print violated slack variables
print("\nViolated Slack Variables (costing -2048 points):")
violated_slacks = []

for v in model.getVars():
    if "Slack_" in v.varName and v.x > 0.5:  # Check for slack variables indicating violations
        print(f"{v.varName}: Value {v.x}")
        violated_slacks.append(v.varName)

if not violated_slacks:
    print("No constraints related to health/religion were violated.")
else:
    print(f"\nTotal number of violated slack variables: {len(violated_slacks)}")


Scheduling model setup time: 303.02 seconds

Set parameter TimeLimit to value 600
Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (mac64[x86] - Darwin 21.6.0 21H1320)

CPU model: Intel(R) Core(TM) i7-6567U CPU @ 3.30GHz
Thread count: 2 physical cores, 4 logical processors, using up to 4 threads

Non-default parameters:
TimeLimit  600

Optimize a model with 509842 rows, 287772 columns and 1788466 nonzeros
Model fingerprint: 0x964dbe26
Variable types: 0 continuous, 287772 integer (287772 binary)
Coefficient statistics:
  Matrix range     [1e+00, 2e+00]
  Objective range  [2e+00, 2e+03]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+01]
Presolve removed 415828 rows and 272265 columns
Presolve time: 0.89s
Presolved: 94014 rows, 15507 columns, 285028 nonzeros
Variable types: 0 continuous, 15507 integer (15507 binary)
Found heuristic solution: objective -103413.0000
Performing another presolve...
Presolve removed 60747 rows and 5582 columns
Presolve time: 0.96s
Found heuris

In [24]:
from datetime import datetime

# Get current date and time
now = datetime.now()

# Format the date and time as a string
date_time_str = now.strftime("%Y-%m-%d_%H-%M-%S")


def calculate_scheduled_percentages(df, variables, model):
    """
    Calculates the percentage of scheduled classes for each day and time slot after optimization.
    
    Parameters:
        df (pd.DataFrame): The DataFrame containing course, instructor, and section data.
        variables (dict): Dictionary of Gurobi binary variables representing assignments.
        model (gurobipy.Model): The Gurobi model containing the optimized variables.
    
    Returns:
        pd.DataFrame: A DataFrame containing the percentage of scheduled classes for each day and time slot.
    """
    # Initialize schedule counts for each (day, time slot)
    schedule_counts = {(day, slot): 0 for day in days for slot in time_slots}
    total_classes = 0  # Total number of scheduled classes

    # Iterate over courses and sections to identify scheduled variables
    for _, row in df.iterrows():
        course = row['Course']
        instructor = row['Instructor']
        num_sections = int(row['# Sections'])

        if num_sections == 0:
            continue

        # Determine parts for the course (special case for CS435)
        parts = [1, 2, 3] if course == "CS435" else [1, 2]

        for section_id in range(1, num_sections + 1):
            for part in parts:
                for day in days:
                    for slot in time_slots:
                        var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"

                        # Check if the variable is active (scheduled)
                        if var_name in variables and variables[var_name].X > 0.5:
                            schedule_counts[(day, slot)] += 1
                            total_classes += 1

    # Calculate percentages for each (day, slot)
    percentages = []
    for (day, slot), count in schedule_counts.items():
        percentage = (count / total_classes) * 100 if total_classes > 0 else 0
        percentages.append({'Day': day, 'Time Slot': slot, 'Percentage': percentage})

    # Convert the results to a DataFrame
    percentage_df = pd.DataFrame(percentages)

    return percentage_df


In [25]:
percentage_df = calculate_scheduled_percentages(df, variables, model)
print(percentage_df)

# Optionally save the result to a CSV for future analysis
percentage_df.to_csv(f"scheduled_percentages_{date_time_str}.csv", index=False)


          Day       Time Slot  Percentage
0      Monday   8:30-10:00 AM    2.662722
1      Monday  10:00-11:30 AM    4.142012
2      Monday   11:30-1:00 PM    1.775148
3      Monday    1:00-2:30 PM    4.437870
4      Monday    2:30-4:00 PM    2.958580
5      Monday    4:00-5:30 PM    0.295858
6      Monday    6:00-7:30 PM    0.591716
7      Monday    7:30-9:00 PM    0.591716
8     Tuesday   8:30-10:00 AM    2.958580
9     Tuesday  10:00-11:30 AM    4.437870
10    Tuesday   11:30-1:00 PM    2.366864
11    Tuesday    1:00-2:30 PM    1.183432
12    Tuesday    2:30-4:00 PM    1.775148
13    Tuesday    4:00-5:30 PM    1.479290
14    Tuesday    6:00-7:30 PM    0.000000
15    Tuesday    7:30-9:00 PM    0.000000
16  Wednesday   8:30-10:00 AM    3.846154
17  Wednesday  10:00-11:30 AM    5.325444
18  Wednesday   11:30-1:00 PM    2.366864
19  Wednesday    1:00-2:30 PM    3.846154
20  Wednesday    2:30-4:00 PM    0.887574
21  Wednesday    4:00-5:30 PM    0.887574
22  Wednesday    6:00-7:30 PM    1

In [26]:
if model.Status == GRB.INFEASIBLE:
    # Handle infeasibility by computing IIS and writing constraints
    print("The model is infeasible; computing IIS")
    model.computeIIS()

    with open(f"model_with_constraints_{date_time_str}.txt", "w") as f:
        f.write("Infeasible Model with Numbered Constraints\n\n")
        for i, constr in enumerate(model.getConstrs(), 1):
            if constr.IISConstr:
                f.write(f"Constraint {i}: {constr.ConstrName}\n")
                f.write(f"{model.getRow(constr)} = {constr.RHS}\n\n")

    print(f"IIS written to model_with_constraints_{date_time_str}.txt")

else:
    # Feasible solution: extract schedule and slack values
    schedule = []
    slack_values = []
    course_section_tracker = {}

    for var in model.getVars():
        if var.varName.startswith('Slack_') and var.x > 0.5:
            slack_values.append((var.varName, var.x))
        elif var.x > 0.5:
            try:
                _, course, instructor, section_id, part, day, slot = var.varName.split('_')
                email = df.loc[df['Instructor'] == instructor, 'Email'].values[0]
                schedule.append((course, instructor, email, section_id, part, day, slot))
            except ValueError:
                pass

    # Sort schedule lexicographically
    schedule.sort()

    # Write schedule sorted by course
    with open(f"final_schedule_sorted_by_course_{date_time_str}.txt", "w") as f:
        f.write("Course Schedule (Lexicographically Sorted):\n\n")
        for entry in schedule:
            course, instructor, email, section_id, part, day, slot = entry
            if course not in course_section_tracker:
                course_section_tracker[course] = 1
            section_number = course_section_tracker[course]
            capacity = section_capacity_map.get((course, instructor, int(section_id)))

            f.write(f"Course: {course}, Instructor: {instructor}, Email: {email}, Section: {section_number}, "
                    f"Part: {part}, Day: {day}, Slot: {slot}, Capacity: {capacity}\n")
            if (part == "2" and course != "CS435") or (part == "3" and course == "CS435"):
                course_section_tracker[course] += 1
                f.write("\n")

    print(f"Final schedule written to final_schedule_sorted_by_course_{date_time_str}.txt")

    # Write schedule sorted by instructor
    schedule_sorted_by_instructor = sorted(schedule, key=lambda x: x[1])
    with open(f"final_schedule_sorted_by_instructor_{date_time_str}.txt", "w") as f:
        f.write("Course Schedule (Sorted by Instructor):\n\n")
        course_section_tracker.clear()
        current_instructor = None

        for entry in schedule_sorted_by_instructor:
            course, instructor, email, section_id, part, day, slot = entry
            if course not in course_section_tracker:
                course_section_tracker[course] = 1
            section_number = course_section_tracker[course]

            if instructor != current_instructor:
                if current_instructor is not None:
                    f.write("\n")
                f.write(f"Instructor: {instructor}, Email: {email}\n")
                current_instructor = instructor

            capacity = section_capacity_map.get((course, instructor, int(section_id)))
            f.write(f"\tCourse: {course}, Section: {section_number}, Part: {part}, Day: {day}, Slot: {slot}, "
                    f"Capacity: {capacity}\n")
            if (part == "2" and course != "CS435") or (part == "3" and course == "CS435"):
                course_section_tracker[course] += 1

    print(f"Final schedule sorted by instructor written to final_schedule_sorted_by_instructor_{date_time_str}.txt")

    # Calculate and save scheduled percentages
    percentage_df = calculate_scheduled_percentages(df, variables, model)
    print(percentage_df)
    percentage_df.to_csv(f"scheduled_percentages_{date_time_str}.csv", index=False)

    # Track and sort instructors by the number of days they are scheduled
    instructor_days = defaultdict(set)
    for _, instructor, _, _, _, day, _ in schedule:
        instructor_days[instructor].add(day)

    instructor_day_counts = [
        (instructor, len(days), sorted(days)) for instructor, days in instructor_days.items()
    ]
    instructor_day_counts_sorted = sorted(instructor_day_counts, key=lambda x: x[1], reverse=True)

    # Write instructors sorted by the number of days on campus
    with open(f"instructors_sorted_by_days_on_campus_{date_time_str}.txt", "w") as f:
        f.write("Instructors sorted by the number of days they come to campus:\n\n")
        for instructor, num_days, days_list in instructor_day_counts_sorted:
            f.write(f"Instructor: {instructor}, Number of Days: {num_days}, Days: {', '.join(days_list)}\n")

    print(f"Instructors sorted by days written to instructors_sorted_by_days_on_campus_{date_time_str}.txt")


Final schedule written to final_schedule_sorted_by_course_2025-01-27_13-25-59.txt
Final schedule sorted by instructor written to final_schedule_sorted_by_instructor_2025-01-27_13-25-59.txt
          Day       Time Slot  Percentage
0      Monday   8:30-10:00 AM    2.662722
1      Monday  10:00-11:30 AM    4.142012
2      Monday   11:30-1:00 PM    1.775148
3      Monday    1:00-2:30 PM    4.437870
4      Monday    2:30-4:00 PM    2.958580
5      Monday    4:00-5:30 PM    0.295858
6      Monday    6:00-7:30 PM    0.591716
7      Monday    7:30-9:00 PM    0.591716
8     Tuesday   8:30-10:00 AM    2.958580
9     Tuesday  10:00-11:30 AM    4.437870
10    Tuesday   11:30-1:00 PM    2.366864
11    Tuesday    1:00-2:30 PM    1.183432
12    Tuesday    2:30-4:00 PM    1.775148
13    Tuesday    4:00-5:30 PM    1.479290
14    Tuesday    6:00-7:30 PM    0.000000
15    Tuesday    7:30-9:00 PM    0.000000
16  Wednesday   8:30-10:00 AM    3.846154
17  Wednesday  10:00-11:30 AM    5.325444
18  Wednesday

In [27]:
# Iterate over the list of violated soft constraints and print details
for v, point in instructor_soft_violated:
    if variables[v].X > 0.5:  # Check if the variable is active (value > 0.5)
        print(f"Variable: {v}, Value: {variables[v].X}, Penalty: {point}")


Variable: X_CS631_Theodoratos, Dimitrios_1_1_Wednesday_8:30-10:00 AM, Value: 1.0, Penalty: -8
Variable: X_IT310_Statica, Robert_1_1_Tuesday_2:30-4:00 PM, Value: 1.0, Penalty: -8
Variable: X_IT310_Statica, Robert_1_2_Thursday_2:30-4:00 PM, Value: 1.0, Penalty: -8
Variable: X_IT330_Statica, Robert_1_1_Friday_2:30-4:00 PM, Value: 1.0, Penalty: -8
Variable: X_IT310_Statica, Robert_2_1_Tuesday_4:00-5:30 PM, Value: 1.0, Penalty: -8
Variable: X_IT310_Statica, Robert_2_2_Thursday_4:00-5:30 PM, Value: 1.0, Penalty: -8
Variable: X_IT330_Statica, Robert_1_2_Friday_4:00-5:30 PM, Value: 1.0, Penalty: -8


In [29]:
# Dictionary to track total impact for each instructor
instructor_impact = {}

# Iterate over the soft violated variables and their corresponding points
for var_name, points in instructor_soft_violated:
    # Extract the instructor name from the variable name
    try:
        _, course, instructor_name, section_id, part, day, slot = var_name.split('_')
    except ValueError:
        print(f"Warning: Variable {var_name} does not match the expected format. Skipping.")
        continue

    # Only consider variables with a value greater than 0.5 in the optimized solution
    if variables[var_name].X > 0.5:
        # Initialize impact for the instructor if not already present
        if instructor_name not in instructor_impact:
            instructor_impact[instructor_name] = 0

        # Add the impact (penalty) for this variable
        instructor_impact[instructor_name] += points

# Sort instructors by their total impact in ascending order
sorted_instructors = sorted(instructor_impact.items(), key=lambda x: x[1])

# Print the sorted instructors and their impacts
print("Instructor impacts on total points (sorted by impact):")
for instructor, impact in sorted_instructors:
    print(f"Instructor: {instructor}, Net Impact: {impact}")


Instructor impacts on total points (sorted by impact):
Instructor: Statica, Robert, Net Impact: -48
Instructor: Theodoratos, Dimitrios, Net Impact: -8


In [30]:
# Iterate through all variables in the model
for var in model.getVars():
    # Check if the variable name starts with "Penalty_" and its value indicates it is active (greater than 0.5)
    if var.varName.startswith("Penalty_") and var.X > 0.5:
        print(f"Penalty Variable: {var.varName}, Value: {var.X}")


Penalty Variable: Penalty_Li, Daming_Wednesday_4:00-5:30 PM_6:00-7:30 PM, Value: 1.0
Penalty Variable: Penalty_Li, Daming_Friday_2:30-4:00 PM_4:00-5:30 PM, Value: 1.0
Penalty Variable: Penalty_Monogioudis, Pantelis_Monday_10:00-11:30 AM_11:30-1:00 PM, Value: 1.0
Penalty Variable: Penalty_Monogioudis, Pantelis_Wednesday_8:30-10:00 AM_10:00-11:30 AM, Value: 1.0
Penalty Variable: Penalty_Monogioudis, Pantelis_Monday_8:30-10:00 AM_10:00-11:30 AM, Value: 1.0
Penalty Variable: Penalty_Monogioudis, Pantelis_Tuesday_8:30-10:00 AM_10:00-11:30 AM, Value: 1.0
Penalty Variable: Penalty_Patel, Dipesh_Monday_2:30-4:00 PM_4:00-5:30 PM, Value: 1.0
Penalty Variable: Penalty_Patel, Dipesh_Thursday_1:00-2:30 PM_2:30-4:00 PM, Value: 1.0
Penalty Variable: Penalty_Patel, Dipesh_Monday_1:00-2:30 PM_2:30-4:00 PM, Value: 1.0
Penalty Variable: Penalty_Patel, Dipesh_Wednesday_11:30-1:00 PM_1:00-2:30 PM, Value: 1.0
Penalty Variable: Penalty_Hamidli, Fuad_Monday_2:30-4:00 PM_4:00-5:30 PM, Value: 1.0
Penalty Variab