In [1]:
!pip install --user gurobipy
import sys
print(sys.executable)





/opt/anaconda3/bin/python


In [5]:
import pandas as pd
from gurobipy import Model, GRB
import math
import copy
import time

# Wall-clock reference taken before any data is loaded.
start_time = time.time()

# Read the teaching assignments.  Whatever the sheet calls its first two
# columns, they are used as the course code and the instructor name.
excel_file = "Scheduling Project Pilot (8).xlsx"
df = pd.read_excel(excel_file, sheet_name='Assignments_')
df = df.rename(columns={df.columns[0]: 'Course', df.columns[1]: 'Instructor'})

# Rows without a course code are discarded (no other filtering happens here).
df = df.dropna(subset=['Course'])

# First run of digits found in the course code, kept as text
# (e.g. "CS435" -> "435"); it is converted with int() where it is compared.
df['Course_Number'] = df['Course'].str.extract(r'(\d+)', expand=False)

# The rest of the script refers to the section size as "Capacity".
df = df.rename(columns={'Section Size': 'Capacity'})

def create_section_capacity_map(df):
    """Map every (course, instructor, section number) to a section capacity.

    Rows are ordered by Course, Instructor and Capacity.  Within each
    (Course, Instructor) group the sections are numbered 1, 2, 3, ... in that
    order, every row contributing ``# Sections`` consecutive numbers that all
    receive the row's ``Capacity``.

    Parameters:
        df (pd.DataFrame): needs the columns 'Course', 'Instructor',
            'Capacity' and '# Sections'.

    Returns:
        dict: ``{(course, instructor, section_number): capacity}`` where the
        course and instructor strings have surrounding whitespace removed.
    """
    ordered = df.sort_values(by=['Course', 'Instructor', 'Capacity'])
    capacity_by_section = {}

    for (course, instructor), rows in ordered.groupby(['Course', 'Instructor']):
        course_key = course.strip()
        instructor_key = instructor.strip()

        # One capacity value per individual section, in sorted row order.
        capacities = (
            record['Capacity']
            for _, record in rows.iterrows()
            for _ in range(int(record['# Sections']))
        )
        for number, capacity in enumerate(capacities, start=1):
            capacity_by_section[(course_key, instructor_key, number)] = capacity

    return capacity_by_section


def create_aggregated_dataframe(df):
    """Sum the number of sections per (Course, Instructor, Course_Number, Email).

    'Email' is one of the grouping keys.  ``groupby`` drops every group that
    has a missing key by default, which used to remove all assignments of an
    instructor without an e-mail address (for example one that found no
    match in a left merge).  Those rows are now kept with a missing 'Email'.
    Rows whose 'Course', 'Instructor' or 'Course_Number' is missing are still
    excluded, as before.

    Requires pandas >= 1.1 (``dropna`` argument of ``groupby``).

    Parameters:
        df (pd.DataFrame): needs the columns 'Course', 'Instructor',
            'Course_Number', 'Email' and '# Sections'.

    Returns:
        pd.DataFrame: one row per key combination with the columns 'Course',
        'Instructor', 'Course_Number', 'Email' and the summed '# Sections'.
    """
    group_keys = ['Course', 'Instructor', 'Course_Number', 'Email']
    required_keys = ['Course', 'Instructor', 'Course_Number']

    aggregated_df = (
        df.groupby(group_keys, dropna=False)
        .agg({'# Sections': 'sum'})
        .reset_index()
    )

    # Only a missing e-mail is tolerated; the other keys stay mandatory.
    aggregated_df = aggregated_df.dropna(subset=required_keys).reset_index(drop=True)

    return aggregated_df

# Faculty directory for the term; joined to the assignments by instructor name.
new_excel_file = "Faculty and Instructors.xlsx"
new_df = pd.read_excel(new_excel_file, sheet_name='2024-09-Fall')

# Left join: every assignment row is kept, directory columns are attached
# where the 'Instructor' value matches exactly.
# NOTE(review): a name listed more than once in the directory would duplicate
# assignment rows here — confirm the sheet has unique names.
df = pd.merge(df, new_df, how='left', on='Instructor')

# Capacity of every individual section, keyed by (course, instructor, section).
section_capacity_map = create_section_capacity_map(df)

# Total number of sections for each course/instructor combination.
aggregated_df = create_aggregated_dataframe(df)

print("\nAggregated DataFrame:", aggregated_df, sep="\n")

# From here on `df` is an independent copy of the aggregated table.
df = copy.deepcopy(aggregated_df)


# Drop the helper column used for filtering
#df = df.drop(columns=['Course_Number'])

# Teaching slots of a day, in chronological order.
time_slots = [
    "8:30-10:00 AM",
    "10:00-11:30 AM",
    "11:30-1:00 PM",
    "1:00-2:30 PM",
    "2:30-4:00 PM",
    "4:00-5:30 PM",
    "6:00-7:30 PM",
    "7:30-9:00 PM",
]

# Teaching days (Monday to Friday only).
days = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

# Balance share per (day, slot).  Every cell is 0.20 except the ones listed
# here: Wednesday carries 0.25 in its first four slots, and three cells are
# closed completely (share 0).
_slot_share_overrides = {
    ("Wednesday", "8:30-10:00 AM"): 0.25,
    ("Wednesday", "10:00-11:30 AM"): 0.25,
    ("Wednesday", "11:30-1:00 PM"): 0.25,
    ("Wednesday", "1:00-2:30 PM"): 0.25,
    ("Friday", "11:30-1:00 PM"): 0.00,
    ("Wednesday", "2:30-4:00 PM"): 0.00,
    ("Wednesday", "4:00-5:30 PM"): 0.00,
}

# Keys are ordered slot by slot, and Monday..Friday inside each slot.
slot_percentages = {
    (day, slot): _slot_share_overrides.get((day, slot), 0.20)
    for slot in time_slots
    for day in days
}
# Decision variables by name, and the optimisation model that owns them.
variables = {}
model = Model("Scheduling")

# One continuous slack variable for every (day, slot) cell with a positive
# balance share; cells with a share of 0 get none.
slack_vars = {}
for (day, slot), percentage in slot_percentages.items():
    if not percentage > 0:
        continue
    slack_var_name = f"Slack_{day}_{slot}"
    slack_vars[slack_var_name] = model.addVar(vtype=GRB.CONTINUOUS, name=slack_var_name)

# Linear expression adding up all balance slack variables.
total_slack = sum(slack_vars.values())

# Update the constraint for the maximum allowed parts



# Define the binary variables


# One binary variable per (course, instructor, section, part, day, slot),
# named "X_<course>_<instructor>_<section>_<part>_<day>_<slot>".
for _, row in df.iterrows():
    num_sections = int(row['# Sections'])
    if num_sections == 0:
        continue

    course = row['Course']
    instructor = row['Instructor']

    # Every course meets in two parts, except CS435 which has three.
    parts = [1, 2, 3] if course == "CS435" else [1, 2]

    for section_id in range(1, num_sections + 1):
        for part in parts:
            part_prefix = f"X_{course}_{instructor}_{section_id}_{part}"
            for day in days:
                for slot in time_slots:
                    var_name = f"{part_prefix}_{day}_{slot}"
                    variables[var_name] = model.addVar(vtype=GRB.BINARY, name=var_name)
# Total number of section parts, counted as two per section (2N).
# NOTE(review): CS435 sections get three parts when the variables are
# created, so they are undercounted here — confirm that this is intended.
total_section_parts = 2 * sum(int(row['# Sections']) for _, row in df.iterrows())

# Index the decision variables once by (day, slot) and by slot.  Day and slot
# are the last two '_'-separated fields of a variable name; splitting from the
# right keeps the lookup correct even when a course or instructor name itself
# contains an underscore (fixed positions 5 and 6 would silently match nothing).
vars_by_day_slot = {}
vars_by_slot = {}
for var_name, var in variables.items():
    _, var_day, var_slot = var_name.rsplit('_', 2)
    vars_by_day_slot.setdefault((var_day, var_slot), []).append(var)
    vars_by_slot.setdefault(var_slot, []).append(var)

# Per-cell balance: the number of parts placed in a (day, slot) cell may not
# exceed ceil(share / 6 * total parts), plus the cell's slack where one exists.
for (day, slot), percentage in slot_percentages.items():
    slot_vars = vars_by_day_slot.get((day, slot), [])

    max_section_parts_slot = math.ceil((percentage / 6) * total_section_parts)

    if percentage > 0:  # Slack variables exist only for non-zero shares
        model.addConstr(
            sum(slot_vars) <= max_section_parts_slot + slack_vars[f"Slack_{day}_{slot}"],
            name=f"balance_slot_with_slack_{day}_{slot}"
        )
    else:
        model.addConstr(
            sum(slot_vars) <= max_section_parts_slot,
            name=f"balance_slot_no_slack_{day}_{slot}"
        )

# Evening balance: for each evening slot, the parts placed in it over the
# whole week may not exceed share / 6 * total parts (not rounded, no slack).
evening_percentage = 0.20  # Evening slots defined as 6:00-7:30 PM and 7:30-9:00 PM

evening_slots = ["6:00-7:30 PM", "7:30-9:00 PM"]
max_section_parts_evening = evening_percentage / 6 * total_section_parts
for slot in evening_slots:
    evening_vars = vars_by_slot.get(slot, [])
    model.addConstr(sum(evening_vars) <= max_section_parts_evening, name=f"balance_evening_{slot}")


# Set the objective to maximize the number of graduate courses scheduled on consecutive slots



# Every part of every section is placed in exactly one (day, slot) cell.
constraint_counter = 0  # number of "unique_slot" constraints added

for _, row in df.iterrows():
    num_sections = int(row['# Sections'])
    if num_sections == 0:
        continue

    course, instructor = row['Course'], row['Instructor']

    # Every course meets in two parts, except CS435 which has three.
    parts = [1, 2, 3] if course == "CS435" else [1, 2]

    for section_id in range(1, num_sections + 1):
        for part in parts:
            part_prefix = f"X_{course}_{instructor}_{section_id}_{part}"
            candidate_names = (
                f"{part_prefix}_{day}_{slot}" for day in days for slot in time_slots
            )
            section_vars = [
                variables[name] for name in candidate_names if name in variables
            ]
            if not section_vars:
                continue

            model.addConstr(
                sum(section_vars) == 1,
                name=f"unique_slot_{course}_{instructor}_{section_id}_{part}",
            )
            constraint_counter += 1

# An instructor teaches at most one section part in any (day, slot) cell.
for instructor in df['Instructor'].unique():
    # Name prefixes of all section parts this instructor teaches, in row,
    # section and part order.
    part_prefixes = []
    for _, row in df[df['Instructor'] == instructor].iterrows():
        course = row['Course']
        num_sections = int(row['# Sections'])
        parts = [1, 2, 3] if course == "CS435" else [1, 2]
        for section_id in range(1, num_sections + 1):
            for part in parts:
                part_prefixes.append(f"X_{course}_{instructor}_{section_id}_{part}")

    for day in days:
        for slot in time_slots:
            cell_names = [f"{prefix}_{day}_{slot}" for prefix in part_prefixes]
            instructor_vars = [variables[name] for name in cell_names if name in variables]
            if not instructor_vars:
                continue

            model.addConstr(
                sum(instructor_vars) <= 1,
                name=f"one_section_per_slot_{instructor}_{day}_{slot}",
            )

                
# Slots in which the first part of a graduate (number >= 600) section may
# start; its second part follows in the next slot of the same day.
valid_start_times = ["8:30-10:00 AM", "11:30-1:00 PM", "2:30-4:00 PM", "6:00-7:30 PM"]

# Undergraduate pairing rule: (day of part 1, days allowed for part 2 in the
# same slot, prefix of the constraint name).
undergrad_day_pairs = [
    ("Monday", ("Wednesday", "Thursday"), "part1_monday_part2_wed_thurs"),
    ("Tuesday", ("Thursday", "Friday"), "part1_tuesday_part2_thurs_fri"),
    ("Wednesday", ("Friday",), "part1_wednesday_part2_friday"),
]

for course in df['Course'].unique():
    for instructor in df['Instructor'].unique():
        course_instructor_rows = df[(df['Course'] == course) & (df['Instructor'] == instructor)]
        if course_instructor_rows.empty:
            continue

        course_number = int(course_instructor_rows['Course_Number'].iloc[0])
        # int() keeps range() working when the summed column is not an integer type.
        num_sections = int(course_instructor_rows.iloc[0]['# Sections'])

        for section_id in range(1, num_sections + 1):
            section_prefix = f"X_{course}_{instructor}_{section_id}"

            if course_number < 600:
                # Undergraduate: both parts use the same slot on paired days.
                for slot in time_slots:
                    for first_day, second_days, label in undergrad_day_pairs:
                        part1_name = f"{section_prefix}_1_{first_day}_{slot}"
                        if part1_name not in variables:
                            continue
                        # Part 1 in this cell requires part 2 in the same slot
                        # on one of the allowed later days.
                        model.addConstr(
                            variables[part1_name]
                            <= sum(variables[f"{section_prefix}_2_{second_day}_{slot}"]
                                   for second_day in second_days),
                            name=f"{label}_{course}_{instructor}_{section_id}_{slot}"
                        )

                    # Part 1 is never placed on Thursday or Friday.
                    for banned_day in ("Thursday", "Friday"):
                        part1_name = f"{section_prefix}_1_{banned_day}_{slot}"
                        if part1_name in variables:
                            model.addConstr(
                                variables[part1_name] == 0,
                                name=f"no_part1_{banned_day.lower()}_{course}_{instructor}_{section_id}_{slot}"
                            )
            else:
                # Graduate: parts 1 and 2 occupy two consecutive slots of one
                # day, starting at a valid start time.  This is built once per
                # section.  It used to sit inside the per-slot loop (and reused
                # its loop variable), which created every Consecutive_*
                # variable and link constraint eight times under one name.
                consecutive_schedule_vars = []

                for day in days:
                    for first_slot, second_slot in zip(time_slots, time_slots[1:]):
                        if first_slot not in valid_start_times:
                            continue
                        var_part1 = f"{section_prefix}_1_{day}_{first_slot}"
                        var_part2 = f"{section_prefix}_2_{day}_{second_slot}"
                        if var_part1 not in variables or var_part2 not in variables:
                            continue

                        # Indicator: this (day, start slot) pair is the one used.
                        var_consecutive = model.addVar(
                            vtype=GRB.BINARY,
                            name=f"Consecutive_{course}_{instructor}_{section_id}_{day}_{first_slot}"
                        )
                        consecutive_schedule_vars.append(var_consecutive)

                        # Both parts are tied to the indicator: part 1 is in
                        # the start slot exactly when part 2 is in the next one.
                        model.addConstr(
                            variables[var_part1] == var_consecutive,
                            name=f"link_part1_{course}_{instructor}_{section_id}_{day}_{first_slot}"
                        )
                        model.addConstr(
                            variables[var_part2] == var_consecutive,
                            name=f"link_part2_{course}_{instructor}_{section_id}_{day}_{first_slot}"
                        )

                # Exactly one consecutive pair is chosen for the section.
                model.addConstr(
                    sum(consecutive_schedule_vars) == 1,
                    name=f"one_consecutive_pair_{course}_{instructor}_{section_id}"
                )

# The (day, slot) cell that is reserved for small, higher-level classes.
restricted_day = "Monday"
restricted_time_slot = "4:00-5:30 PM"

# A course/instructor row may use the restricted cell only if its course
# number is above 199 and every one of its sections has a capacity below 35.
# Otherwise all parts of all its sections are kept out of the cell.
# NOTE(review): one over-sized section excludes all sections of the row (this
# matches the previous behaviour) — confirm a per-section rule is not wanted.
for _, row in df.iterrows():
    course = row['Course']
    instructor = row['Instructor']
    course_number = int(row['Course_Number'])
    num_sections = int(row['# Sections'])
    if num_sections == 0:
        continue

    # section_capacity_map is keyed by whitespace-stripped names, so the
    # lookup has to strip them as well.
    capacities = [
        section_capacity_map.get((course.strip(), instructor.strip(), sc))
        for sc in range(1, num_sections + 1)
    ]
    if any(capacity is None for capacity in capacities):
        # Previously this ended in "TypeError: '<' not supported" for None.
        print(f"Warning: no capacity found for some section of {course} / {instructor}; "
              f"keeping it out of {restricted_day} {restricted_time_slot}")

    allowed = course_number > 199 and all(
        capacity is not None and capacity < 35 for capacity in capacities
    )
    if allowed:
        continue

    # Each constraint is added once (it used to be repeated for every
    # section that failed the test).
    parts = [1, 2, 3] if course == "CS435" else [1, 2]
    for section_id in range(1, num_sections + 1):
        for part in parts:
            var_name = f"X_{course}_{instructor}_{section_id}_{part}_{restricted_day}_{restricted_time_slot}"
            if var_name in variables:
                model.addConstr(variables[var_name] == 0, name=f"restricted_slot_{course}_{instructor}_{section_id}_{part}_{restricted_day}_{restricted_time_slot}")

# Set slack variables to zero manually
#for slack_var_name, slack_var in slack_vars.items():
#    model.addConstr(slack_var == 0, name=f"set_{slack_var_name}_to_zero")
# Groups of courses that must not collide with each other.
course_blocks = [
    ['CS114', 'IS210', 'CS450', 'CS337'],
    ['CS241', 'CS280', 'IS350'],
    ['CS288', 'CS332', 'CS301', 'CS356'],
    ['CS341', 'CS350', 'CS351', 'CS331', 'CS375'],
    ['CS435', 'CS490', 'CS485', 'CS370', 'CS375'],
    ['CS485', 'CS491', 'CS450', 'CS482'],
    ['CS610', 'CS630', 'CS631', 'CS656', 'DS675', 'CS675', 'CS670'],  # Block-grad-core
    ['DS677', 'DS669', 'DS650', 'CS670', 'CS610', 'CS665', 'CS667', 'CS732', 'DS680'],  # Block-grad-DS+Alg
    ['CS608', 'CS645', 'CS646', 'CS647', 'CS648', 'CS678', 'CS696']   # Block-grad-cyber
]

# Number of sections per course and instructor, taken from the schedule table.
block_sections = {}
for _, row in df.iterrows():
    per_instructor = block_sections.setdefault(row['Course'], {})
    per_instructor[row['Instructor']] = max(
        per_instructor.get(row['Instructor'], 0), int(row['# Sections'])
    )

# For two different courses of a block, part k of any section of one course
# and part k of any section of the other may not share a (day, slot) cell
# (k = 1, 2).
#
# The variable names used here previously had no section id
# ("X_<course>_<instructor>_<part>_<day>_<slot>"), so they never matched a
# decision variable and no constraint was added at all.
# NOTE(review): the rule is now active for every pair of sections; re-check
# that the model stays feasible.  Part 1 against part 2 (and the third part of
# CS435) is still not covered — confirm whether it should be.
constrained_pairs = set()
for block in course_blocks:
    for position, course1 in enumerate(block):
        for course2 in block[position + 1:]:
            pair = frozenset((course1, course2))
            if course1 == course2 or pair in constrained_pairs:
                continue  # same course, or pair already handled via another block
            constrained_pairs.add(pair)

            for instructor1, count1 in block_sections.get(course1, {}).items():
                for instructor2, count2 in block_sections.get(course2, {}).items():
                    for section1 in range(1, count1 + 1):
                        for section2 in range(1, count2 + 1):
                            for part in (1, 2):
                                for day in days:
                                    for slot in time_slots:
                                        name1 = f"X_{course1}_{instructor1}_{section1}_{part}_{day}_{slot}"
                                        name2 = f"X_{course2}_{instructor2}_{section2}_{part}_{day}_{slot}"
                                        if name1 in variables and name2 in variables:
                                            model.addConstr(
                                                variables[name1] + variables[name2] <= 1,
                                                name=f"no_same_day_slot_{course1}_{instructor1}_{section1}_{course2}_{instructor2}_{section2}_{day}_{slot}_part{part}"
                                            )
                                    
# Day letter of a slot code (the "M" in "M2") -> full day name.
# 'S' (Saturday) is listed for completeness only.
time_slot_mapping = dict(zip(
    "MTWRFS",
    ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"],
))

# Slot digit of a slot code (the "2" in "M2") -> slot label; '1' is the
# first slot of the day.
time_slot_index = {
    str(position): label
    for position, label in enumerate(
        [
            "8:30-10:00 AM",
            "10:00-11:30 AM",
            "11:30-1:00 PM",
            "1:00-2:30 PM",
            "2:30-4:00 PM",
            "4:00-5:30 PM",
            "6:00-7:30 PM",
            "7:30-9:00 PM",
        ],
        start=1,
    )
}

# Hard availability constraints ("Health" and "Religion" rows).
#
# The "Instructor UCID: Type" cell is split into "<email>: <type>"; the
# "Slots" cell appears to look like "|M2|R2|M4|" (day letter + slot digit).
# Sample of the information carried by the sheet:
# instructor_constraints = {
#     "ahh2@njit.edu": ["M2", "R2", "M4", "T4", "M5", "T5", "T7", "T8"],
#     "alexg@njit.edu": ["M5", "R3"]
# }
df_constraints = pd.read_excel(excel_file, sheet_name='Constraints & Preferences')
for _, row in df_constraints.iterrows():
    instructor_info = row['Instructor UCID: Type']
    slots = row['Slots']
    # A float here means an empty (NaN) cell; reading stops at that row.
    if isinstance(instructor_info, float):
        break

    email, constraint_type = instructor_info.split(": ")

    # Only "Health" and "Religion" rows are hard constraints.
    if constraint_type.strip() not in ["Health", "Religion"]:
        continue

    # The first and last pieces of the '|'-split are dropped.
    blocked_slots = slots.split("|")[1:-1]

    # Resolve the instructor once per sheet row: e-mail -> name -> own rows.
    instructor_row = df[df['Email'] == email]
    if instructor_row.empty:
        continue
    instructor_name = instructor_row['Instructor'].iloc[0]
    instructor_courses = df[df['Instructor'] == instructor_name]

    for slot_code in blocked_slots:
        day_full = time_slot_mapping[slot_code[0]]       # M, T, W, R, F
        time_slot_full = time_slot_index[slot_code[1]]   # 1-8

        for _, course_row in instructor_courses.iterrows():
            course = course_row['Course']
            # Section count of THIS instructor's row.  It was previously read
            # from the first row of the course, which can belong to another
            # instructor with a different number of sections.
            num_sections = int(course_row['# Sections'])
            parts = [1, 2, 3] if course == "CS435" else [1, 2]

            for section_id in range(1, num_sections + 1):
                for part in parts:
                    var_name = f"X_{course}_{instructor_name}_{section_id}_{part}_{day_full}_{time_slot_full}"
                    # usman@njit.edu is explicitly exempt from these blocks.
                    if var_name in variables and email!="usman@njit.edu":
                        model.addConstr(
                            variables[var_name] == 0,
                            name=f"block_slot_{instructor_name}_{day_full}_{time_slot_full}"
                        )

# Points earned for each section part placed in a slot of the given
# preference type.  Any other type (except the hard "Health"/"Religion"
# types) costs 3 points per part.
points_per_slot = {
    "Pref-1": 3,
    "Pref-2": 2,
    "Pref-3": 1
}
OTHER_TYPE_POINTS = -3

total_points = 0  # linear expression: sum of points over all scored variables

# Kept for compatibility; nothing in this section ever adds an entry to it.
instructors_applied = {}

for idx, row in df_constraints.iterrows():
    instructor_info = row['Instructor UCID: Type']
    slots = row['Slots']
    # A float here means an empty (NaN) cell; reading stops at that row.
    if isinstance(instructor_info, float):
        break

    email, constraint_type = instructor_info.split(": ")

    # The first and last pieces of the '|'-split are dropped.
    blocked_slots = slots.split("|")[1:-1]

    type_key = constraint_type.strip()
    if type_key in ["Health", "Religion"]:
        continue  # hard constraints carry no points
    points = points_per_slot.get(type_key, OTHER_TYPE_POINTS)

    # Resolve the instructor once per sheet row: e-mail -> name -> own rows.
    instructor_row = df[df['Email'] == email]
    if instructor_row.empty:
        instructor_name = None
        instructor_courses = None
    else:
        instructor_name = instructor_row['Instructor'].iloc[0]
        instructor_courses = df[df['Instructor'] == instructor_name]

    for slot_code in blocked_slots:
        if len(slot_code) < 2:
            print(f"Error in row {idx}, email: {email}, constraint: {constraint_type}, slot code: '{slot_code}' (invalid length)")
            continue  # too short to hold a day letter and a slot digit

        try:
            day_full = time_slot_mapping[slot_code[0]]       # M, T, W, R, F
            time_slot_full = time_slot_index[slot_code[1]]   # 1-8
        except KeyError:
            # Unknown day letter or slot digit: report it and skip the code.
            # (Only IndexError used to be caught, so this case ended the run,
            # and the message below had been swallowed into a comment.)
            print(f"Error in row {idx}, email: {email}, constraint: {constraint_type}, slot code: '{slot_code}'")
            continue

        if instructor_courses is None:
            continue  # e-mail not present in the schedule table

        for _, course_row in instructor_courses.iterrows():
            course = course_row['Course']
            # Section count of THIS instructor's row.  It was previously read
            # from the first row of the course, which can belong to another
            # instructor with a different number of sections.
            num_sections = int(course_row['# Sections'])
            parts = [1, 2, 3] if course == "CS435" else [1, 2]

            for section_id in range(1, num_sections + 1):
                for part in parts:
                    var_name = f"X_{course}_{instructor_name}_{section_id}_{part}_{day_full}_{time_slot_full}"
                    if var_name in variables:
                        total_points += points * variables[var_name]

# Make the balance limits hard: the sum of all balance slack variables must
# be zero.  (No objective is set by this statement.)
balance_is_hard = total_slack == 0
model.addConstr(balance_is_hard, name="balance constraints are hard.")


from gurobipy import Model, GRB

def add_slack_variables_and_constraints(df, model, variables):
    """
    Create one binary variable u_(instructor, day) per instructor and day and
    force it to 1 whenever the instructor has any section part on that day.

    For every existing assignment variable X of the instructor on that day
    the constraint ``X <= u_(instructor, day)`` is added.  Rows with zero
    sections are skipped.  The module-level ``days`` and ``time_slots`` lists
    are used.

    Parameters:
        df (pd.DataFrame): Rows with 'Course', 'Instructor' and '# Sections'.
        model (gurobipy.Model): The model receiving variables and constraints.
        variables (dict): Assignment variables keyed by their
                          "X_<course>_<instructor>_<section>_<part>_<day>_<slot>" name.

    Returns:
        dict: The u variables keyed by "u_<instructor>_<day>".
    """
    day_indicators = {}

    for _, row in df.iterrows():
        course = row['Course']
        instructor = row['Instructor']
        num_sections = int(row['# Sections'])
        if num_sections == 0:
            continue

        # Every course meets in two parts, except CS435 which has three.
        parts = [1, 2, 3] if course == "CS435" else [1, 2]

        for day in days:
            # The indicator is shared by all courses of the instructor.
            indicator_name = f"u_{instructor}_{day}"
            if indicator_name not in day_indicators:
                day_indicators[indicator_name] = model.addVar(vtype=GRB.BINARY, name=indicator_name)
            indicator = day_indicators[indicator_name]

            for section_id in range(1, num_sections + 1):
                for part in parts:
                    for slot in time_slots:
                        var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                        if var_name not in variables:
                            continue
                        # Any assignment on this day switches the indicator on.
                        model.addConstr(
                            variables[var_name] <= indicator,
                            name=f"slack_constraint_{instructor}_{day}_{section_id}_{part}_{slot}",
                        )

    return day_indicators
def compute_sum_of_slack_variables(slack_vars, model):
    """
    Tie a new model variable to the sum of all slack variables u_(instructor, day).

    Parameters:
        slack_vars (dict): Dictionary of slack variables u_(instructor, day).
        model (gurobipy.Model): The Gurobi model that receives the new variable
                                and the constraint defining it.

    Returns:
        The continuous model variable named "total_slack_sum", which the added
        constraint "sum_slack_constraint" forces to equal the sum of the values
        in slack_vars.
    """
    aggregate_var = model.addVar(vtype=GRB.CONTINUOUS, name="total_slack_sum")

    # Builtin sum() starts from 0, so an empty dictionary yields the constant 0
    slack_expression = sum(slack_vars.values())
    model.addConstr(aggregate_var == slack_expression, name="sum_slack_constraint")

    return aggregate_var
slack_vars = add_slack_variables_and_constraints(df, model, variables)

# Labels of the two time slots that the pairing constraints below refer to
part1_slot = "6:00-7:30 PM"
part2_slot = "7:30-9:00 PM"

# For every section and day, the variable for part 1 in the 6:00-7:30 PM slot
# must equal the variable for part 2 in the 7:30-9:00 PM slot of the same day.
for _, row in df.iterrows():
    course, instructor = row['Course'], row['Instructor']
    num_sections = int(row['# Sections'])

    # A row with zero sections yields an empty range and adds no constraints
    for section_id in range(1, num_sections + 1):
        section_key = f"{course}_{instructor}_{section_id}"
        for day in days:
            part1_var_name = f"X_{section_key}_1_{day}_{part1_slot}"
            part2_var_name = f"X_{section_key}_2_{day}_{part2_slot}"

            part1_var = variables[part1_var_name]
            part2_var = variables[part2_var_name]
            model.addConstr(part1_var == part2_var,
                            name=f"timing_constraint_{section_key}_{day}")

# Link a single model variable to the sum of all slack variables
total_slack_sum = compute_sum_of_slack_variables(slack_vars, model)
consecutive_sum = 0
# For every instructor, day and window of three consecutive entries of
# time_slots, limit to 2 the sum of any three decision variables that belong to
# three different (course, section, part) combinations, one per slot.
for instructor in df['Instructor'].unique():
    # The instructor's rows do not depend on the day or the slot: filter once
    instructor_df = df[df['Instructor'] == instructor]

    # Every (course, section, part) taught by this instructor, in row order.
    # The part list is derived from the course of the row itself (the earlier
    # version reused the first course's part list for the second and third
    # course, so CS435's third part could be missed or wrongly added).
    section_parts = []
    for _, instructor_row in instructor_df.iterrows():
        taught_course = instructor_row['Course']
        # CS435 sections are split into three parts, all other courses into two
        parts = [1, 2, 3] if taught_course == "CS435" else [1, 2]
        for section_id in range(1, int(instructor_row['# Sections']) + 1):
            for part in parts:
                section_parts.append((taught_course, section_id, part))

    for day in days:
        # i indexes the first of three consecutive slots
        for i in range(len(time_slots) - 2):
            slot1 = time_slots[i]
            slot2 = time_slots[i + 1]
            slot3 = time_slots[i + 2]
            constraint_name = (
                f"consecutive_slots_constraint_{instructor}_{day}_{slot1}_{slot2}_{slot3}"
            )

            # Look up each section part's decision variable once per slot;
            # None marks a variable that does not exist in the dictionary
            slot_candidates = []
            for slot in (slot1, slot2, slot3):
                slot_candidates.append([
                    variables.get(f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}")
                    for course, section_id, part in section_parts
                ])
            first_vars, second_vars, third_vars = slot_candidates

            for first, first_var in zip(section_parts, first_vars):
                for second, second_var in zip(section_parts, second_vars):
                    if second == first:
                        continue
                    for third, third_var in zip(section_parts, third_vars):
                        if third == first or third == second:
                            continue

                        present = [
                            var for var in (first_var, second_var, third_var)
                            if var is not None
                        ]
                        # With no variable at all the comparison would be a
                        # plain boolean rather than a constraint: skip it
                        if not present:
                            continue

                        consecutive_sum = sum(present)
                        model.addConstr(consecutive_sum <= 2, name=constraint_name)

 


# Objective: maximise the total points minus the total slack
objective_expr = total_points - total_slack_sum
model.setObjective(objective_expr, GRB.MAXIMIZE)

# Update the model before reporting the build time and solving
model.update()

build_seconds = time.time() - start_time
print("scheduling model time= ", build_seconds, " seconds\n")

# Solve the model and report how long the optimisation itself took
start_optimize = time.time()
model.optimize()
solve_seconds = time.time() - start_optimize
print("optimization time= ", solve_seconds, " seconds\n")
#print("section_capacity_map after calculation",section_capacity_map)


def calculate_scheduled_percentages(df, variables, model, day_labels=None, slot_labels=None):
    """
    Calculate the share of scheduled classes that falls on each (day, time slot).

    A class part counts as scheduled when its variable
    X_<course>_<instructor>_<section>_<part>_<day>_<slot> has a value above 0.5.
    Variables missing from the dictionary are treated as not scheduled.
    CS435 sections have three parts; all other courses have two.

    Parameters:
        df (pd.DataFrame): The DataFrame containing course, instructor, and section data
                           (columns 'Course', 'Instructor' and '# Sections').
        variables (dict): Dictionary of Gurobi binary variables representing the assignment of
                          instructors to sections, days, and time slots, keyed by variable name.
        model (gurobipy.Model): The optimized Gurobi model. Not used by the computation;
                                kept so existing callers keep working.
        day_labels (iterable, optional): Days to report on. Defaults to the module-level `days`.
        slot_labels (iterable, optional): Time slots to report on. Defaults to the
                                          module-level `time_slots`.

    Returns:
        pd.DataFrame: One row per (day, time slot) with columns 'Day', 'Time Slot' and
                      'Percentage'. Percentages are 0 when nothing is scheduled.
    """
    # Fall back to the module-level schedule grid when none is supplied
    if day_labels is None:
        day_labels = days
    if slot_labels is None:
        slot_labels = time_slots

    # Number of scheduled class parts for each (day, time slot)
    schedule_counts = {(day, slot): 0 for day in day_labels for slot in slot_labels}
    total_classes = 0

    for _, row in df.iterrows():
        course = row['Course']
        instructor = row['Instructor']
        num_sections = int(row['# Sections'])
        parts = [1, 2, 3] if course == "CS435" else [1, 2]

        # A row with zero sections yields an empty range and is skipped
        for section_id in range(1, num_sections + 1):
            for part in parts:
                for day in day_labels:
                    for slot in slot_labels:
                        var_name = f"X_{course}_{instructor}_{section_id}_{part}_{day}_{slot}"
                        var = variables.get(var_name)

                        # A variable that was never created cannot be scheduled
                        if var is not None and var.X > 0.5:
                            schedule_counts[(day, slot)] += 1
                            total_classes += 1

    percentages = [
        {
            'Day': day,
            'Time Slot': slot,
            'Percentage': (count / total_classes) * 100 if total_classes > 0 else 0,
        }
        for (day, slot), count in schedule_counts.items()
    ]

    # Explicit columns keep the schema stable even when there are no rows
    return pd.DataFrame(percentages, columns=['Day', 'Time Slot', 'Percentage'])


if model.Status == GRB.INFEASIBLE:
    # The model is infeasible: compute an IIS and list the constraints that
    # belong to it, numbered by their position in the full model.
    print("The model is infeasible; computing IIS")
    model.computeIIS()

    # Gurobi reports a constraint's sense as '<', '>' or '='; spell out the
    # inequalities so each row is written with its real relation
    sense_symbols = {'<': '<=', '>': '>=', '=': '='}

    with open("model_with_constraints.txt", "w") as f:
        f.write("Infeasible Model with Numbered Constraints\n\n")
        for i, constr in enumerate(model.getConstrs(), 1):
            if constr.IISConstr:
                sense = sense_symbols.get(constr.Sense, constr.Sense)
                f.write(f"Constraint {i}: {constr.ConstrName}\n")
                f.write(f"{model.getRow(constr)} {sense} {constr.RHS}\n\n")

    print("IIS written to model_with_constraints.txt")

if model.Status == GRB.OPTIMAL:
    # An optimal solution exists: extract the schedule and write it to two text
    # files, one sorted lexicographically and one grouped by instructor.
    schedule = []
    slack_values = []

    def _numbered_entries(entries, tracker):
        """
        Yield each schedule entry with a running per-course section number and capacity.

        Course and instructor names are stripped so they match the keys of
        section_capacity_map. `tracker` maps a course to the section number
        currently being written; it advances after the section's final part
        (part 3 for CS435, part 2 for every other course).
        """
        for course, instructor, email, section_id, part, day, slot in entries:
            course = course.strip()
            instructor = instructor.strip()

            assigned_section_number = tracker.setdefault(course, 1)
            capacity = section_capacity_map.get((course, instructor, int(section_id)))

            yield (course, instructor, email, assigned_section_number,
                   part, day, slot, capacity)

            if (part == "2" and course != "CS435") or (part == "3" and course == "CS435"):
                tracker[course] += 1

    for var in model.getVars():
        if var.varName.startswith('Slack_'):
            # NOTE(review): these values are collected but never written to
            # either output file below - confirm whether they should be
            slack_values.append((var.varName, var.x))
        elif var.x > 0.5:  # The variable is selected in the optimal solution
            name = var.varName
            try:
                _, course, instructor, section_id, part, day, slot = name.split('_')
            except ValueError:
                # Name does not have the seven '_'-separated fields of an
                # assignment variable: not a schedule entry
                continue
            # Find the corresponding email for the instructor in the DataFrame
            email = df.loc[df['Instructor'] == instructor, 'Email'].values[0]
            schedule.append((course, instructor, email, section_id, part, day, slot))

    # Sort the schedule lexicographically (all fields are strings)
    schedule.sort()

    # Dictionary to track section numbers for each course
    course_section_tracker = {}
    with open("final_schedule_with_slack.txt", "w") as f:
        f.write("Course Schedule (Lexicographically Sorted):\n\n")
        for (course, instructor, email, assigned_section_number,
             part, day, slot, capacity) in _numbered_entries(schedule, course_section_tracker):
            f.write(f"Course: {course}, Instructor: {instructor}, Email: {email}, Section: {assigned_section_number}, Part: {part}, Day: {day}, Slot: {slot}, Capacity: {capacity}\n")

    print("Final schedule with slack values written to final_schedule_with_slack.txt")

    # Stable sort by instructor keeps the lexicographic order within each instructor
    schedule_sorted_by_instructor = sorted(schedule, key=lambda x: x[1])

    # Section numbering restarts for the second file
    course_section_tracker = {}
    with open("final_schedule_sorted_by_instructor.txt", "w") as f:
        f.write("Course Schedule (Sorted by Instructor):\n\n")
        current_instructor = None
        for (course, instructor, email, assigned_section_number,
             part, day, slot, capacity) in _numbered_entries(schedule_sorted_by_instructor,
                                                             course_section_tracker):
            # When a new instructor starts, write their name and email first
            if instructor != current_instructor:
                if current_instructor is not None:
                    f.write("\n")  # Separate different instructors' sections
                f.write(f"Instructor: {instructor}, Email: {email}\n")
                current_instructor = instructor

            # Write the course details for the current instructor
            f.write(f"\tCourse: {course}, Section: {assigned_section_number}, Part: {part}, Day: {day}, Slot: {slot}, Capacity: {capacity}\n")

    print("Final schedule sorted by instructor written to final_schedule_sorted_by_instructor.txt")

# The percentages read the solution value of every assignment variable, so they
# can only be computed when the solver produced at least one solution (this is
# not the case for an infeasible model, for example).
if model.SolCount > 0:
    percentage_df = calculate_scheduled_percentages(df, variables, model)

    # Print the resulting DataFrame with percentages
    print(percentage_df)

    # Also write the percentages to a CSV file
    percentage_df.to_csv("scheduled_percentages.csv", index=False)
else:
    print("No solution available; skipping scheduled percentages")



Aggregated DataFrame:
   Course            Instructor Course_Number             Email  # Sections
0   CS114     Kapleau, Jonathan           114  kapleau@njit.edu           2
1   CS114    Zaidenberg, Ayelet           114     acz6@njit.edu           1
2   CS115    Qerimaj, Jertishta           115     jq55@njit.edu           3
3   CS115       Soltis, Jolanta           115   soltis@njit.edu           2
4   CS116               Wu, Jun           116     jw65@njit.edu           2
..    ...                   ...           ...               ...         ...
68  DS669              Li, Jing           669   jingli@njit.edu           1
69  DS675  Bakhshaliyev, Khalid           675    kb647@njit.edu           1
70  DS675         Roshan, Usman           675    usman@njit.edu           2
71  DS677            Li, Daming           677      dli@njit.edu           1
72  DS680           Du, Mengnan           680    md748@njit.edu           1

[73 rows x 5 columns]
scheduling model time=  22.938082933425903