In [1]:
import pandas as pd

# Workbook that holds every input sheet for the scheduling problem
data_path = "nurse_schedule_project2_data_large_VA.xlsx"

# Load each sheet into its own DataFrame. The task-time sheet is flipped so
# that its original column labels become the row labels.
locations = pd.read_excel(data_path, sheet_name="locations", index_col=0, header=None)
nurses = pd.read_excel(data_path, sheet_name="nurses", index_col=0)
task_time = pd.read_excel(data_path, sheet_name="task_execution_time", index_col=0).T
patients = pd.read_excel(data_path, sheet_name="patients", index_col=0)
nurses['current_location'] = pd.NA

# Day labels; these are also the per-day task columns of the patients sheet
D = ['M', 'T', 'W', 'Th', 'F', 'S', 'Su']         # set of days


def _split_comma_list(cell):
    """Split a ', '-separated string into a list; any non-string becomes []."""
    return cell.split(', ') if isinstance(cell, str) else []


# Turn the comma separated cells into real lists (per-day tasks, nurse skills)
for day_column in D:
    patients[day_column] = patients[day_column].apply(_split_comma_list)
nurses['skillset'] = nurses['skillset'].apply(_split_comma_list)

# Flipped views: one column per patient / per nurse
patients_transposed = patients.T
nurses_transposed = nurses.T

# Remaining sets and parameters
N = list(nurses_transposed.columns)               # set of nurses
P = list(patients_transposed.columns)             # set of patients
L = locations.index.tolist()                      # set of locations
T_tasks = list(task_time.columns)                 # set of tasks

def find_keys_with_inner_value(df, target_value):
    """Return the column labels of ``df`` whose 'location' entry equals ``target_value``.

    ``df`` is expected in transposed form (one column per record and a row
    labelled 'location'); it is flipped back so that every record is a row
    before filtering. The labels are returned as a plain list, in their
    original order.
    """
    by_record = df.T
    at_target = by_record['location'] == target_value
    return by_record.loc[at_target].index.tolist()

# Quick check of the function above: list the patients whose location is 'Norfolk'
find_keys_with_inner_value(patients_transposed, 'Norfolk')

['Patient_30',
 'Patient_32',
 'Patient_35',
 'Patient_49',
 'Patient_60',
 'Patient_64']

In [14]:
def _collect_tasks_per_day(patient_table, days):
    """Map (patient, day) to the patient's task list for every day column."""
    tasks_by_key = {}
    for patient_id, row in patient_table.iterrows():
        for column, cell in row.items():
            if column in days:
                tasks_by_key[(patient_id, column)] = cell
    return tasks_by_key


def _collect_available_nurses(tasks_by_key, skillsets):
    """Map (patient, day) to {task: nurses whose skillset contains the task}."""
    availability = {}
    for key, day_tasks in tasks_by_key.items():
        per_task = {}
        for task in day_tasks:
            # A fresh list per entry: these lists are edited in place later on
            per_task[task] = [
                nurse_id
                for nurse_id, nurse_skills in skillsets.items()
                if task in nurse_skills
            ]
        availability[key] = per_task
    return availability


def _collect_unmet_tasks(tasks_by_key, days):
    """Map every day to the list of (patient, task) pairs requested on it."""
    unmet = {day: [] for day in days}
    for (patient_id, patient_day), tasks in tasks_by_key.items():
        unmet[patient_day].extend((patient_id, task) for task in tasks)
    return unmet


# Each patient's task list per day, keyed by (patient, day)
patients_tasks_per_day = _collect_tasks_per_day(patients, D)

# Nurses able to perform each task of each patient on each day
nurses_available_for_patients_tasks = _collect_available_nurses(
    patients_tasks_per_day, nurses['skillset']
)

# (patient, task) pairs still to be covered, grouped by day
unmet_patient_tasks_by_day = _collect_unmet_tasks(patients_tasks_per_day, D)

# Running schedule: one (initially empty) list per nurse per day
total_schedule = {}
for nurse_id in N:
    total_schedule[nurse_id] = {day: [] for day in D}


In [15]:
# Helper function definitions

def fully_assign_nurse(nurse_id, day, locations, nurses_available_for_patients_tasks, total_schedule, task_time):
    """Give ``nurse_id`` tasks on ``day`` at the first location where any task fits.

    Locations are tried in iteration order. For each one, the tasks of the
    patients at that location are added greedily while the nurse stays within
    the 600-minute daily limit. As soon as a location yields at least one
    task, those tasks are appended to ``total_schedule[nurse_id][day]`` and
    the function returns; later locations are not examined.

    Args:
        nurse_id: Nurse to schedule.
        day: Day label used in the (patient, day) keys.
        locations: Iterable of location names.
            NOTE(review): presumably the list ``L``; iterating a DataFrame
            would yield its column labels instead -- confirm at the call site.
        nurses_available_for_patients_tasks: {(patient, day): {task: [nurse, ...]}}.
        total_schedule: {nurse: {day: [(location, patient, task), ...]}}, updated in place.
        task_time: Table read as ``task_time[task]['Time']`` for the duration
            of a task in minutes.

    Returns:
        None.
    """
    for location in locations:
        # Every location is evaluated from scratch
        nurse_schedule = []
        scheduled_minutes = 0
        # The lookup helper transposes its argument itself, so it must be
        # handed the transposed patient table (as find_maximum_schedule does)
        patients_in_location = find_keys_with_inner_value(patients_transposed, location)
        for patient_id in patients_in_location:
            available_by_task = nurses_available_for_patients_tasks.get((patient_id, day), {})
            for task in patients_tasks_per_day.get((patient_id, day), []):
                # Only tasks this nurse is still listed for are candidates
                if nurse_id not in available_by_task.get(task, []):
                    continue
                # Same duration lookup as the rest of the notebook: tasks are
                # the columns of task_time and 'Time' is the row label
                task_duration = task_time[task]['Time']
                # A task that would break the daily limit is skipped; a
                # shorter task further down may still fit
                if scheduled_minutes + task_duration <= 600:
                    nurse_schedule.append((location, patient_id, task))
                    scheduled_minutes += task_duration
                    # NOTE(review): this helper edits the module-level
                    # availability table and drops the nurse from this task
                    # for every patient on the day -- confirm that is intended
                    remove_task_from_availability(nurse_id, patient_id, task, day)
        if nurse_schedule:
            total_schedule[nurse_id][day].extend(nurse_schedule)
            # The nurse is treated as fully booked for the day
            return

def remove_task_from_availability(nurse_id, patient_id, task, day):
    """Drop ``nurse_id`` from the candidate list of ``task`` for every patient on ``day``.

    Works on the module-level ``nurses_available_for_patients_tasks`` table
    and edits its lists in place. ``patient_id`` is accepted but not used:
    the removal applies to all patients of that day.
    """
    for (_, entry_day), nurses_by_task in nurses_available_for_patients_tasks.items():
        if entry_day != day or task not in nurses_by_task:
            continue
        candidates = nurses_by_task[task]
        if nurse_id in candidates:
            candidates.remove(nurse_id)

def find_maximum_schedule(nurse_id, location, day, patients_tasks_per_day, nurses_available_for_patients_tasks, task_time):
    """Return the longest single-patient task list ``nurse_id`` could do at ``location`` on ``day``.

    Every patient at the location is evaluated on their own: the patient's
    tasks for the day that the nurse is still listed for are collected in
    order, skipping any task that would push the running total above 600
    minutes. The patient with the largest total wins (the first one on ties).

    Returns:
        A list of (patient, task) tuples, or an empty list when the nurse can
        do nothing at this location.
    """
    winner = []
    winner_minutes = 0

    for patient_id in find_keys_with_inner_value(patients_transposed, location):
        available_by_task = nurses_available_for_patients_tasks.get((patient_id, day), {})
        picked = []
        minutes = 0

        for task in patients_tasks_per_day.get((patient_id, day), []):
            # Tasks the nurse is not listed for are ignored
            if nurse_id not in available_by_task.get(task, []):
                continue
            duration = task_time[task]['Time']  # minutes needed for this task
            # Keep the task only while the 600-minute daily limit holds
            if (minutes + duration) <= 600:
                picked.append((patient_id, task))
                minutes += duration

        # Strict comparison: an equally long later candidate does not replace
        # the current winner
        if minutes > winner_minutes:
            winner, winner_minutes = picked, minutes

    return winner

#test the above function
#print(find_maximum_schedule('Nurse_1', 'Arlington', 'M', patients_tasks_per_day, nurses_available_for_patients_tasks, task_time))

def remove_nurse_from_other_locations(nurse_id, assigned_location, day, nurses_available_for_patients_tasks, patients_tasks_per_day):
    """Make ``nurse_id`` unavailable on ``day`` for patients outside ``assigned_location``.

    For every patient that has tasks on ``day`` and whose 'location' (read
    from the module-level ``patients`` table) differs from
    ``assigned_location``, the nurse is removed from each of that patient's
    per-task candidate lists. The lists are edited in place.

    Args:
        nurse_id: Nurse to remove.
        assigned_location: Location the nurse stays available at. Passing a
            value that matches no patient location (e.g. ``None``) removes
            the nurse from every patient of the day.
        day: Day label used in the (patient, day) keys.
        nurses_available_for_patients_tasks: {(patient, day): {task: [nurse, ...]}}.
        patients_tasks_per_day: {(patient, day): [task, ...]}.

    Returns:
        None.
    """
    for (patient_id, patient_day), tasks in patients_tasks_per_day.items():
        # Patients without tasks on this day have nothing to clean up
        if patient_day != day or not tasks:
            continue
        # One lookup per patient: the location does not depend on the task
        patient_location = patients.loc[patient_id, 'location']
        if patient_location != assigned_location:
            availability = nurses_available_for_patients_tasks.get((patient_id, day), {})
            for task_available_nurses in availability.values():
                if nurse_id in task_available_nurses:
                    # Slice assignment keeps the same list object while
                    # dropping every occurrence of the nurse
                    task_available_nurses[:] = [
                        other for other in task_available_nurses if other != nurse_id
                    ]

#Test function above by printing the availability, then removing a nurse from a location and printing again
# print(nurses_available_for_patients_tasks[('Patient_1', 'F')])
# remove_nurse_from_other_locations('Nurse_5', 'Arlington', 'F', nurses_available_for_patients_tasks, patients_tasks_per_day)
# print(nurses_available_for_patients_tasks[('Patient_1', 'F')])

In [19]:
def schedule_nurses(nurses, patients, task_time, patients_tasks_per_day, nurses_available_for_patients_tasks, verbose=True):
    """Greedily build a weekly schedule, one day at a time.

    For each day in the module-level ``D`` the loop repeatedly:

    1. asks ``find_maximum_schedule`` for every nurse and every location in
       the module-level ``L`` and keeps each nurse's longest candidate;
    2. picks the nurse with the longest candidate (the first one on ties);
    3. assigns that candidate's tasks to the nurse, as long as the nurse's
       minutes for the day stay within 600;
    4. makes the nurse unavailable at every other location for the rest of
       the day and for the following day.

    The day ends when all its tasks are covered or no nurse can take any of
    the remaining ones.

    Args:
        nurses: Nurse table; its index provides the nurse ids.
        patients: Patient table with a 'location' column, indexed by patient id.
        task_time: Table read as ``task_time[task]['Time']`` for the duration
            of a task in minutes.
        patients_tasks_per_day: {(patient, day): [task, ...]}.
        nurses_available_for_patients_tasks: {(patient, day): {task: [nurse, ...]}}.
            It is copied first, so the caller's table is left untouched and
            the function can be run again with the same inputs.
        verbose: When true (the default), print the ranked candidates and the
            number of unmet tasks after every round.

    Returns:
        {nurse: {day: [(location, patient, task), ...]}}
    """
    daily_limit = 600  # minutes a nurse may work per day
    nurse_ids = list(nurses.index)

    # Private copy of the availability table: every edit below stays local
    availability = {
        key: {task: list(candidates) for task, candidates in per_task.items()}
        for key, per_task in nurses_available_for_patients_tasks.items()
    }

    total_schedule = {nurse_id: {day: [] for day in D} for nurse_id in nurse_ids}
    unmet_patient_tasks_by_day = {day: [] for day in D}
    for (patient_id, day), tasks in patients_tasks_per_day.items():
        unmet_patient_tasks_by_day[day].extend((patient_id, task) for task in tasks)

    for day_index, day in enumerate(D):
        next_day = D[day_index + 1] if day_index + 1 < len(D) else None
        unmet = unmet_patient_tasks_by_day[day]
        # Minutes already assigned to each nurse on this day
        minutes_worked = dict.fromkeys(nurse_ids, 0)

        while unmet:
            # Longest candidate schedule of every nurse over all locations
            max_schedule_for_day = {}
            for nurse_id in nurse_ids:
                best_schedule_for_nurse = []
                best_total_time_for_nurse = 0
                for location in L:
                    candidate = find_maximum_schedule(nurse_id, location, day, patients_tasks_per_day, availability, task_time)
                    candidate_minutes = sum(task_time[task]['Time'] for _, task in candidate)
                    if best_total_time_for_nurse < candidate_minutes <= daily_limit:
                        best_schedule_for_nurse = candidate
                        best_total_time_for_nurse = candidate_minutes
                if best_total_time_for_nurse > 0:
                    max_schedule_for_day[nurse_id] = (best_total_time_for_nurse, best_schedule_for_nurse)

            # Longest candidates first; the sort is stable, so ties keep nurse order
            sorted_nurses_by_schedule_time = sorted(max_schedule_for_day.items(), key=lambda item: item[1][0], reverse=True)
            if verbose:
                print((sorted_nurses_by_schedule_time))

            # Nobody can take any of the remaining tasks: give up on this day
            if not sorted_nurses_by_schedule_time:
                break

            nurse_id, (_, nurse_schedule) = sorted_nurses_by_schedule_time[0]
            patient_location = patients.loc[nurse_schedule[0][0], 'location']

            assigned_any = False
            for patient_id, task in nurse_schedule:
                candidates = availability[(patient_id, day)][task]
                task_duration = task_time[task]['Time']
                if minutes_worked[nurse_id] + task_duration > daily_limit:
                    # The task no longer fits into this nurse's day. Only this
                    # pairing is dropped, so a shorter task can still be
                    # offered to the nurse and the loop keeps making progress.
                    candidates.remove(nurse_id)
                    continue
                total_schedule[nurse_id][day].append((patient_location, patient_id, task))
                minutes_worked[nurse_id] += task_duration
                if (patient_id, task) in unmet:
                    unmet.remove((patient_id, task))
                # The task is covered: no other nurse may be offered it again
                candidates.clear()
                assigned_any = True

            if assigned_any:
                # One location per day ...
                remove_nurse_from_other_locations(nurse_id, patient_location, day, availability, patients_tasks_per_day)
                # ... and no other location on the following day either; the
                # nurse remains available at the current location
                if next_day is not None:
                    remove_nurse_from_other_locations(nurse_id, patient_location, next_day, availability, patients_tasks_per_day)

            if verbose:
                print(len(unmet))
        print(f'Day {day} done')
    return total_schedule

# Run the scheduling function; the result rebinds the module-level
# total_schedule to the schedule returned by schedule_nurses
total_schedule = schedule_nurses(nurses, patients, task_time, patients_tasks_per_day, nurses_available_for_patients_tasks)

# Print the total schedule (nurse -> day -> list of assignments)
print(total_schedule)

[('Nurse_41', (139, [('Patient_98', 'drawing blood'), ('Patient_98', 'wound care')])), ('Nurse_42', (139, [('Patient_98', 'drawing blood'), ('Patient_98', 'wound care')])), ('Nurse_46', (139, [('Patient_23', 'drawing blood'), ('Patient_23', 'wound care')])), ('Nurse_47', (139, [('Patient_98', 'drawing blood'), ('Patient_98', 'wound care')])), ('Nurse_49', (139, [('Patient_98', 'drawing blood'), ('Patient_98', 'wound care')])), ('Nurse_8', (137, [('Patient_41', 'wound care'), ('Patient_41', 'physical therapy')])), ('Nurse_19', (137, [('Patient_25', 'wound care'), ('Patient_25', 'physical therapy')])), ('Nurse_20', (137, [('Patient_25', 'wound care'), ('Patient_25', 'physical therapy')])), ('Nurse_44', (137, [('Patient_25', 'wound care'), ('Patient_25', 'physical therapy')])), ('Nurse_15', (129, [('Patient_26', 'administering injections'), ('Patient_26', 'drawing blood')])), ('Nurse_17', (129, [('Patient_7', 'drawing blood'), ('Patient_7', 'administering injections')])), ('Nurse_21', (12

KeyboardInterrupt: 

**VARS**
list for each patient of available nurses, for each day, for each task
    for each patient
        for each nurse
            if the nurse's skillset includes the patient's task
                add nurse to list

list of unmet (patient, task) pairs
    for each patient
        for each task
            add patient,task to list

**main function**
for each day
    while not done w tasks on that day
        find the maximum schedule (closest to 600 minutes) total for all locations 
        assign that nurse to that schedule
            aka add it to master_schedule
            impart the removal constraints as defined for removing the nurse from other availabilities
         

**rules for after scheduling a nurse**
when we schedule a nurse to a location, remove it from all lists at other locations
when a nurse's scheduled time on a day plus the time of a task exceeds 600 minutes, remove the nurse from all other patients for that day
    additionally, remove the nurse from all patients at other locations on the following day


**helper functions to use in main function**
function to "fully assign" a nurse on a day
loops through all locations,
    finds the location where the nurse's schedule is maximized on that day, i.e.
        pick a location
        assign the nurse to available patient + task pairs while the scheduled time on the day is <= 600

helper function to "find maximum scheduled time on a day at a location for a nurse"
    take in nurse, location, day
    return a "schedule", a.k.a. a list of (patient, task) pairs

helper void function to "remove nurse from other locations"
    take in list of patients, nurse, location

CONSTRAINTS:
1. A nurse can work up to 600 minutes / day
2. A nurse must take a day off to travel if changing locations
3. All patients' tasks must be met
4. A nurse can only work at one location on a given day.
5. A nurse can only perform tasks that match their skillset.
6. A task for a patient can only be assigned to one nurse on a given day.

* A nurse can be at a maximum of 2 different locations over all days.
* A nurse must work consecutive days




