In [1]:
import json
import numpy as np
import random

# Load data
# The encoding is pinned to UTF-8 so the result does not depend on the platform's default.
with open('data/W1-01.json', encoding='utf-8') as f:
    data = json.load(f)

with open('solutions/sol-W1-01.json', encoding='utf-8') as f:
    solution = json.load(f)

# Hyperparameters
overtime_willingness_p = 1.0  # Willingness of nurses to work overtime


def _require_key(mapping, key, source):
    """Return ``mapping[key]``; if it is absent raise a KeyError naming the keys that do exist.

    Args:
        mapping: Parsed JSON object (a dict).
        key: Top-level key to read.
        source: Label used in the error message (here: the file the mapping was loaded from).

    Raises:
        KeyError: if ``key`` is not in ``mapping``.
    """
    try:
        return mapping[key]
    except KeyError:
        raise KeyError(
            f"{key!r} not found in {source}; available top-level keys: {sorted(mapping)}"
        ) from None


# Extract key data from the JSONs
# NOTE(review): these key names are assumptions about the file schema; a missing key now
# reports the keys that are actually present so the right name can be picked.
nurses = _require_key(data, "Nurses", "data/W1-01.json")
shifts = _require_key(data, "Shifts", "data/W1-01.json")
shift_off_requests = _require_key(data, "Shiftoffrequests", "data/W1-01.json")
contracts = _require_key(data, "Contracts", "data/W1-01.json")
solutions = _require_key(solution, "Solution", "solutions/sol-W1-01.json")

# Create nurse availability from contracts and time-off requests
# Every nurse starts with an empty list of shift IDs.
nurse_availability = {nurse["ID"]: [] for nurse in nurses}


KeyError: 'Shifts'

In [None]:

# Fill nurse_availability: for every nurse, the IDs of the shifts they could still take on.
#
# Everything that depends only on the nurse (requested days off, contract, number of shifts
# already scheduled) is computed once per nurse instead of once per (shift, nurse) pair.
# Each nurse's list is rebuilt from scratch, so re-running this cell does not add duplicates.
for nurse in nurses:
    nurse_id = nurse["ID"]

    # Shifts this nurse asked not to work.
    # NOTE(review): assumes shift_off_requests maps nurse ID -> list of {"ShiftID": ...};
    # confirm against the data file.
    requested_off = {s["ShiftID"] for s in shift_off_requests.get(nurse_id, [])}

    # Contract constraint (max shifts per period)
    contract = next((c for c in contracts if c["NurseID"] == nurse_id), None)
    if contract is None:
        raise ValueError(f"No contract found for nurse {nurse_id!r}")

    # Count every shift already assigned to this nurse in the solution. Counting only the
    # shift under examination would yield at most one match per slot and the cap would
    # practically never be reached.
    scheduled_shifts = sum(1 for s in solutions if s["NurseID"] == nurse_id)
    at_capacity = scheduled_shifts >= contract["MaxShifts"]

    # A nurse at their contractual cap is available for nothing; otherwise they are
    # available for every shift they did not request off (kept in the order of `shifts`).
    nurse_availability[nurse_id] = [
        shift["ShiftID"]
        for shift in shifts
        if not at_capacity and shift["ShiftID"] not in requested_off
    ]

# State representation
def get_state(t):
    """Build the state dictionary for time step ``t`` from the module-level scheduling data.

    Args:
        t: Value compared against each assignment's ``"ShiftID"`` to select the assignments
            that belong to this time step.

    Returns:
        dict with the keys:
            ``'current_schedule'``     - the module-level ``solutions`` list itself (not a copy).
            ``'absent_status'``        - nurse ID -> that nurse's ``"Absences"`` entry, or an
                                         empty list when the nurse record has none.
            ``'nurse_availability'``   - the module-level ``nurse_availability`` mapping itself.
            ``'overtime_willingness'`` - nurse ID -> a fresh Gaussian sample (mean
                                         ``overtime_willingness_p``, standard deviation 0.1);
                                         re-drawn on every call.
            ``'staffing_level'``       - number of assignments at ``t`` whose nurse is not
                                         ``'vacant'``.
            ``'perturbation'``         - number of assignments at ``t`` whose nurse differs
                                         from the recorded ``"original_nurse_assignment"``.
    """
    state = {}

    # Current schedule: shared by reference, so later in-place edits to `solutions` are visible.
    state['current_schedule'] = solutions

    # Absentee status: nurses without an "Absences" field are treated as having no absences.
    state['absent_status'] = {nurse["ID"]: nurse.get("Absences", []) for nurse in nurses}

    # Nurse availability: which nurses are available to take on additional shifts
    state['nurse_availability'] = nurse_availability

    # Overtime willingness: modeled as a probability distribution (using Gaussian as a stand-in)
    state['overtime_willingness'] = {
        nurse["ID"]: random.gauss(overtime_willingness_p, 0.1) for nurse in nurses
    }

    # Both counters below look only at the assignments of this time step.
    assignments_at_t = [s for s in solutions if s["ShiftID"] == t]

    # Staffing level: 'vacant' marks a slot with no nurse assigned.
    state['staffing_level'] = sum(1 for s in assignments_at_t if s["NurseID"] != 'vacant')

    # Perturbation from the original schedule. An assignment that carries no
    # "original_nurse_assignment" is counted as unchanged rather than raising KeyError.
    state['perturbation'] = sum(
        1
        for s in assignments_at_t
        if s["NurseID"] != s.get("original_nurse_assignment", s["NurseID"])
    )

    return state

# Reward function (using a stand-in for now)
def compute_reward(state, action):
    """Score a state as the negated sum of three penalties.

    Args:
        state: Mapping providing ``'perturbation'``, ``'overtime_willingness'`` (nurse ID ->
            number) and ``'staffing_level'``.
        action: Accepted for interface symmetry; it is not read.

    Returns:
        ``0`` minus the perturbation count, minus the number of nurses whose willingness is
        above 0.5, minus a further 10 when the staffing level is below half the number of
        entries in the module-level ``shifts``.
    """
    # Perturbation penalty: one point per change.
    total = 0 - state['perturbation']

    # Overtime cost: one point per nurse whose willingness exceeds 0.5.
    eager_nurses = [
        nurse_id
        for nurse_id, level in state['overtime_willingness'].items()
        if level > 0.5
    ]
    total = total - len(eager_nurses)

    # Understaffing penalty: flat 10 points when staffing is below len(shifts) // 2.
    staffing = state['staffing_level']
    understaffed = staffing < len(shifts) // 2
    return total - 10 if understaffed else total

# Action space
def take_action(state, action):
    """Apply ``action`` to the module-level ``solutions`` list and return ``(state, reward)``.

    Supported ``action['type']`` values:

    * ``'swap'`` - requires ``'nurse1'`` and ``'nurse2'``. On every shift that is listed in
      both nurses' ``state['nurse_availability']`` entries, an assignment held by one of the
      two nurses is handed to the other.
    * ``'overtime'`` - requires ``'nurse'`` and ``'shift'``. A uniform random draw is compared
      with the nurse's ``state['overtime_willingness']`` value; on success the nurse fills
      ONE vacant slot of that shift.
    * any other value - nothing is changed.

    Only ``solutions`` is mutated. Values already stored in ``state`` are not recomputed
    here, so the reward is calculated from ``state`` exactly as it was passed in.

    Returns:
        tuple: ``(state, reward)`` where ``state`` is the same object that was passed in.
        ``reward`` is ``compute_reward(state, action)`` after a swap or an accepted overtime
        offer, ``-10`` after a declined offer, and ``-1`` for an unrecognised action type.

    Raises:
        KeyError: if ``action`` lacks a required key or a nurse ID is missing from the
            ``state`` mapping that is consulted for it.
    """
    if action['type'] == 'swap':
        nurse1 = action['nurse1']
        nurse2 = action['nurse2']

        # Shifts on which both nurses are available, restricted to known shift IDs.
        availability = state['nurse_availability']
        known_shift_ids = {shift["ShiftID"] for shift in shifts}
        swappable = known_shift_ids & set(availability[nurse1]) & set(availability[nurse2])

        # One pass over the assignments instead of one pass per shift.
        handover = {nurse1: nurse2, nurse2: nurse1}
        for s in solutions:
            if s["ShiftID"] in swappable and s["NurseID"] in handover:
                s["NurseID"] = handover[s["NurseID"]]
        return state, compute_reward(state, action)

    elif action['type'] == 'overtime':
        overtime_nurse = action['nurse']
        overtime_shift = action['shift']

        # Acceptance is probabilistic, based on the nurse's willingness.
        willingness = state['overtime_willingness'][overtime_nurse]
        if random.random() < willingness:
            for s in solutions:
                if s["NurseID"] == 'vacant' and s["ShiftID"] == overtime_shift:
                    s["NurseID"] = overtime_nurse
                    # A single nurse can cover only one slot of a shift, so stop at the
                    # first vacancy instead of filling every vacant slot with them.
                    break
            return state, compute_reward(state, action)
        return state, -10  # Penalty for failed overtime assignment

    # No change, just return the same state
    return state, -1  # Understaffing penalty

# Demonstration of a single transition: snapshot the state, apply one action, show the result.
t = 1  # time step passed to get_state
state = get_state(t)

# The action here is a swap between two nurses (an overtime request is the other option).
action = dict(type='swap', nurse1='nurse_1', nurse2='nurse_2')
state, reward = take_action(state, action)
print(state, reward)


In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Create environment and wrap it in DummyVecEnv for training.
# NOTE: SchedulingEnv is defined in a later cell of this notebook; that cell must have been
# run before this one.
# The solution file is read from 'solutions/', the directory the first cell loads it from.
base_env = SchedulingEnv(
    data_path='data/W1-01.json',
    solution_path='solutions/sol-W1-01.json',
    overtime_p=1.0,
)
# The factory closes over `base_env`, which is never rebound, rather than over `env`,
# the name that is reassigned to the vectorised wrapper on this very line.
env = DummyVecEnv([lambda: base_env])

# Initialize PPO model
model = PPO("MlpPolicy", env, verbose=1)

# Train the model
model.learn(total_timesteps=10000)

# Save the model after training
model.save("scheduling_ppo_model")


In [None]:
# Restore the policy from the file written by model.save("scheduling_ppo_model")
model = PPO.load("scheduling_ppo_model")

# Roll the policy forward for at most 100 steps, stopping as soon as `done` is truthy.
obs = env.reset()
steps_left = 100
done = False
while steps_left > 0 and not done:
    steps_left -= 1
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    print(f"Action: {action}, Reward: {rewards}")


In [None]:

import gym
from gym import spaces
import numpy as np
import pandas as pd
import random
import json

class SchedulingEnv(gym.Env):
    """Gym environment over a nurse schedule loaded from two JSON files.

    An episode lasts ``len(shifts)`` steps; every call to ``step`` consumes one.
    Observations are the flattened nurse-by-shift 0/1 assignment matrix.

    Actions (``spaces.Discrete(3)``):
        0 - leave the schedule unchanged (reward -1)
        1 - swap one randomly chosen shift between two randomly chosen nurses (reward -1)
        2 - offer overtime; accepted with probability ``overtime_p`` (reward -2),
            otherwise declined (reward -3)
    """

    def __init__(self, data_path, solution_path, overtime_p=1.0):
        """Load the instance and its solution and set up the action/observation spaces.

        Args:
            data_path: Path of the JSON file providing ``'nurses'`` and ``'shifts'``.
            solution_path: Path of the JSON file providing ``'schedule'``.
            overtime_p: Probability that an overtime offer is accepted.
        """
        super(SchedulingEnv, self).__init__()

        # Load the data and solution (encoding pinned so it does not depend on the platform)
        with open(data_path, 'r', encoding='utf-8') as f:
            self.data = json.load(f)

        with open(solution_path, 'r', encoding='utf-8') as f:
            self.solution = json.load(f)

        # Hyperparameter
        self.overtime_p = overtime_p  # Probability of a nurse accepting overtime

        # Initialize states
        # NOTE(review): the first cell of this notebook reads the same data file with
        # capitalised keys ("Nurses", "Shifts", "Solution") - confirm which casing is right.
        self.current_schedule = self._initial_schedule()
        self.nurses = self.data['nurses']
        self.shifts = self.data['shifts']
        self.time_steps = len(self.shifts)  # Assuming each shift corresponds to a time step
        self.current_step = 0

        # Action space: 0 = no action (unchanged), 1 = swap, 2 = overtime
        self.action_space = spaces.Discrete(3)

        # Observation space: declared flat because _get_obs returns the flattened matrix;
        # the declared shape has to match what reset()/step() actually return.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(len(self.nurses) * len(self.shifts),), dtype=np.int32
        )

    def _initial_schedule(self):
        """Return a fresh per-nurse copy of the loaded schedule.

        Swaps edit the per-nurse lists in place, so the working schedule must not share
        those lists with ``self.solution``; otherwise ``reset`` could not restore them.
        """
        return [list(assignments) for assignments in self.solution['schedule']]

    def reset(self):
        """Restore the initial schedule, rewind the step counter and return the first observation."""
        self.current_schedule = self._initial_schedule()
        self.current_step = 0
        return self._get_obs()

    def _get_obs(self):
        """Return the current assignments as a flat int32 vector of length nurses * shifts."""
        state = np.zeros((len(self.nurses), len(self.shifts)), dtype=np.int32)

        # Fill the state matrix with current assignments (1 = assigned, 0 = not assigned).
        # NOTE(review): assumes each schedule entry is an integer column index into `shifts`.
        for nurse_idx, assignments in enumerate(self.current_schedule):
            for shift in assignments:
                state[nurse_idx, shift] = 1
        return state.flatten()

    def step(self, action):
        """Apply ``action`` and advance one time step.

        Returns:
            tuple: ``(observation, reward, done, info)``. ``done`` becomes True once
            ``time_steps`` steps have been taken; ``info`` is always an empty dict.
            An action other than 0, 1 or 2 leaves the schedule unchanged with reward 0.
        """
        reward = 0
        done = False

        # Action Handling
        if action == 0:
            reward = -1  # No change (penalty for not addressing absence)
        elif action == 1:
            reward = self._perform_swap()
        elif action == 2:
            reward = self._offer_overtime()

        # Increment time step
        self.current_step += 1
        if self.current_step >= self.time_steps:
            done = True  # end of schedule window

        return self._get_obs(), reward, done, {}

    def _perform_swap(self):
        """Exchange one random shift between two random nurses; always returns -1.

        The swap is skipped (schedule left untouched) when it cannot be carried out
        cleanly: fewer than two nurses, one of the chosen nurses has no shifts, or the
        exchange would give a nurse a shift they already hold.
        """
        if len(self.nurses) < 2:
            return -1

        nurse_1, nurse_2 = random.sample(range(len(self.nurses)), 2)
        shifts_1 = self.current_schedule[nurse_1]
        shifts_2 = self.current_schedule[nurse_2]

        # random.choice raises IndexError on an empty sequence
        if not shifts_1 or not shifts_2:
            return -1

        shift_1 = random.choice(shifts_1)
        shift_2 = random.choice(shifts_2)

        # Receiving a shift the nurse already has would leave a duplicate entry in their list
        if shift_2 in shifts_1 or shift_1 in shifts_2:
            return -1

        # Swap the shifts
        shifts_1.remove(shift_1)
        shifts_1.append(shift_2)
        shifts_2.remove(shift_2)
        shifts_2.append(shift_1)

        return -1  # No reward for the swap itself

    def _offer_overtime(self):
        """Draw whether an overtime offer is accepted; the schedule itself is not modified.

        Returns:
            -2 when the offer is accepted (overtime cost), -3 when it is declined
            (understaffing cost).
        """
        overtime_acceptance = np.random.rand() < self.overtime_p

        if overtime_acceptance:
            return -2  # Overtime penalty (cost)
        else:
            return -3  # Understaffing penalty (cost)
