In [1]:
#Imports
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# World definition
no_of_time_blocks = 8

# States: one per time block, numbered from 0; the last block is terminal.
S = list(range(no_of_time_blocks))
terminal = no_of_time_blocks - 1

# Actions an agent can pick in every state.
A = ['same', 'other']
a_count = len(A)

# Q-learning hyper-parameters: alpha scales each update (step size),
# gamma weights the best next-state value (discount factor).
alpha = 0.1
gamma = 1

In [32]:
import random
class Agent:
    """Tabular epsilon-greedy Q-learning agent choosing between 'same' and 'other'.

    Attributes:
        Q: nested dict ``Q[state][action]`` of action-value estimates, all
            initialised to 0.
        alpha: step size applied to every Q update.
        gamma: discount factor applied to the best next-state value.
    """

    # Exploration probability. It is a class attribute and is read as
    # ``Agent.epsilon``, so assigning ``Agent.epsilon = x`` affects every agent.
    epsilon = 0.1

    def __init__(self, S, A, alpha, gamma):
        """Create an agent with a zero-initialised Q-table.

        Args:
            S: iterable of states (used as outer Q-table keys).
            A: iterable of actions (used as inner Q-table keys).
            alpha: step size for ``update_Q``.
            gamma: discount factor for ``update_Q``.
        """
        self.Q = {s: {a: 0 for a in A} for s in S}
        self.alpha = alpha
        self.gamma = gamma

    def update_Q(self, s, sn, a, r):
        """Apply one Q-learning update for the transition (s, a) -> sn with reward r.

        Args:
            s: state in which the action was taken.
            sn: state reached afterwards; must be a key of ``self.Q``.
            a: action that was taken in ``s``.
            r: reward received for the transition.
        """
        best_next = max(self.Q[sn].values())
        # Use the agent's own alpha/gamma (not module-level globals) so the
        # values passed to __init__ are actually honoured.
        td_error = r + self.gamma * best_next - self.Q[s][a]
        self.Q[s][a] += self.alpha * td_error

    def action(self, s):
        """Pick an action for state ``s`` epsilon-greedily.

        One uniform draw is made per call. With probability ``1 - epsilon`` the
        action with the larger Q-value is returned (ties favour 'same');
        otherwise the other action is returned.

        Returns:
            'same' or 'other'.
        """
        draw = random.random()
        q_row = self.Q[s]
        if q_row['same'] >= q_row['other']:
            greedy, alternative = 'same', 'other'
        else:
            greedy, alternative = 'other', 'same'
        return greedy if draw >= Agent.epsilon else alternative



In [4]:
import copy

class ConstraintSystem:
    """Vets the actions requested by the agents and overrides the disallowed ones.

    The system remembers the (possibly overridden) actions of the previous
    call in ``prev_action`` and compares each new request against them.
    """

    def __init__(self, no_of_agents, no_of_people):
        """Store the head counts and start with no remembered previous actions."""
        self.no_of_agents = no_of_agents
        self.no_of_people = no_of_people
        # People that are not needed to fill the positions.
        self.no_of_free = no_of_people - no_of_agents
        self.prev_action = [None] * no_of_agents

    def checkActions(self, agent_actions):
        """Validate one step of requested actions.

        Per agent, in index order:
          * a request that differs from the agent's previous action is accepted;
          * a repeated 'same' is penalised (-100) and replaced by 'other'
            (it would lead to overwork);
          * any other repeated request is accepted while the spare allowance
            (``no_of_free`` minus the number of previous 'same' actions) lasts;
          * once the allowance is used up it is penalised (-100) and replaced
            by 'same' (otherwise no one would be scheduled).

        Args:
            agent_actions: list with one requested action per agent; it is
                not modified.

        Returns:
            (consys_actions, penalise): the actions to execute and the
            per-agent penalty list (0 or -100). The executed actions are also
            remembered for the next call.
        """
        approved = copy.deepcopy(agent_actions)
        penalties = []
        spare = self.no_of_free - self.prev_action.count('same')

        for idx in range(self.no_of_agents):
            previous = self.prev_action[idx]
            requested = agent_actions[idx]

            if previous != requested:
                penalties.append(0)
                continue

            if requested == 'same':  # will lead to overwork
                penalties.append(-100)
                approved[idx] = 'other'
                continue

            if spare > 0:
                spare -= 1
                penalties.append(0)
                continue

            # will lead to no one being scheduled
            penalties.append(-100)
            approved[idx] = 'same'

        self.prev_action = copy.deepcopy(approved)
        return approved, penalties
        



In [5]:
#define environment
from queue import Queue, Empty


class Environment:
    """Roster for one episode: which person fills each position in each time block.

    Attributes:
        table: dict ``table[position][state]`` holding the person assigned to
            that position in that time block (None until scheduled).
        rest: FIFO ``Queue`` of people currently not assigned to any position.
    """

    def __init__(self, no_of_positions, no_of_people, states=None):
        """Build an empty roster and fill time block 0 from the rest queue.

        Args:
            no_of_positions: number of positions (table rows), numbered from 0.
            no_of_people: number of people, numbered from 0.
            states: iterable of time-block states used as table columns;
                defaults to the module-level ``S``.

        Raises:
            ValueError: if there are fewer people than positions. Without this
                check the blocking ``Queue.get()`` below would wait forever.
        """
        if no_of_people < no_of_positions:
            raise ValueError(
                "no_of_people (%r) must be at least no_of_positions (%r)"
                % (no_of_people, no_of_positions)
            )
        if states is None:
            states = S

        self.table = {p: {s: None for s in states} for p in range(no_of_positions)}

        self.rest = Queue()
        # in final code, replace the below block with actual people names
        for person in range(no_of_people):
            self.rest.put(person)
        # The first people in the queue occupy the positions in time block 0.
        for p in self.table:
            self.table[p][0] = self.rest.get()

    def schedule(self, actions, s, reward):
        """Fill time block ``s + 1`` according to the actions taken in block ``s``.

        'same' keeps the current person in the position; any other action
        replaces them with the next person from the rest queue. People who are
        replaced rejoin the queue only after all positions are processed, so
        they cannot be reassigned within the same step.

        Args:
            actions: per-position action, indexed by position.
            s: current time block.
            reward: per-position reward list; it is updated in place.

        Returns:
            (reward, restart): ``reward`` is the same list that was passed in,
            with +1 for every position that was filled. ``restart`` is True
            when a replacement was needed but the rest queue was empty; that
            position gets -100, processing stops immediately, and the roster
            is left partially filled (people already replaced in this step are
            not returned to the queue).
        """
        sn = s + 1
        removed = []
        for p in self.table:
            if actions[p] == 'same':
                self.table[p][sn] = self.table[p][s]
            else:
                if self.rest.empty():
                    reward[p] -= 100
                    return reward, True
                removed.append(self.table[p][s])
                self.table[p][sn] = self.rest.get()
            # Every successfully filled position earns +1.
            reward[p] += 1
        for person in removed:
            self.rest.put(person)
        return reward, False
   


In [None]:
# Initialize number of training episodes
no_of_episodes = 100

# Initialize agents - aka positions (one agent per position);
# explore with probability 0.1 while training
no_of_agents = 4
agents = [Agent(S, A, alpha, gamma) for _ in range(no_of_agents)]
Agent.epsilon = 0.1

# Initialize minimum number of people
x = no_of_agents
min_no_of_people = x//2 + (x - x//2)*2

# Training always runs with the minimum head count.
# TODO: in final code, take the actual number of people and, if it is below the
# minimum, append "attention, out of constraint" placeholders to the people names.
no_of_people = min_no_of_people

for e in range(no_of_episodes):
    env = Environment(no_of_agents, no_of_people)
    # Every episode starts from a fresh roster, so the constraint tracker must
    # start fresh as well; otherwise the last actions of the previous episode
    # would be compared against the first actions of this one.
    constraints = ConstraintSystem(no_of_agents, no_of_people)
    for s in S[:terminal]:
        agent_actions = [agent.action(s) for agent in agents]
        consys_actions, reward = constraints.checkActions(agent_actions)
        reward, restart = env.schedule(consys_actions, s, reward)
        # Each agent learns from the action it requested, not from the
        # (possibly overridden) action that was executed.
        for agent, requested, r in zip(agents, agent_actions, reward):
            agent.update_Q(s, s+1, requested, r)
        if restart:
            break

# Print the roster of the last episode: one row per position, one column per time block
for i in range(no_of_agents):
    for j in env.table[i]:
        print(env.table[i][j], end='\t')
    print()

        
        

0	4	4	2	2	0	0	4	
1	5	5	3	3	1	1	5	
2	2	0	0	4	4	2	2	
3	3	1	1	5	5	3	3	


In [39]:
# Testing with epsilon 0: run one episode in which every agent acts greedily
Agent.epsilon = 0
env = Environment(no_of_agents, no_of_people)
# Start with a fresh constraint tracker to match the fresh roster; reusing the
# one left over from an earlier episode would compare the first actions here
# against that episode's last actions.
constraints = ConstraintSystem(no_of_agents, no_of_people)
for s in S[:terminal]:
    agent_actions = [agent.action(s) for agent in agents]
    consys_actions, reward = constraints.checkActions(agent_actions)
    reward, restart = env.schedule(consys_actions, s, reward)
    # Q-values keep being updated during this run, as in training.
    for agent, requested, r in zip(agents, agent_actions, reward):
        agent.update_Q(s, s+1, requested, r)
    if restart:
        break

# Print the resulting roster: one row per position, one column per time block
for i in range(no_of_agents):
    for j in env.table[i]:
        print(env.table[i][j], end='\t')
    print()

0	0	2	2	4	4	0	0	
1	1	3	3	5	5	1	1	
2	4	4	0	0	2	2	4	
3	5	5	1	1	3	3	5	
