World Definition

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
plt.style.use('ggplot')
rcParams['figure.figsize'] = 10, 10

# number of playable cells along each side of the square world
dimension = 10

# Grid-line positions used for drawing: 13 evenly spaced values from 0 to 13,
# i.e. 12 columns/rows (the playable cells plus one wall cell on each side).
grid_axis = np.linspace(0, 13, 13)
grid_interval = grid_axis[1] - grid_axis[0]

# Tick labels, one per grid line.  The playable cells are labelled
# '1'..str(dimension); the remaining (wall) grid lines stay blank.  The list is
# padded to len(grid_axis) because matplotlib requires the number of labels to
# match the number of tick positions.
main_axis = [''] + [str(i) for i in range(1, dimension + 1)]
main_axis += [''] * (len(grid_axis) - len(main_axis))

door_width = 0.1
grid_world = np.zeros((dimension+1, dimension+1))

# action code -> human readable name
actions = {0:'pick up', 1:'use', 5:'drop', 8:'up', 4: 'left', 2: 'down', 6:'right', 3:"do nothing"}

class EscapeRoom:
  """Two-agent grid world with two keys and two doors.

  Every position is a two-element list ``[x, y]``.  Cells whose x or y is
  0 or 11 belong to the surrounding wall; both doors are embedded in it.

  Attributes:
    p1, p2: positions of agent 1 and agent 2.
    k1, k2: positions of the keys lying on the floor, or ``None`` while the
      key is carried in an inventory.
    d1, d2: positions of the doors of agent 1 and agent 2.
    i1, i2: inventories (lists holding the key names ``"k1"`` / ``"k2"``).

  Action codes (same as the module-level ``actions`` table): 8 up, 2 down,
  4 left, 6 right, 0 pick up, 1 use, 5 drop, 3 do nothing.
  """

  # Start-of-episode layout.  Every instance gets its own copies of these lists.
  DEFAULT_PARAMS = {'p1' : [4, 6], 'p2' : [2, 1], 'k1' : [2, 4], 'k2' : [3, 9],
                    'd1' : [0, 5], 'd2' : [5, 11], 'i1' : [], 'i2' : []}

  # movement action -> (dx, dy)
  _MOVES = {8: (0, 1), 4: (-1, 0), 2: (0, -1), 6: (1, 0)}

  # x / y values that lie inside the wall
  _WALL_COORDS = (0, 11)

  # agent index -> names of its (position, inventory, door) attributes and
  # the name of the key that opens its door
  _AGENTS = {1: ('p1', 'i1', 'd1', 'k1'), 2: ('p2', 'i2', 'd2', 'k2')}

  def __init__(self):
    """Place agents, keys and doors at their default positions."""
    # list(...) gives every instance (and every reset) fresh, unshared lists
    for name, value in self.DEFAULT_PARAMS.items():
      setattr(self, name, list(value))

  def reset(self):
    """Reset the board to its initial layout."""
    self.__init__()
    return None

  def get_reward(self):
    """Return ``[reward_agent_1, reward_agent_2]``.

    An agent standing on its own door gets 10, otherwise -1.
    """
    return [10 if self.p1 == self.d1 else -1,
            10 if self.p2 == self.d2 else -1]

  def _step(self, agent, action):
    """Apply ``action`` for agent number ``agent`` (1 or 2).

    Returns -1 if the agent walked into the wall (the whole world is reset in
    that case) and 1 otherwise.
    """
    pos_name, inv_name, door_name, own_key = self._AGENTS[agent]
    pos = getattr(self, pos_name)
    inventory = getattr(self, inv_name)
    door = getattr(self, door_name)

    if action in self._MOVES:
      dx, dy = self._MOVES[action]
      pos[0] += dx
      pos[1] += dy
      # Only a movement can run into the wall.  Checking here (instead of
      # after every action) lets an agent that entered its door with "use"
      # stay there; the door cells are part of the wall ring.
      if pos[0] in self._WALL_COORDS or pos[1] in self._WALL_COORDS:
        self.reset()
        return -1

    elif action == 0:
      # pick up the key lying on the agent's cell (k1 wins if both are there)
      for key in ('k1', 'k2'):
        if getattr(self, key) == pos:
          inventory.append(key)
          setattr(self, key, None)
          break

    elif action == 1:
      # use: step onto the own door when directly next to it and holding its key
      distance = abs(pos[0] - door[0]) + abs(pos[1] - door[1])
      if distance == 1 and own_key in inventory:
        # copy the coordinates; sharing the door's list would make the door
        # move together with the agent afterwards
        pos[:] = door

    elif action == 5:
      # drop the most recently picked up key at a *copy* of the current
      # position so the key does not follow the agent
      if inventory:
        setattr(self, inventory.pop(), list(pos))

    # action 3 ("do nothing") and unknown codes leave the world unchanged
    return 1

  def execute_1(self, action = 3):
    """Execute ``action`` for agent p1; return -1 on a wall crash, else 1."""
    return self._step(1, action)

  def execute_2(self, action = 3):
    """Execute ``action`` for agent p2; return -1 on a wall crash, else 1."""
    return self._step(2, action)

  def show(self):
    """Draw walls, doors, keys lying on the floor and both agents."""

    # agents are circles centred in their cell
    agent1 = plt.Circle((grid_interval*(self.p1[0]+0.5),grid_interval*(self.p1[1]+0.5)), 0.3, color='r', alpha = 0.5)
    agent2 = plt.Circle((grid_interval*(self.p2[0]+0.5),grid_interval*(self.p2[1]+0.5)), 0.3, color='blue', alpha = 0.5)

    # doors fill their whole cell
    door1 = plt.Rectangle((self.d1[0]*grid_interval, self.d1[1]*grid_interval), grid_interval, grid_interval, color = "red", alpha = 0.8)
    door2 = plt.Rectangle((self.d2[0]*grid_interval, self.d2[1]*grid_interval), grid_interval, grid_interval, color = "blue", alpha = 0.8)

    # keys are only drawn while they lie on the floor (position is not None)
    keys = []
    for key_pos, colour in ((self.k1, "red"), (self.k2, "blue")):
      if key_pos is not None:
        keys.append(plt.Rectangle(((key_pos[0]+0.375)*grid_interval,(key_pos[1]+0.25)*grid_interval), 0.25, 0.5, angle = -10, color = colour, alpha = 0.5))

    # wall -> DO NOT CHANGE
    wall1 = plt.Rectangle((0,0), grid_interval, 13, color = "black")
    wall2 = plt.Rectangle((0,0), 13, grid_interval, color = "black")
    wall4 = plt.Rectangle((0,grid_axis[-2]), 13, grid_axis[1], color = "black")
    wall3 = plt.Rectangle((grid_axis[-2],0),grid_axis[1], 13, color = "black")

    # get plot axis
    plt.figure(figsize = (10,10))
    ax = plt.gca()

    # draw order: walls, then doors, then keys, then agents on top
    for patch in [wall1, wall2, wall3, wall4, door1, door2] + keys + [agent1, agent2]:
      ax.add_patch(patch)

    # matplotlib needs exactly one label per tick: pad with blanks / truncate
    labels = list(main_axis)
    labels += [''] * (len(grid_axis) - len(labels))
    labels = labels[:len(grid_axis)]

    plt.xticks(ticks = grid_axis, labels = labels)
    plt.yticks(ticks = grid_axis, labels = labels)
    plt.show()

  def render(self):
    """Alias of show() for callers that use the render() name."""
    return self.show()

In [None]:
# Goal for this part: given a state and an action, return the next state and
# the reward.  So far the cell only defines an example state.

current_state = np.array([1, 2])

In [None]:
# Create the world, let agent 1 take the 'left' action (code 4) ten times,
# then draw the resulting board.
world = EscapeRoom()
for _ in range(10):
  world.execute_1(action = 4)
# EscapeRoom draws itself with show(); calling render() raised AttributeError.
world.show()

AttributeError: ignored

## CASES TO CONSIDER


1.   **Impact of considering others**: The agent goes to the blue door, opens it, takes something, closes the door again, and then goes to the red door. The other agent has the same objective but pursues it some time later, so the first agent has to consider future tasks.
2.    **Illustration of optimal behaviours**: The agent uses the phone and, before leaving, must put it down close to the blue door (where the other agent resides).
3.    **Varying the caring coefficient**: How far does the agent go when it wants to take the other agent's key and leave it close to that agent? (Show that too much caring can in fact decrease the reward.)


