The agent's actions are encoded as the following integers:

Left --> 0

Up --> 1

Right --> 2

Down --> 3

In [1]:
"""
Definition of the Warehouse Agent environment
"""

class WarehouseAgent():
    """Grid-world environment in which an agent pushes a box onto a goal cell.

    Positions are stored as ``[row, column]`` lists that index ``self.game``.
    Actions are the integers 0 (left), 1 (up), 2 (right) and 3 (down).
    """

    # Action code -> (row delta, column delta).
    _MOVES = {
        0: (0, -1),   # left
        1: (-1, 0),   # up
        2: (0, 1),    # right
        3: (1, 0),    # down
    }

    # Box positions that end the episode in failure. In the grid below each
    # of these cells has walls on two adjacent sides.
    _DEAD_CELLS = ([1, 1], [1, 2], [5, 1], [5, 2], [3, 4], [4, 4])

    def __init__(self):
        """
        Initializing the environment
        """
        self.GRID_DIM = [6,7] # columns, rows
        self.agent_position = [1,2]
        self.box_location = [4,3]
        self.goal_location = [3,1]

        # 'A' --> agent
        # 'w' --> wall
        # 'e' --> empty cell outside the walled area
        # 'B' --> box
        # 'G' --> goal
        # ' ' --> free floor the agent and the box can occupy
        self.game = [['w', 'w', 'w', 'w', 'e', 'e'],
                     ['w', ' ', 'A', 'w', 'e', 'e'],
                     ['w', ' ', ' ', 'w', 'w', 'w'],
                     ['w', 'G', ' ', ' ', ' ', 'w'],
                     ['w', ' ', ' ', 'B', ' ', 'w'],
                     ['w', ' ', ' ', 'w', 'w', 'w'],
                     ['w', 'w', 'w', 'w', 'e', 'e']
                    ]

    def reset(self):
        """Restore the environment to its initial configuration.

        Returns:
            The environment object itself (not a separate state value), with
            the agent, box, goal and grid back at their starting values.
        """
        # Re-running the constructor rebuilds every attribute from scratch.
        self.__init__()
        return self

    def step(self, action):
        """Apply one action and report the outcome.

        Args:
            action: 0 (left), 1 (up), 2 (right) or 3 (down). Any other value
                leaves the state unchanged.
        Returns:
            reward: 0 if the box is on the goal after the move, otherwise -1
            done: True if the box is on the goal or on one of the failure
                cells, otherwise False
        """
        # Match by equality rather than hashing so that any value comparing
        # equal to an action code (e.g. 1.0) is accepted.
        delta = next(
            (d for code, d in self._MOVES.items() if action == code), None
        )
        if delta is not None and not self._move_agent(*delta):
            print("Same state")

        done = False
        if self.box_location == self.goal_location:
            reward = 0
            done = True
            print("Well done")
        else:
            reward = -1

        if self.box_location in self._DEAD_CELLS:
            done = True
            print('Mission Failed')

        return reward, done

    def _move_agent(self, d_row, d_col):
        """Move the agent one cell, pushing the box if it is in the way.

        Returns:
            True if the agent moved, False if a wall blocked the agent or the
            box (in which case nothing is changed).
        """
        row, col = self.agent_position
        new_row, new_col = row + d_row, col + d_col

        if self.game[new_row][new_col] == 'w':
            return False

        if [new_row, new_col] == self.box_location:
            # The box has to slide one cell further in the same direction.
            box_row, box_col = new_row + d_row, new_col + d_col
            if self.game[box_row][box_col] == 'w':
                return False
            self.box_location[:] = [box_row, box_col]
            self.game[box_row][box_col] = 'B'

        # Put the goal marker back if the agent was standing on the goal, so
        # the goal stays visible in render().
        if [row, col] == self.goal_location:
            self.game[row][col] = 'G'
        else:
            self.game[row][col] = ' '
        self.game[new_row][new_col] = 'A'
        self.agent_position[:] = [new_row, new_col]
        return True

    def render(self):
        """Print the grid, one row per line, each cell followed by a comma.

        A blank line is printed after every row.
        """
        for row in self.game:
            print(''.join(cell + ',' for cell in row) + '\n')


# Create the environment instance used by the demo cells below.
agent = WarehouseAgent()

A demo game

In [2]:
# Restore the initial layout; reset() returns the environment object itself.
agent.reset()

<__main__.WarehouseAgent at 0x7f1cb1b9ac10>

In [3]:
# Print the current grid.
agent.render()

w,w,w,w,e,e,

w, ,A,w,e,e,

w, , ,w,w,w,

w,G, , , ,w,

w, , ,B, ,w,

w, , ,w,w,w,

w,w,w,w,e,e,



A demo run in which the agent (A) pushes the box (B) onto the goal (G).

In [4]:
# Action codes: 0 = left, 1 = up, 2 = right, 3 = down.

# Walk around to the right-hand side of the box: down, down, right, right, down.
for action in (3, 3, 2, 2, 3):
    agent.step(action)
agent.render()
print("\n\n\n")

# Push the box two cells to the left.
for action in (0, 0):
    agent.step(action)
agent.render()
print("\n\n\n")

# Step down and then left so the agent stands directly below the box.
for action in (3, 0):
    agent.step(action)
agent.render()
print("\n\n\n")

# The box is now one cell below the goal, so a single push up finishes the run.
agent.step(1)
agent.render()


w,w,w,w,e,e,

w, , ,w,e,e,

w, , ,w,w,w,

w,G, , , ,w,

w, , ,B,A,w,

w, , ,w,w,w,

w,w,w,w,e,e,





w,w,w,w,e,e,

w, , ,w,e,e,

w, , ,w,w,w,

w,G, , , ,w,

w,B,A, , ,w,

w, , ,w,w,w,

w,w,w,w,e,e,





w,w,w,w,e,e,

w, , ,w,e,e,

w, , ,w,w,w,

w,G, , , ,w,

w,B, , , ,w,

w,A, ,w,w,w,

w,w,w,w,e,e,





Well done
w,w,w,w,e,e,

w, , ,w,e,e,

w, , ,w,w,w,

w,B, , , ,w,

w,A, , , ,w,

w, , ,w,w,w,

w,w,w,w,e,e,

