In [2]:
import numpy as np

class PyramidEnvironment:
    """Small grid world in which an agent walks towards a fixed goal cell.

    The board is a ``size`` x ``2*size - 1`` integer array. Positions are
    ``(row, column)`` tuples; row 0 is the top row. The agent starts in the
    middle of the bottom row and the goal sits in the top-right corner.

    NOTE: despite the name, no pyramid-shaped mask is applied. Movement is
    limited only by the rectangular bounds of the array.

    Attributes:
        size: Number of rows in the grid.
        grid: Integer array holding 1 at the goal cell and 0 elsewhere.
        agent_pos: Current ``(row, column)`` of the agent.
        goal_pos: ``(row, column)`` of the goal.
        actions: ``(d_row, d_col)`` offsets indexed by action id:
            0 = up, 1 = right, 2 = down.
    """

    def __init__(self, size=5):
        """Build the grid, place the goal, and put the agent at the start.

        Args:
            size: Number of rows; the grid has ``2*size - 1`` columns.

        Raises:
            ValueError: If ``size`` is smaller than 1.
        """
        if size < 1:
            # np.zeros would reject the resulting shape anyway; fail early
            # with a message that names the actual problem.
            raise ValueError(f"size must be at least 1, got {size!r}")

        self.size = size
        self.grid = np.zeros((size, 2 * size - 1), dtype=int)
        self.agent_pos = (size - 1, size - 1)  # bottom row, middle column
        self.goal_pos = (0, 2 * size - 2)  # top row, last column
        self.actions = [(-1, 0), (0, 1), (1, 0)]  # Up, Right, Down

        # Mark the goal cell so render() can display it.
        self.grid[self.goal_pos] = 1

    def reset(self):
        """Move the agent back to its start cell and return that position."""
        self.agent_pos = (self.size - 1, self.size - 1)
        return self.agent_pos

    def step(self, action):
        """Apply one action and report the outcome.

        A move that would leave the grid is ignored: the agent stays where
        it is, but the step is still rewarded like any other.

        Args:
            action: Index into ``self.actions`` (0 = up, 1 = right, 2 = down).

        Returns:
            A ``(position, reward, done)`` tuple. ``reward`` is 10 when the
            agent is on the goal after the step and -1 otherwise; ``done``
            is True exactly when the agent is on the goal.

        Raises:
            IndexError: If ``action`` is not a valid index into
                ``self.actions``. Negative values are rejected too, because
                list indexing would otherwise silently wrap them around to
                a different action.
        """
        if not 0 <= action < len(self.actions):
            raise IndexError(
                f"action must be in range(0, {len(self.actions)}), got {action!r}"
            )

        d_row, d_col = self.actions[action]
        row, col = self.agent_pos
        new_row, new_col = row + d_row, col + d_col

        # Only commit the move when the target cell lies inside the array.
        if 0 <= new_row < self.size and 0 <= new_col < 2 * self.size - 1:
            self.agent_pos = (new_row, new_col)

        done = self.agent_pos == self.goal_pos
        reward = 10 if done else -1

        return self.agent_pos, reward, done

    def render(self):
        """Print the grid with the goal shown as 1 and the agent as 2.

        The agent marker is written last, so it hides the goal marker when
        both occupy the same cell. ``self.grid`` itself is not modified.
        """
        grid_copy = np.copy(self.grid)
        grid_copy[self.agent_pos] = 2
        print(grid_copy)

# Example usage: show the starting board, then replay a short scripted walk.
env = PyramidEnvironment()
env.render()

# Each entry pairs an action index with the message printed after taking it.
scripted_moves = [
    (1, "Agent moved right. New state:"),  # Moving right
    (0, "Agent moved up. New state:"),  # Moving up
    (0, "Agent moved up. New state:"),  # Moving up
]
for action, message in scripted_moves:
    new_state, reward, done = env.step(action)
    print(message, new_state)
    env.render()

[[0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0]]
Agent moved right. New state: (4, 5)
[[0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0]]
Agent moved up. New state: (3, 5)
[[0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0]
 [0 0 0 0 0 0 0 0 0]]
Agent moved up. New state: (2, 5)
[[0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]]
