A simple gridworld environment for testing RL algorithms in discrete action and state spaces.

In [4]:
import numpy as np
import plotly.express as px

In [64]:
class GridEnv():
    """A small gridworld with a heavily penalised strip along the top row.

    The grid is 10x10. Every cell yields a reward of -1 except the interior
    of the top row (row 0, all columns but the first and last), which yields
    -10. The agent starts in the top-left corner ``[0, 0]`` and the episode
    is reported as done once it stands on the top-right corner.

    Positions are ``[row, col]`` lists. Actions are the strings ``"up"``,
    ``"down"``, ``"left"`` and ``"right"``; any other value leaves the agent
    where it is.
    """

    def __init__(self) -> None:
        self.grid = np.ones((10, 10)) * -1
        self.grid[0, 1:-1] = -10
        # Terminal cell: top-right corner, derived from the grid shape so the
        # boundary logic in step() never disagrees with the grid itself.
        self.goal = [0, self.grid.shape[1] - 1]
        # Start in a valid state so step() works even before an explicit
        # reset() call.
        self.reset()

    def reset(self):
        """Move the agent back to the top-left corner.

        Returns:
            A fresh ``[row, col]`` list with the initial position.
        """
        self.player_position = [0, 0]
        # Hand out a copy so callers cannot move the agent by editing it.
        return list(self.player_position)

    def step(self, action):
        """Apply one move and report the outcome.

        Moves that would leave the grid are ignored (the agent stays put but
        still collects the reward of its current cell).

        Args:
            action: One of ``"up"``, ``"down"``, ``"left"``, ``"right"``.

        Returns:
            A tuple ``(next_state, reward, done)`` where ``next_state`` is a
            new ``[row, col]`` list, ``reward`` is the grid value of the cell
            the agent now occupies, and ``done`` is True when that cell is
            the goal.
        """
        row, col = self.player_position
        last_row = self.grid.shape[0] - 1
        last_col = self.grid.shape[1] - 1

        if action == "up":
            row = max(row - 1, 0)
        elif action == "down":
            row = min(row + 1, last_row)
        elif action == "left":
            col = max(col - 1, 0)
        elif action == "right":
            col = min(col + 1, last_col)

        # Rebind to a new list instead of mutating in place, so states
        # returned by earlier calls keep their values.
        self.player_position = [row, col]

        reward = self.grid[row, col]
        done = self.player_position == self.goal

        return [row, col], reward, done

In [68]:
def random_policy():
    """Pick one of the four movement actions uniformly at random.

    Returns:
        The chosen action name as drawn by ``np.random.choice``.
    """
    actions = ("left", "right", "up", "down")
    choice = np.random.choice(actions)
    return choice

In [72]:
# Roll out N episodes under the random policy and record every per-step
# reward, one list per episode.
env = GridEnv()

N = 10
n_max = 100  # cap on the number of steps in a single episode
all_rewards = []

for _ in range(N):
    env.reset()
    done = False
    rewards = []

    # n counts the steps taken; the episode stops at the goal or at the cap.
    n = 0
    for n in range(1, n_max + 1):
        action = random_policy()
        next_state, reward, done = env.step(action)
        rewards.append(reward)
        if done:
            break

    all_rewards.append(rewards)


In [74]:
# Mean reward per step for each recorded episode.
[sum(episode) / len(episode) for episode in all_rewards]

[-2.17, -1.72, -3.88, -2.98, -2.89, -2.08, -1.99, -2.26, -1.27, -1.99]