In [None]:
import numpy as np
import random
class GridWorld:
    """Deterministic rectangular grid world whose states are ``(x, y)`` tuples.

    ``x`` is clamped to ``[0, width - 1]`` and ``y`` to ``[0, height - 1]``.
    "north" decreases ``y``, "south" increases ``y``, "east" increases ``x``
    and "west" decreases ``x``; a move that would leave the grid keeps the
    agent on the border cell instead.

    Args:
        width: Number of cells along the x axis.
        height: Number of cells along the y axis.
        start_state: ``(x, y)`` state that ``reset()`` places the agent in.
        terminal_states: Container of ``(x, y)`` states that end an episode.
        special_states: Mapping from ``(x, y)`` state to its reward entry. An
            entry is either a plain number or a tuple/list whose *last* item
            is the reward, e.g. ``(x2, y2, reward)`` or ``((x2, y2), reward)``.
            Any leading items are stored but not acted on by this class.
        default_reward: Reward for entering any non-terminal, non-special state.
    """

    def __init__(self, width, height, start_state, terminal_states, special_states, default_reward):
        self.width = width
        self.height = height
        self.start_state = start_state
        self.terminal_states = terminal_states
        self.special_states = special_states
        self.default_reward = default_reward
        # No position until reset() is called; move() checks for this.
        self.current_state = None
        # Previously assigned to a local that was thrown away at the end of
        # __init__. Kept as an attribute so the data is not lost.
        # NOTE(review): looks like per-column wind strengths for a windy
        # gridworld -- TODO confirm. move() does not apply it.
        self.wind = [0, 0, 0, 1, 1, 1, 2, 2, 1, 0]

    def reset(self):
        """Place the agent on ``start_state`` and return that state."""
        self.current_state = self.start_state
        # Returning the state makes ``state = env.reset()`` meaningful.
        return self.current_state

    def is_terminal(self, state):
        """Return True when ``state`` is one of the terminal states."""
        return state in self.terminal_states

    def is_special(self, state):
        """Return True when ``state`` has an entry in ``special_states``."""
        return state in self.special_states

    def get_reward(self, state, action):
        """Return the numeric reward for arriving in ``state``.

        Args:
            state: The ``(x, y)`` state being entered.
            action: The action that led there (currently unused).

        Returns:
            ``0`` for a terminal state; the reward stored for a special state
            (the last element when the entry is a tuple/list); otherwise
            ``default_reward``. Terminal takes precedence over special.
        """
        if self.is_terminal(state):
            return 0
        if self.is_special(state):
            entry = self.special_states[state]
            # Entries such as (x2, y2, reward) or ((x2, y2), reward) carry the
            # reward last; hand back the number, not the whole tuple.
            if isinstance(entry, (tuple, list)):
                return entry[-1]
            return entry
        return self.default_reward

    def move(self, action):
        """Apply ``action`` to the current state and advance the environment.

        Args:
            action: One of ``"north"``, ``"south"``, ``"east"``, ``"west"``.

        Returns:
            ``(next_state, reward, done)`` where ``done`` is True when
            ``next_state`` is terminal.

        Raises:
            ValueError: If ``reset()`` has not been called yet, or if
                ``action`` is not one of the four directions.
        """
        if self.current_state is None:
            raise ValueError("Cannot move in an unitialized state. Please call reset() first.")

        x, y = self.current_state
        if action == "north":
            y = max(0, y-1)
        elif action == "south":
            y = min(self.height-1, y+1)
        elif action == "east":
            # East is the +x direction (the two horizontal moves were swapped).
            x = min(self.width-1, x+1)
        elif action == "west":
            x = max(0, x-1)
        else:
            raise ValueError("Invalid action: {}".format(action))

        next_state = (x, y)
        reward = self.get_reward(next_state, action)
        done = self.is_terminal(next_state)

        self.current_state = next_state
        return next_state, reward, done

In [None]:
   # create an instance of the GridWorld class
# Build a 5x5 grid with two special states, then take five random steps
# and print what the environment returns for each one.
gw = GridWorld(
    width=5,
    height=5,
    start_state=(0, 0),
    terminal_states=[],
    special_states={
        (0, 1): (4, 1, 10),
        (0, 3): (2, 3, 5),
    },
    default_reward=0,
)

# Dedicated, seeded generator so re-running the cell reproduces the same walk
# without touching the global `random` state.
rng = random.Random(0)

gw.reset()
# Read the position from the environment rather than from reset()'s return
# value, so `state` always holds an actual (x, y) tuple.
state = gw.current_state

for step in range(5):
    # pick one of the four directions uniformly at random and apply it
    action = rng.choice(["north", "south", "east", "west"])
    next_state, reward, done = gw.move(action)

    # print the results
    print("Action taken: {}".format(action))
    print("Next state: {}".format(next_state))
    print("Reward received: {}".format(reward))

    state = next_state


Action taken: east
Next state: (0, 0)
Reward received: 0
Action taken: west
Next state: (1, 0)
Reward received: 0
Action taken: east
Next state: (0, 0)
Reward received: 0
Action taken: south
Next state: (0, 1)
Reward received: (4, 1, 10)
Action taken: west
Next state: (1, 1)
Reward received: 0


In [None]:
# 5x5 grid, start in (0, 0), no terminal states, zero reward by default.
# Two states carry a 3-tuple entry in special_states.
# NOTE(review): GridWorld.move unpacks states as (x, y); these keys may have
# been written as (row, col) -- confirm the intended ordering.
gw_3_5 = GridWorld(
    width=5, height=5,
    start_state=(0, 0),
    terminal_states=list(),
    special_states=dict([
        ((0, 1), (4, 1, 10)),
        ((0, 3), (2, 3, 5)),
    ]),
    default_reward=0,
)
# 4x4 grid with a single terminal state in the far corner.
# Coordinates are zero-based and GridWorld.move clamps them to
# [0, width-1] x [0, height-1], so the corner is (3, 3); a terminal at
# (4, 4) would lie outside the grid and could never be reached.
gw_4_1 = GridWorld(
    width=4,
    height=4,
    start_state=(0, 0),
    terminal_states=[(3, 3)],
    special_states={},
    default_reward=0,
)
# Grid with a -1 reward per step, one terminal state and no special states.
# NOTE(review): width=7 / height=10 with states (3, 0) and (3, 7) looks like
# a (row, col) description of a 10-wide, 7-tall grid, while GridWorld.move
# treats states as (x, y) -- confirm the intended orientation.
gw_6_1 = GridWorld(
    width=7,
    height=10,
    start_state=(3, 0),
    terminal_states=[(3, 7)],
    # Must be a mapping: GridWorld.get_reward subscripts special_states by
    # state, which a set (the previous `{0,}`) does not support.
    special_states={},
    default_reward=-1,
)

# Cliff-style layout on a 12-wide, 4-tall grid, written in the (x, y) order
# that GridWorld.move uses. "south" increases y, so the bottom row is y = 3.
#   start    : bottom-left corner  (0, 3)
#   terminal : bottom-right corner (11, 3)
#   cliff    : the ten bottom-row cells between them, x = 1..10
# All of these are inside [0, 11] x [0, 3]; the earlier (4, k) coordinates
# with k >= 4 fell outside the grid and were unreachable.
# Each cliff entry keeps the ((x, y), reward) shape. Presumably the first
# item is the state the agent should be sent back to -- TODO confirm;
# GridWorld.move does not act on it.
gw_6_6 = GridWorld(
    width=12,
    height=4,
    start_state=(0, 3),
    terminal_states=[(11, 3)],
    special_states={(x, 3): ((0, 3), -100) for x in range(1, 11)},
    default_reward=-1,
)

# Summarise the configuration that was just built.
print("\nGridWorld for Example 6.6:")
print("Width: {}".format(gw_6_6.width))
print("Height: {}".format(gw_6_6.height))
print("Start State: {}".format(gw_6_6.start_state))
print("Terminal States: {}".format(gw_6_6.terminal_states))
print("Special States: {}".format(gw_6_6.special_states))
print("Default Reward: {}".format(gw_6_6.default_reward))


GridWorld for Example 6.6:
Width: 12
Height: 4
Start State: (4, 0)
Terminal States: [(4, 12)]
Special States: {(4, 1): ((4, 0), -100), (4, 2): ((4, 0), -100), (4, 3): ((4, 0), -100), (4, 4): ((4, 0), -100), (4, 5): ((4, 0), -100), (4, 6): ((4, 0), -100), (4, 7): ((4, 0), -100), (4, 8): ((4, 0), -100), (4, 9): ((4, 0), -100), (4, 10): ((4, 0), -100)}
Default Reward: -1
