from state 2 action down will lead to state 1
from state 1 action right will lead to state 7
from state 7 action right will lead to state 8
from state 8 action right will lead to state 6
from state 6 action up will lead to state 5
avoid state 3 which can be reached by right from state 2 and up from state 7
avoid state 4 which can be reached by left from state 5 and up from state 8
actions are up down left right
Model the environment and the transition probabilities 

2 -> 3 -> 4 -> 5
|    |    |    |
1-> 7 -> 8 -> 6


In [2]:
import numpy as np


state_to_location = dict((state, np.array(divmod(state, 3))) for state in range(9))


actions = ['up', 'down', 'left', 'right']

R = np.full((9, len(actions)), -100)
R[2, 1] = 0  
R[1, 3] = 0
R[7, 3] = 0  
R[8, 3] = 0  
R[6, 0] = 100  



In [12]:
class Environment:
    def __init__(self):

        self.state_to_location = dict((state, np.array(divmod(state, 3))) for state in range(9))

      
        self.actions = ['up', 'down', 'left', 'right']

        self.R = np.full((9, len(self.actions)), -100)
        self.R[2, 1] = 0  # state 2, action down
        self.R[1, 3] = 0  # state 1, action right
        self.R[7, 3] = 0  # state 7, action right
        self.R[8, 3] = 0  # state 8, action right
        self.R[7, 0] = -100 # state 7, action up
        self.R[8, 0] = -100 # state 8, action up
        self.R[6, 0] = 100  # state 6, action up (goal state)


        self.transitions = {
            'up': lambda s: s - 3 if s - 3 in self.state_to_location else s,
            'down': lambda s: s + 3 if s + 3 in self.state_to_location else s,
            'left': lambda s: s - 1 if s - 1 in self.state_to_location else s,
            'right': lambda s: s + 1 if s + 1 in self.state_to_location else s,
        }

        self.state = 2  

    def step(self, action):
        next_state = self.transitions[action](self.state)
        reward = self.R[self.state, self.actions.index(action)]
        self.state = next_state
        return next_state, reward

    def reset(self):
        self.state = 2  
        return self.state

    def print_transitions(self):
        for state in self.state_to_location:
            for action in self.actions:
                next_state = self.transitions[action](state)
                print(f"From state {state} action {action} will lead to state {next_state}")


env = Environment()


env.print_transitions()

From state 0 action up will lead to state 0
From state 0 action down will lead to state 3
From state 0 action left will lead to state 0
From state 0 action right will lead to state 1
From state 1 action up will lead to state 1
From state 1 action down will lead to state 4
From state 1 action left will lead to state 0
From state 1 action right will lead to state 2
From state 2 action up will lead to state 2
From state 2 action down will lead to state 5
From state 2 action left will lead to state 1
From state 2 action right will lead to state 3
From state 3 action up will lead to state 0
From state 3 action down will lead to state 6
From state 3 action left will lead to state 2
From state 3 action right will lead to state 4
From state 4 action up will lead to state 1
From state 4 action down will lead to state 7
From state 4 action left will lead to state 3
From state 4 action right will lead to state 5
From state 5 action up will lead to state 2
From state 5 action down will lead to sta

From state 0 action up will lead to state 0
From state 0 action down will lead to state 0
From state 0 action left will lead to state 0
From state 0 action right will lead to state 0
From state 1 action up will lead to state -2
From state 1 action down will lead to state 1
From state 1 action left will lead to state 1
From state 1 action right will lead to state 2
From state 2 action up will lead to state 2
From state 2 action down will lead to state 5
From state 2 action left will lead to state 2
From state 2 action right will lead to state 3
From state 3 action up will lead to state 3
From state 3 action down will lead to state 6
From state 3 action left will lead to state 2
From state 3 action right will lead to state 4
From state 4 action up will lead to state 4
From state 4 action down will lead to state 7
From state 4 action left will lead to state 3
From state 4 action right will lead to state 5
From state 5 action up will lead to state 5
From state 5 action down will lead to st

In [None]:
from state 0 action up will lead to state 0
from state 0 action down will lead to state 0
from state 0 action left will lead to state 0
from state 0 action right will lead to state 0
from state 1 action up will lead to state 2
from state 1 action down will lead to state 1
from state 1 action left will lead to state 1
from state 1 action right will lead to state 7
from state 2 action up will lead to state 2
from state 2 action down will lead to state 1
from state 2 action left will lead to state 2
from state 2 action right will lead to state 3
from state 3 action up will lead to state 3
from state 3 action down will lead to state 7
from state 3 action left will lead to state 2
from state 3 action right will lead to state 4
from state 4 action up will lead to state 4
from state 4 action down will lead to state 8
from state 4 action left will lead to state 3
from state 4 action right will lead to state 5
from state 5 action up will lead to state 5
from state 5 action down will lead to state 6
from state 5 action left will lead to state 4
from state 5 action right will lead to state 5
from state 6 action up will lead to state 5
from state 6 action down will lead to state 6
from state 6 action left will lead to state 8
from state 6 action right will lead to state 6
from state 7 action up will lead to state 3
from state 7 action down will lead to state 7
from state 7 action left will lead to state 1
from state 7 action right will lead to state 8
from state 8 action up will lead to state 4
from state 8 action down will lead to state 8
from state 8 action left will lead to state 7
from state 8 action right will lead to state 6

In [16]:
class Environment:
    def __init__(self):
        self.state_to_location = dict((state, np.array(divmod(state, 6))) for state in range(9))
        self.actions = ['up', 'down', 'left', 'right']

        # Define the rewards
        self.R = np.full((9, len(self.actions)), -100)
        self.R[2, 1] = 0  # state 2, action down
        self.R[1, 3] = 0  # state 1, action right
        self.R[7, 3] = 0  # state 7, action right
        self.R[8, 3] = 0  # state 8, action right
        self.R[6, 0] = 100  # state 6, action up (goal state)
        self.R[3, :] = -100  # state 3, all actions
        self.R[4, :] = -100  # state 4, all actions

        # Define the transitions
        self.transitions = {
            'up': lambda s: [0, 2, 3, 4, 5, 7, 8][s],
            'down': lambda s: [0, 1, 1, 7, 8, 6, 6, 7, 8][s],
            'left': lambda s: [0, 1, 2, 2, 3, 4, 8, 1, 7][s],
            'right': lambda s: [0, 7, 3, 4, 5, 5, 6, 8, 6][s],
        }

        self.state = 2  # Start state

    def step(self, action):
        next_state = self.transitions[action](self.state)
        reward = self.R[self.state, self.actions.index(action)]
        self.state = next_state
        return next_state, reward

    def reset(self):
        self.state = 2  # Reset to start state
        return self.state

    def print_transitions(self):
        for state in self.state_to_location:
            for action in self.actions:
                next_state = self.transitions[action](state)
                print(f"From state {state} action {action} will lead to state {next_state}")

env = Environment()
env.print_transitions()

From state 0 action up will lead to state 0
From state 0 action down will lead to state 0
From state 0 action left will lead to state 0
From state 0 action right will lead to state 0
From state 1 action up will lead to state 2
From state 1 action down will lead to state 1
From state 1 action left will lead to state 1
From state 1 action right will lead to state 7
From state 2 action up will lead to state 3
From state 2 action down will lead to state 1
From state 2 action left will lead to state 2
From state 2 action right will lead to state 3
From state 3 action up will lead to state 4
From state 3 action down will lead to state 7
From state 3 action left will lead to state 2
From state 3 action right will lead to state 4
From state 4 action up will lead to state 5
From state 4 action down will lead to state 8
From state 4 action left will lead to state 3
From state 4 action right will lead to state 5
From state 5 action up will lead to state 7
From state 5 action down will lead to sta

IndexError: list index out of range

In [25]:
class Environment:
    def __init__(self):
        self.state_to_location = dict((state, np.array(divmod(state, 6))) for state in range(9))
        self.actions = ['up', 'down', 'left', 'right']

        
        self.R = np.full((9, len(self.actions)), -100)
        self.R[2, 1] = 0  # state 2, action down
        self.R[1, 3] = 0  # state 1, action right
        self.R[7, 3] = 0  # state 7, action right
        self.R[8, 3] = 0  # state 8, action right
        self.R[6, 0] = 100  # state 6, action up (goal state)
        self.R[3, :] = -100  # state 3, all actions
        self.R[4, :] = -100  # state 4, all actions

    
        self.transitions = {
            'up': lambda s: [0, 2, 2, 3, 4, 5, 5, 3, 4][s],
            'down': lambda s: [0, 1, 1, 7, 8, 6, 6, 7, 8][s],
            'left': lambda s: [0, 1, 2, 2, 3, 4, 8, 1, 7][s],
            'right': lambda s: [0, 7, 3, 4, 5, 5, 6, 8, 6][s],
        }

        self.state = 2  

    def Nextstep(self, action):
        next_state = self.transitions[action](self.state)
        reward = self.R[self.state, self.actions.index(action)]
        self.state = next_state
        return next_state, reward
    
    def step(state,action):
        next_state = self.transitions[action](self.state)
        reward = self.R[self.state, self.actions.index(action)]
        self.state = next_state
        return next_state, reward
    
    

    def reset(self):
        self.state = 2  
        return self.state

    def print_transitions(self):
        for state in self.state_to_location:
            for action in self.actions:
                next_state = self.transitions[action](state)
                print(f"From state {state} action {action} will lead to state {next_state}")

env = Environment()
env.print_transitions()

env.Nextstep(0)

IndentationError: expected an indented block after function definition on line 31 (3421959061.py, line 34)