In [5]:
import numpy as np
import pandas as pd

In [149]:
class GridBoard:
    """Rectangular grid world: rewards, Q-value tables, walls and a text rendering.

    Walls are stored as ``[col, row]`` pairs. Every DataFrame held by the board
    has one row per grid row and one column per grid column, so the cell at
    grid row ``r`` / column ``c`` is ``frame.iat[r, c]``.

    Attributes:
        rows, cols: board dimensions.
        rewards: float DataFrame of per-cell rewards, initially all zero.
        q_prev, q_curr: DataFrames whose cells each hold an independent
            4-element list of zeros (presumably one value per action
            N/E/S/W -- confirm against the learning code).
        walls: list of ``[col, row]`` wall positions.
        show_board: DataFrame of single-character strings ('.' free, 'X' wall).
    """

    def __init__(self, cols, rows):
        """Create an empty ``rows`` x ``cols`` board with no walls.

        Note the argument order: columns first, then rows.
        """
        self.rows = rows
        self.cols = cols
        self.rewards = pd.DataFrame(np.zeros((rows, cols)))
        # Build the two tables separately so that no cell list is shared,
        # neither between cells of one table nor between q_prev and q_curr.
        self.q_prev = self._new_q_table()
        self.q_curr = self._new_q_table()
        self.walls = []
        self.reset_showboard()

    def _new_q_table(self):
        """Return a fresh Q table whose cells are distinct ``[0, 0, 0, 0]`` lists."""
        cells = [[[0, 0, 0, 0] for _ in range(self.cols)] for _ in range(self.rows)]
        return pd.DataFrame(cells, index=range(self.rows), columns=range(self.cols))

    def _draw_walls(self, walls):
        """Mark each ``[col, row]`` wall in ``walls`` with 'X' on ``show_board``."""
        for col, row in walls:
            # Single-step positional write; chained ``df[c][r] = v`` may write
            # to a temporary copy and is a no-op under pandas copy-on-write.
            self.show_board.iat[row, col] = "X"

    def assignRewards(self, row, col, reward):
        """Set the reward of the cell at grid row ``row``, grid column ``col``.

        Raises:
            IndexError: if the position lies outside the board.
        """
        self.rewards.iat[row, col] = reward

    def reset_showboard(self):
        """Rebuild ``show_board`` with '.' everywhere and 'X' on every wall."""
        self.show_board = pd.DataFrame('.', index=range(self.rows), columns=range(self.cols))
        self._draw_walls(self.walls)

    def set_walls(self, walls):
        """Add wall cells to the board and draw them on ``show_board``.

        Args:
            walls: iterable of ``[col, row]`` pairs (lists or tuples).

        Raises:
            ValueError: if any wall lies outside the board; in that case no
                wall from this call is added.
        """
        # Store plain lists so membership tests against list positions
        # (e.g. ``[col, row] in board.walls``) work even if tuples were passed.
        new_walls = [list(w) for w in walls]
        for col, row in new_walls:
            if not (0 <= col < self.cols and 0 <= row < self.rows):
                raise ValueError(
                    "wall [%s, %s] is outside the %sx%s board"
                    % (col, row, self.cols, self.rows)
                )
        self.walls.extend(new_walls)
        self._draw_walls(new_walls)




In [163]:
class Player:
    """An agent that walks on a grid board one cell at a time.

    The position ``curr_state`` is a ``[col, row]`` list; ``[0, 0]`` is the
    top-left cell, so moving north decreases the row index.
    """

    # Action letter -> (column delta, row delta).
    _MOVES = {'N': (0, -1), 'E': (1, 0), 'S': (0, 1), 'W': (-1, 0)}

    def __init__(self, grid):
        """Place the player at ``[0, 0]`` on ``grid`` and draw it on the board.

        Args:
            grid: board object exposing ``cols``, ``rows``, ``walls``,
                ``show_board`` and ``reset_showboard()``.
        """
        # Keep a reference to the board so move() does not depend on a
        # module-level variable that happens to be called ``grid``.
        self.grid = grid
        self.curr_state = [0, 0]
        self._mark()

    def _mark(self):
        """Draw the player marker '*' at the current position."""
        col, row = self.curr_state
        # Single-step positional write; chained ``df[c][r] = v`` is not
        # guaranteed to modify the frame.
        self.grid.show_board.iat[row, col] = '*'

    def move(self, act):
        """Try to move one cell in direction ``act`` and redraw the board.

        Args:
            act: one of 'N', 'S', 'E', 'W'. Any other value prints
                "Invalid option" and leaves the position unchanged.

        The move is ignored when the target cell is a wall or lies outside
        the board. The board is redrawn in every case.
        """
        grid = self.grid
        step = self._MOVES.get(act)
        if step is None:
            print("Invalid option")
        else:
            col = self.curr_state[0] + step[0]
            row = self.curr_state[1] + step[1]
            # Valid indices are 0 .. cols-1 and 0 .. rows-1 inclusive.
            in_bounds = 0 <= col < grid.cols and 0 <= row < grid.rows
            # Compare as lists so walls given as tuples are honoured as well.
            blocked = any(list(w) == [col, row] for w in grid.walls)
            if in_bounds and not blocked:
                # Update in place so existing references to curr_state stay valid.
                self.curr_state[0], self.curr_state[1] = col, row
        grid.reset_showboard()
        self._mark()
            


In [165]:
# Build a 5x5 board and add two wall cells; walls are [col, row] pairs.
grid = GridBoard(5, 5)
grid.set_walls([[2, 0], [2, 1]])
print(grid.rewards)
print(grid.walls)

# Redraw the board so the walls just added are rendered. This reuses the
# board's own drawing routine instead of repeating a chained-indexing loop here.
grid.reset_showboard()

# Put a reward of 1 in the bottom-right cell.
grid.assignRewards(4, 4, 1)
print(grid.rewards)

# The player starts at [0, 0] (top-left) and marks itself on the board.
player = Player(grid)
print(player.curr_state)
print(grid.show_board)

# Walk a fixed sequence of actions, printing the position and board after
# each step. Moves into a wall leave the player where it is.
path = ['E', 'E', 'E', 'E', 'S', 'E', 'S', 'E', 'N', 'E']

for p in path:
    player.move(p)
    print(player.curr_state)
    print(grid.show_board)

     0    1    2    3    4
0  0.0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0  0.0
3  0.0  0.0  0.0  0.0  0.0
4  0.0  0.0  0.0  0.0  0.0
[[2, 0], [2, 1]]
     0    1    2    3    4
0  0.0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0  0.0
3  0.0  0.0  0.0  0.0  0.0
4  0.0  0.0  0.0  0.0  1.0
[0, 0]
   0  1  2  3  4
0  *  .  X  .  .
1  .  .  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 0]
   0  1  2  3  4
0  .  *  X  .  .
1  .  .  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 0]
   0  1  2  3  4
0  .  *  X  .  .
1  .  .  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 0]
   0  1  2  3  4
0  .  *  X  .  .
1  .  .  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 0]
   0  1  2  3  4
0  .  *  X  .  .
1  .  .  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 1]
   0  1  2  3  4
0  .  .  X  .  .
1  .  *  X  .  .
2  .  .  .  .  .
3  .  .  .  .  .
4  .  .  .  .  .
[1, 1