In [2]:
import numpy as np

In [3]:
# Scratch check of NumPy integer-array indexing: a tuple of (row list, column
# list) addresses the individual cells (0, 0), (0, 1) and (1, 0) in one
# assignment, the same as setting k[0, 0], k[0, 1] and k[1, 0] one by one.
indices = ([0, 0, 1], [0, 1, 0])

k = np.zeros((5, 5))
k[indices] = 1

# Displayed result: row and column indices of the cells that were just set.
np.nonzero(k)

(array([0, 0, 1], dtype=int64), array([0, 1, 0], dtype=int64))

In [4]:
# Coordinate grids matching k's shape: the first array holds each cell's row
# index, the second holds each cell's column index.
np.indices(k.shape)

array([[[0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4]],

       [[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]]])

In [8]:
# Unit steps expressed as (row delta, column delta).
UP, DOWN, LEFT, RIGHT = (
    np.array(step) for step in ((-1, 0), (1, 0), (0, -1), (0, 1))
)

# Action table: row i is the step taken for action index i.
MOVING_ACTIONS = np.stack((UP, DOWN, LEFT, RIGHT))

# Values stored in the cells of a grid array.
GRID_EMPTY, GRID_WALL, GRID_PRIZE = 0, 1, 8

# Wall layouts. Each one is a tuple of (row list, column list) so it can be used
# directly as a NumPy index; zip() transposes the (row, col) wall cells written
# out below into that form.
FOUR_ROOM = tuple(map(list, zip(
    (0, 2), (2, 0), (2, 1), (2, 3), (2, 4), (4, 2),
)))
MAZE = tuple(map(list, zip(
    (1, 1), (1, 2), (1, 3), (2, 3), (3, 1), (4, 1), (4, 2), (4, 3), (4, 4),
)))
# Layout with no walls at all.
EMPTY = None

class Grid():
    """5x5 grid world with walls, a single prize, and an agent position.

    Attributes:
        grid: 5x5 array whose cells hold GRID_EMPTY, GRID_WALL or GRID_PRIZE.
        position: length-2 integer array (row, col) of the agent; starts at (2, 2).
        prize_location: (row, col) tuple of the cell currently holding the prize.
    """

    def __init__(self, maze_type):
        """Build the grid, lay out the walls, and place a prize at random.

        Args:
            maze_type: EMPTY for no walls, otherwise a (row list, column list)
                tuple (such as FOUR_ROOM or MAZE) naming the wall cells.
        """
        self.grid = np.zeros((5, 5))
        self.position = np.array([2, 2])
        # Identity check: the layout is either the EMPTY sentinel or an index tuple.
        if maze_type is not EMPTY:
            self.grid[maze_type] = GRID_WALL
        self.prize_location = self.__random_init_prize()

    def __random_init_prize(self):
        """Mark one uniformly chosen empty cell (never the start cell) as the prize.

        Returns:
            The (row, col) tuple of the chosen cell.
        """
        candidates = [
            (i, j)
            for i in range(5)
            for j in range(5)
            if self.grid[i, j] == GRID_EMPTY and (i, j) != (2, 2)
        ]
        chosen = candidates[np.random.randint(len(candidates))]
        self.grid[chosen] = GRID_PRIZE
        return chosen

    def move(self, direction):
        """Try to move the agent one step.

        The step is rejected, and the agent stays put, when the target cell
        lies outside the grid or is a wall.

        Args:
            direction: length-2 (row delta, col delta) step, e.g. a row of
                MOVING_ACTIONS.

        Returns:
            (reward, position): reward is 1 when the agent ends the step on the
            prize cell and 0 otherwise; position is a copy of the agent's
            (row, col) array after the step.
        """
        target = self.position + np.asarray(direction)
        out_of_bounds = (target < 0).any() or (target >= self.grid.shape).any()
        # The cell must be read with a tuple index: indexing the 2-D grid with
        # the position *array* selects two whole rows instead of one cell.
        if out_of_bounds or self.grid[tuple(target)] == GRID_WALL:
            return 0, self.position.copy()

        self.position = target
        reward = 1 if self.grid[tuple(target)] == GRID_PRIZE else 0
        # Hand back a copy so callers cannot alias the agent's live position.
        return reward, self.position.copy()

    def print_grid(self):
        """Print the grid: '#' wall, 'P' prize, '*' agent, 'O' any other cell.

        The prize symbol takes precedence when the agent stands on the prize.
        """
        for i in range(5):
            for j in range(5):
                cell = self.grid[i, j]
                if cell == GRID_WALL:
                    symbol = '#'
                elif cell == GRID_PRIZE:
                    symbol = 'P'
                elif i == self.position[0] and j == self.position[1]:
                    symbol = '*'
                else:
                    symbol = 'O'
                print(symbol, end=' ')
            print()

    def restart(self):
        """Return the agent to (2, 2) and move the prize to a new random cell."""
        self.position = np.array([2, 2])
        self.grid[self.prize_location] = GRID_EMPTY
        self.prize_location = self.__random_init_prize()

In [6]:
# Scratch check: .any() on an element-wise comparison is true as soon as one
# coordinate satisfies it.
pos = np.array([3, 2])
exceeds_two = pos > 2
if np.any(exceeds_two):
    print("True")

True


In [11]:
# Show the four-room layout, then restart twice to see the prize being re-drawn.
g = Grid(FOUR_ROOM)
g.print_grid()
for _restart_round in range(2):
    print()
    g.restart()
    g.print_grid()

O O # O O 
O P O O O 
# # * # # 
O O O O O 
O O # O O 

O O # O O 
O P O O O 
# # * # # 
O O O O O 
O O # O O 

O O # O O 
O O O O O 
# # * # # 
O O P O O 
O O # O O 


In [14]:
# Scratch check: indexing a 4-D table with three integers leaves a 1-D row of
# per-action values, and argmax returns the position of its largest entry.
Q = np.zeros((5, 5, 4, 4))
Q[2, 2, 3, :3] = (3, 2, 1)

Q[2, 2, 3].argmax()

0

In [81]:
# Settings for the agents defined below.
# NUM_SYMBOLS is not referenced by the classes in this cell; presumably it is the
# `num_symbols` value passed to Sender/Receiver — TODO confirm.
NUM_SYMBOLS = 25
# Weight applied to the best next-state Q-value in Sender.receive_message's update.
DISCOUNT = 0.9
# Per-step probability that Sender.receive_message ends its move loop.
TERMINATION_PROB = 0.1

class Sender():
    """Tabular Q-learning agent that walks the grid after being given a message.

    Attributes:
        grid: object exposing move(direction) -> (reward, position).
        Q: value table of shape (5, 5, num_symbols, len(MOVING_ACTIONS)),
            indexed by (row, col, message, action).
        current_state: [row, col, message] list; message is -1 until the first
            call to receive_message.
        learning_rate: step size of the Q-value update.
        eps: default exploration probability used by select_action.
    """

    def __init__(self, grid, num_symbols, learning_rate, eps=0.1):
        """Store the grid and hyperparameters and zero-initialise the Q-table.

        Args:
            grid: environment to act in.
            num_symbols: number of distinct messages the table has room for.
            learning_rate: step size of the Q-value update.
            eps: exploration probability used when select_action is called
                without one. The 0.1 default is an arbitrary choice made so
                receive_message can run; tune it as needed.
        """
        self.grid = grid
        self.Q = np.zeros((5, 5, num_symbols, len(MOVING_ACTIONS)))
        self.current_state = [2, 2, -1]
        self.learning_rate = learning_rate
        self.eps = eps

    def receive_message(self, message):
        """Act on the grid under `message`, updating Q after every step.

        Steps are taken until one yields reward 1 or a random draw falls below
        TERMINATION_PROB.

        NOTE(review): current_state is never re-synchronised with the grid, so
        if the grid is restarted between calls the stored row/col may be stale
        — confirm how callers handle this.

        Args:
            message: integer symbol used as the third index of the Q-table.
        """
        self.current_state[2] = message

        while True:
            # Index with tuples: a plain list would be treated as a fancy index
            # over the first axis instead of selecting one (row, col, message).
            state = tuple(self.current_state)
            action_idx = self.select_action()
            reward, new_position = self.grid.move(MOVING_ACTIONS[action_idx])
            self.new_state = [int(new_position[0]), int(new_position[1]), message]

            action_values = self.Q[state]  # view onto the table row for this state
            best_next = np.max(self.Q[tuple(self.new_state)])
            action_values[action_idx] = (
                (1 - self.learning_rate) * action_values[action_idx]
                + self.learning_rate * (reward + DISCOUNT * best_next)
            )
            self.current_state = self.new_state

            # termination condition
            if np.random.random() < TERMINATION_PROB or reward == 1:
                break

    def select_action(self, eps=None):
        """Pick an action index epsilon-greedily for the current state.

        Args:
            eps: probability of exploring with a uniformly random action;
                falls back to self.eps when omitted.

        Returns:
            Index into MOVING_ACTIONS.
        """
        if eps is None:
            eps = self.eps
        if np.random.random() < eps:
            return np.random.randint(len(MOVING_ACTIONS))
        return np.argmax(self.Q[tuple(self.current_state)])

class Receiver():
    """Agent holding a Q-table over (symbol, action) pairs.

    Only construction is implemented in this cell.

    Attributes:
        grid: the environment object handed to the constructor.
        Q: zero-initialised table of shape (num_symbols, len(MOVING_ACTIONS)).
    """

    def __init__(self, grid, num_symbols):
        """Store the grid and allocate the Q-table.

        Args:
            grid: environment object to keep a reference to.
            num_symbols: number of rows (symbols) in the Q-table.
        """
        self.grid = grid
        self.Q = np.zeros((num_symbols, len(MOVING_ACTIONS)))