In [78]:
import tqdm
import random
import numpy as np

from time import sleep
from collections import defaultdict
from grid import Grid, RandomGridGenerator

from IPython.display import clear_output
from utils import two_int_to_hash, numpy_arr_to_str

In [79]:
# GLOBAL CONFIGS

# Grid dimensions; movement wraps around at these sizes.
GRID_NUMBER_OF_ROWS, GRID_NUMBER_OF_COLS = 10, 10

# Distance argument passed to the grid's neighbour lookup for each species.
PREDATOR_VISION_DIST, PREY_VISION_DIST = 3, 2

# Cell markers written into the grid for each species.
PREDATOR_VALUE, PREY_VALUE = 1, -1

# Relative shares used when building the value list for the random grid.
PREDATOR_APPROXIMATE_SHARE, PREY_APPROXIMATE_SHARE = 0.4, 0.6

# Q-learning hyper-parameters:
#   alpha   - weight of the new estimate in the Q-value update
#   gamma   - discount applied to the best next-state value
#   epsilon - probability of picking a random action instead of the greedy one
alpha, gamma, epsilon = 0.1, 0.6, 0.1

# Action names; an agent's action is an index into this list.
action_space = ["up", "down", "left", "right"]

# One index per entry of action_space.
action_idxs = list(range(len(action_space)))

In [80]:
class Predator:
    """Epsilon-greedy agent with a tabular Q-function.

    Attributes:
        q_table: maps a hashable state key to a length-4 array of action
            values. States that have not been seen yet start at all zeros.
    """

    def __init__(self):
        self.q_table = defaultdict(lambda: np.zeros(4))

    def choose_next_cell(self, state):
        """Pick an action index for ``state``.

        Args:
            state: hashable state key (callers in this file pass the output
                of ``numpy_arr_to_str``).

        Returns:
            A random element of ``action_idxs`` with probability ``epsilon``,
            otherwise the index of the largest Q-value stored for ``state``
            (``np.argmax``, so ties resolve to the lowest index).
        """
        # Register the state before deciding, so the table always contains it
        # afterwards regardless of which branch is taken.
        # Original note: initial values could come from another model.
        action_values = self.q_table.setdefault(state, np.zeros(4))

        explore = random.uniform(0, 1) < epsilon
        if explore:
            return random.choice(action_idxs)
        return np.argmax(action_values)

class Prey:
    """Epsilon-greedy agent with a tabular Q-function.

    Attributes:
        q_table: maps a hashable state key to a length-4 array of action
            values. States that have not been seen yet start at all zeros.
    """

    def __init__(self):
        self.q_table = defaultdict(lambda: np.zeros(4))

    def choose_next_cell(self, state):
        """Pick an action index for ``state``.

        Args:
            state: hashable state key (callers in this file pass the output
                of ``numpy_arr_to_str``).

        Returns:
            A random element of ``action_idxs`` with probability ``epsilon``,
            otherwise the index of the largest Q-value stored for ``state``
            (``np.argmax``, so ties resolve to the lowest index).
        """
        # Register the state before deciding, so the table always contains it
        # afterwards regardless of which branch is taken.
        # Original note: initial values could come from another model.
        action_values = self.q_table.setdefault(state, np.zeros(4))

        explore = random.uniform(0, 1) < epsilon
        if explore:
            return random.choice(action_idxs)
        return np.argmax(action_values)


In [81]:
class Simulation:
    """Movement helpers for agents on the wrap-around grid."""

    @staticmethod
    def get_dest_from_action(action, row, col):
        """Translate an action index into the destination cell.

        Args:
            action: index into ``action_space``.
            row: current row of the agent.
            col: current column of the agent.

        Returns:
            ``(row, col)`` of the destination. The coordinate that changes is
            wrapped modulo the grid size; the other is returned as given.

        Raises:
            Exception: if the resolved action name is not one of
                "up", "down", "left", "right".
        """
        action_name = action_space[action]

        # Vertical moves only touch the row.
        if action_name in ("up", "down"):
            step = 1 if action_name == "down" else -1
            return (row + step) % GRID_NUMBER_OF_ROWS, col

        # Horizontal moves only touch the column.
        if action_name in ("left", "right"):
            step = 1 if action_name == "right" else -1
            return row, (col + step) % GRID_NUMBER_OF_COLS

        raise Exception(f"Action can't be {action_name}")


In [82]:
# Generator for a grid with the configured number of rows and columns.
rgg = RandomGridGenerator(GRID_NUMBER_OF_ROWS, GRID_NUMBER_OF_COLS)

# Value list handed to getGrid: each species' marker repeated in proportion
# to its share (share * 100 entries, rounded). How getGrid consumes this list
# is not visible here -- presumably it samples cell values from it; confirm
# against grid.py.
generated_grid = rgg.getGrid(
    [PREDATOR_VALUE] * round(PREDATOR_APPROXIMATE_SHARE * 100)
    + [PREY_VALUE] * round(PREY_APPROXIMATE_SHARE * 100)
)

In [83]:
# One shared learner per species: every predator on the grid reads and updates
# the same Predator Q-table, and every prey consults the same Prey Q-table.
predator, prey, simulation = Predator(), Prey(), Simulation()

In [87]:
# Parameters of this run (previously inline magic numbers).
N_STEPS = 100000                 # total number of simulation steps
PREY_MOVE_PERIOD = 4             # prey move on steps divisible by this
PREDATOR_MOVE_PERIOD = 3         # predators move and learn on steps divisible by this
MIN_PREY_COUNT = 10              # respawn prey when fewer than this remain
PREY_RESPAWN_PROBABILITY = 0.1   # per-empty-cell chance of spawning a prey
REPORT_PERIOD = 100              # print the grid every this many steps
EMPTY_VALUE = 0                  # next_state starts as zeros, so 0 marks an empty cell

grid = Grid(generated_grid)
prey_c = []  # prey count recorded after every step
pred_c = []  # predator count recorded after every step

for time in tqdm.tqdm(range(N_STEPS)):
    # The next grid is built from scratch; agents are written into it as they
    # move (or stay), prey first and predators second.
    next_state = np.zeros((grid.rown, grid.coln))

    # ---- Prey phase -------------------------------------------------------
    # NOTE(review): in the visible code prey.q_table is never updated, so prey
    # act on an all-zero table (greedy choice is always index 0) apart from
    # epsilon-random moves.
    # NOTE(review): two prey choosing the same destination collapse into one.
    prey_moves = time % PREY_MOVE_PERIOD == 0
    for row, line in enumerate(grid.grid):
        for col, item in enumerate(line):
            if item != PREY_VALUE:
                continue
            if prey_moves:
                vision_space = grid.getNeighbors(row, col, PREY_VISION_DIST)
                prey_action = prey.choose_next_cell(numpy_arr_to_str(vision_space))
                dest = simulation.get_dest_from_action(prey_action, row, col)
                next_state[dest[0], dest[1]] = item
            else:
                next_state[row, col] = item

    # ---- Predator phase ---------------------------------------------------
    # NOTE(review): a predator written onto a cell overwrites whatever is
    # there; only a moving predator landing on a prey is rewarded.
    predator_moves = time % PREDATOR_MOVE_PERIOD == 0
    for row, line in enumerate(grid.grid):
        for col, item in enumerate(line):
            if item != PREDATOR_VALUE:
                continue
            if not predator_moves:
                next_state[row, col] = item
                continue

            vision_space = grid.getNeighbors(row, col, PREDATOR_VISION_DIST)
            # Compute the state key once; it is needed for both the action
            # choice and the Q-value update.
            state_key = numpy_arr_to_str(vision_space)
            predator_action = predator.choose_next_cell(state_key)
            dest = simulation.get_dest_from_action(predator_action, row, col)

            # Reward 1 when the destination currently holds a prey in the
            # partially built next grid, else 0.
            reward = 1 if next_state[dest[0], dest[1]] == PREY_VALUE else 0
            next_state[dest[0], dest[1]] = item

            # Q-learning update:
            #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', .))
            # s' is the predator's view around its destination in next_state
            # as built so far (predators later in the scan are not placed yet).
            next_view = grid.getNeighborsFromNext(
                next_state, dest[0], dest[1], PREDATOR_VISION_DIST
            )
            max_value = np.max(predator.q_table[numpy_arr_to_str(next_view)])
            q_values = predator.q_table[state_key]
            q_values[predator_action] = (
                (1 - alpha) * q_values[predator_action]
                + alpha * (reward + gamma * max_value)
            )

    grid = Grid(next_state)

    # ---- Prey respawn -----------------------------------------------------
    # When the prey population drops too low, every empty cell independently
    # gets a chance to spawn a new prey.
    if np.count_nonzero(grid.grid == PREY_VALUE) < MIN_PREY_COUNT:
        for row, line in enumerate(grid.grid):
            for col, item in enumerate(line):
                if item == EMPTY_VALUE and random.uniform(0, 1) < PREY_RESPAWN_PROBABILITY:
                    grid.grid[row][col] = PREY_VALUE

    # ---- Bookkeeping ------------------------------------------------------
    prey_c.append(np.count_nonzero(grid.grid == PREY_VALUE))
    pred_c.append(np.count_nonzero(grid.grid == PREDATOR_VALUE))

    if time % REPORT_PERIOD == 0:
        clear_output(wait=True)
        print("Episode: {}".format(time + 1))
        print(grid.grid)


Episode: 99901
[[ 0.  0.  0.  0.  0.  0.  0.  0. -1.  0.]
 [ 0. -1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0. -1.  0.  0.  0. -1.  0.]
 [ 0.  0.  0. -1.  0.  0. -1.  0. -1.  0.]
 [ 0.  0.  0.  0.  0. -1.  0.  0. -1.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. -1.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.  0.  0.  1.]]


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100000/100000 [00:37<00:00, 2657.97it/s]
