In [123]:
import tqdm
import ctypes
import random
import torch
import numpy as np

from time import sleep
from collections import defaultdict
from grid import Grid, RandomGridGenerator

from IPython.display import clear_output
from utils import two_int_to_hash, numpy_arr_to_str

In [124]:
# GLOBAL CONFIGS
# Grid dimensions; get_dest_from_action wraps movement around these bounds.
GRID_NUMBER_OF_ROWS = 10
GRID_NUMBER_OF_COLS = 10

# Distance argument handed to Grid.getNeighbors when building each species' observation.
PREDATOR_VISION_DIST = 3
PREY_VISION_DIST = 2

# Integer codes stored in the numeric grid (0 marks an empty cell).
PREDATOR_VALUE = 1
PREY_VALUE = 2

# Relative share of each species in the population pool given to the grid generator.
PREDATOR_APPROXIMATE_SHARE = 0.4
PREY_APPROXIMATE_SHARE = 0.6

# Initial remaining_life of a freshly created Animal; it drops by one per epoch.
PREDATOR_DEFAULT_HEALTH = 100
# This line used to re-assign PREDATOR_DEFAULT_HEALTH, which lowered predator
# health to 60 and left PREY_DEFAULT_HEALTH (used by grid_of_nums_to_objects)
# undefined.
PREY_DEFAULT_HEALTH = 60

# Q-learning hyper-parameters.
alpha = 0.1    # learning rate
gamma = 0.6    # discount factor applied to the best next-state value
epsilon = 0.1  # probability of taking a random allowed action

# Action names; an action index is a position in this list.
action_space = [
    "up",
    "down",
    "left",
    "right",
    "stay",
]

# Derived from action_space so the two can never drift apart.
action_idxs = list(range(len(action_space)))

In [125]:
class AgentNet:
    """Tabular epsilon-greedy Q-learning policy.

    ``q_table`` maps a stringified observation (``numpy_arr_to_str(state)``)
    to an array with one Q-value per action index (0..4, matching the five
    entries of ``action_space``).
    """

    def __init__(self):
        # Unknown states get the same 5-entry vector as _get_default_knowledge.
        # The previous 4-entry default had no slot for action 4 ("stay") and
        # raised IndexError as soon as such a row was indexed with that action.
        self.q_table = defaultdict(self._get_default_knowledge)

    def choose_next_cell(self, state):
        """Choose an action index for the observation ``state``.

        With probability ``epsilon`` a uniformly random allowed action is
        returned; otherwise the allowed action with the highest Q-value
        (ties resolve to the earliest entry of the allowed list).

        Returns:
            An index into ``action_space``.
        """
        hashed_state = numpy_arr_to_str(state)
        q_values = self.q_table[hashed_state]  # created on first access

        allowed_actions = self._get_allowed_actions(state)

        if random.uniform(0, 1) < epsilon:
            return random.choice(allowed_actions)

        # argmax yields a position inside allowed_actions, not an action index,
        # so it has to be translated back before being returned.
        best_position = int(np.argmax(q_values[allowed_actions]))
        return allowed_actions[best_position]

    def _get_allowed_actions(self, state):
        """Return the action indices whose target cell is empty in ``state``.

        "stay" (4) is always allowed. up/down/left/right (0/1/2/3) are allowed
        when the corresponding neighbour of ``state[1][1]`` holds 0.
        """
        # NOTE(review): these indices treat state[1][1] as the agent's own cell,
        # i.e. a 3x3 window. Callers pass the result of Grid.getNeighbors with a
        # distance of 2 or 3 -- confirm the window shape matches this layout.
        actions = [4]

        if state[0][1] == 0:
            actions.append(0)

        if state[2][1] == 0:
            actions.append(1)

        if state[1][0] == 0:
            actions.append(2)

        if state[1][2] == 0:
            actions.append(3)

        return actions

    def _get_default_knowledge(self):
        """Return the initial Q-values for an unseen state: one zero per action."""
        return np.zeros(5)



class PreyNet(AgentNet):
    """Policy used by prey animals; inherits everything from AgentNet unchanged."""

class PredatorNet(AgentNet):
    """Policy used by predator animals; inherits everything from AgentNet unchanged."""


In [126]:
# One policy instance per species; Animal.move dispatches to these by animal type.
predator_net, prey_net = PredatorNet(), PreyNet()

In [127]:
class Animal:
    """A single predator or prey living on the grid.

    Attributes (fixed via ``__slots__``):
        last_move: initialised to ``None``; not updated by any method here.
        remaining_life: epochs left before the animal dies.
        _type: species code, compared against PREDATOR_VALUE / PREY_VALUE.
    """

    __slots__ = ("last_move", "remaining_life", "_type")

    def __init__(self, remaining_life, _type):
        self._type = _type
        self.remaining_life = remaining_life
        self.last_move = None

    def process_epoch(self):
        """Age the animal by one epoch."""
        self.remaining_life = self.remaining_life - 1

    def move(self, vision_matrix):
        """Ask the species' policy for an action given ``vision_matrix``.

        Returns whatever the policy's ``choose_next_cell`` returns, or ``None``
        when the animal's type is neither predator nor prey.
        """
        if self._type == PREDATOR_VALUE:
            return predator_net.choose_next_cell(vision_matrix)
        if self._type == PREY_VALUE:
            return prey_net.choose_next_cell(vision_matrix)
        return None


In [128]:
def get_dest_from_action(action, row, col):
    """Translate an action index into the cell reached from ``(row, col)``.

    ``action`` indexes ``action_space``. Vertical moves wrap around
    ``GRID_NUMBER_OF_ROWS`` and horizontal moves around
    ``GRID_NUMBER_OF_COLS``; the coordinate that is not being moved is passed
    through untouched.

    Returns:
        A ``(row, col)`` tuple.

    Raises:
        Exception: if the looked-up action name is not a known move.
    """
    action = action_space[action]

    if action in ("up", "down"):
        row_step = 1 if action == "down" else -1
        return (row + row_step) % GRID_NUMBER_OF_ROWS, col

    if action in ("left", "right"):
        col_step = 1 if action == "right" else -1
        return row, (col + col_step) % GRID_NUMBER_OF_COLS

    if action == "stay":
        return row, col

    raise Exception(f"Action can't be {action}")
    
def grid_of_nums_to_objects(grid: Grid) -> np.array:
    """Build an object array of Animal instances mirroring ``grid.grid``.

    Every cell equal to PREDATOR_VALUE or PREY_VALUE receives a new Animal
    with that species' default health; all other cells stay ``None``.
    """
    # assumes grid.grid is a 2-D numpy array (its .shape is used below) -- TODO confirm
    animals = np.empty(grid.grid.shape, dtype=object)

    for (r, c), cell in np.ndenumerate(grid.grid):
        if cell == PREDATOR_VALUE:
            animals[r, c] = Animal(PREDATOR_DEFAULT_HEALTH, PREDATOR_VALUE)
        elif cell == PREY_VALUE:
            animals[r, c] = Animal(PREY_DEFAULT_HEALTH, PREY_VALUE)

    return animals


In [129]:
rgg = RandomGridGenerator(GRID_NUMBER_OF_ROWS, GRID_NUMBER_OF_COLS)

# Population pool handed to the generator: species codes repeated in
# proportion to their configured shares.
# NOTE(review): the 100 multiplier is hard-coded; it happens to equal
# GRID_NUMBER_OF_ROWS * GRID_NUMBER_OF_COLS -- confirm whether it should track
# the grid size.
generated_grid = rgg.getGrid(
    [PREDATOR_VALUE] * round(PREDATOR_APPROXIMATE_SHARE * 100)
    + [PREY_VALUE] * round(PREY_APPROXIMATE_SHARE * 100)
)

In [130]:
# Wrap the generated numeric layout in the project's Grid helper.
grid = Grid(generated_grid)
# Parallel object array: an Animal instance for every predator/prey cell of grid.
object_grid = grid_of_nums_to_objects(grid)



In [131]:


# Simulation settings for this cell (defaults reproduce the previous hard-coded values).
N_EPISODES = 100000        # upper bound on simulated epochs
MOVE_PERIOD = 4            # animals act only on epochs divisible by this, otherwise they hold position
RENDER_EVERY = 1           # redraw every N epochs; raise (e.g. 100) for long runs
RENDER_PAUSE_SECONDS = 1   # pause after each redraw so the frame stays readable

# Population history: one entry per completed epoch.
prey_c = []
pred_c = []


for time in tqdm.tqdm(range(N_EPISODES)):

    # The next epoch is assembled in fresh buffers and swapped in after the sweep.
    next_grid = np.zeros((grid.rown, grid.coln))
    next_object_grid = np.empty(object_grid.shape, dtype=object)

    # NOTE(review): destinations are written into next_grid unconditionally, so an
    # animal that lands on a cell already claimed earlier in this sweep replaces
    # that occupant. Allowed moves are derived from the current grid, not from
    # next_grid, so such collisions are possible.
    for row, line in enumerate(grid.grid):
        for col, item in enumerate(line):
            if item == PREY_VALUE:
                prey: Animal = object_grid[row][col]
                prey.process_epoch()
                if prey.remaining_life == 0:
                    # Died of old age: clear it from the current grid and do not carry it over.
                    grid.grid[row][col] = 0
                    object_grid[row][col] = None
                    continue

                if time % MOVE_PERIOD == 0:
                    # NOTE(review): prey_net picks the move but is never trained in this loop.
                    vision_space = grid.getNeighbors(row, col, PREY_VISION_DIST)
                    action = prey.move(vision_space)

                    dest = get_dest_from_action(action, row, col)
                    next_grid[dest[0], dest[1]] = item
                    next_object_grid[dest[0], dest[1]] = prey

                else:
                    next_grid[row, col] = item
                    next_object_grid[row, col] = prey

            elif item == PREDATOR_VALUE:
                predator: Animal = object_grid[row][col]
                predator.process_epoch()
                if predator.remaining_life == 0:
                    grid.grid[row][col] = 0
                    object_grid[row][col] = None
                    continue

                if time % MOVE_PERIOD == 0:
                    vision_space = grid.getNeighbors(row, col, PREDATOR_VISION_DIST)
                    state_key = numpy_arr_to_str(vision_space)
                    action = predator.move(vision_space)

                    dest = get_dest_from_action(action, row, col)

                    # Reward 1 when the destination already holds a prey placed
                    # earlier in this sweep; that prey is overwritten below.
                    reward = 1 if next_grid[dest[0], dest[1]] == PREY_VALUE else 0

                    # One-step Q-learning update for the predator policy.
                    next_state_key = numpy_arr_to_str(
                        grid.getNeighborsFromNext(next_grid, dest[0], dest[1], PREDATOR_VISION_DIST)
                    )
                    max_value = np.max(predator_net.q_table[next_state_key])
                    old_q_value = predator_net.q_table[state_key][action]
                    new_q_value = (1 - alpha) * old_q_value + alpha * (reward + gamma * max_value)
                    predator_net.q_table[state_key][action] = new_q_value

                    next_grid[dest[0], dest[1]] = item
                    next_object_grid[dest[0], dest[1]] = predator

                else:
                    next_grid[row, col] = item
                    next_object_grid[row, col] = predator

    grid = Grid(next_grid)
    object_grid = next_object_grid

    prey_count = np.count_nonzero(grid.grid == PREY_VALUE)
    pred_count = np.count_nonzero(grid.grid == PREDATOR_VALUE)
    prey_c.append(prey_count)
    pred_c.append(pred_count)

    if time % RENDER_EVERY == 0:
        clear_output(wait=True)
        print("Episode: {}".format(time + 1))
        print(grid.grid)
        print(object_grid)
        sleep(RENDER_PAUSE_SECONDS)

    if prey_count == 0 and pred_count == 0:
        # Nothing can change any more, so stop instead of spinning through the
        # remaining epochs. (An earlier, disabled idea was to reseed prey on
        # roughly 10% of the empty cells here instead of stopping.)
        print("no more animals on the grid")
        break


Episode: 17
[[0 0 0 2 0 2 0 0 0 2]
 [0 0 2 0 0 0 0 0 0 2]
 [0 0 0 0 2 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 1 0 0 1 2 0]
 [0 2 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1]
 [0 2 0 0 1 0 0 0 2 0]
 [0 0 0 1 0 0 1 1 0 1]
 [0 0 0 0 0 2 0 0 0 0]]
[[None None None <__main__.Animal object at 0x7f5130809b80> None
  <__main__.Animal object at 0x7f51307b07c0> None None None
  <__main__.Animal object at 0x7f513080a780>]
 [None None <__main__.Animal object at 0x7f51307b0c80> None None None
  None None None <__main__.Animal object at 0x7f51307b25c0>]
 [None None None None <__main__.Animal object at 0x7f51307b38c0> None
  None None None None]
 [None None None None None None None None None None]
 [None <__main__.Animal object at 0x7f51307b05c0> None None
  <__main__.Animal object at 0x7f513016ab40> None None
  <__main__.Animal object at 0x7f513016a680>
  <__main__.Animal object at 0x7f5130169a40> None]
 [None <__main__.Animal object at 0x7f513080a300> None None None None
  None None None None]
 [Non

  0%|                                                                          | 16/100000 [00:16<29:07:43,  1.05s/it]


KeyboardInterrupt: 