In [1]:
import numpy as np
import pandas as pd
import json
import math

In [2]:
class Simulator:
    """Run predator agents on a 2-D integer grid and record every frame.

    The board is a NumPy integer array in which each cell holds one of the
    codes below. Every agent handed to the simulator gets its ``board_dim``
    attribute set so it can wrap its own moves around the board edges.

    Attributes:
        board: current grid of cell codes.
        predators: list of predator agents (moved by ``simulate``).
        preys: list of prey agents (placed on the board, not moved here).
        frames: list of board snapshots (nested lists), starting with the
            initial board.
    """

    # Cell codes written to the board.
    EMPTY = 0
    PREY = 1
    PREDATOR = 2

    def __init__(self, board_dim, predators=None, preys=None):
        """Build the board and place the agents on it.

        Args:
            board_dim: board shape, passed to ``np.zeros`` (e.g. ``[10, 10]``).
            predators: agents exposing ``coords``; marked with ``PREDATOR``.
            preys: agents exposing ``coords``; marked with ``PREY``.
        """
        # ``None`` sentinels instead of ``[]`` defaults: a mutable default is
        # created once and would be shared by every Simulator built without
        # explicit agent lists.
        predators = [] if predators is None else predators
        preys = [] if preys is None else preys

        self.board = np.zeros(board_dim, dtype=int)

        for predator in predators:
            self.board[predator.coords[0], predator.coords[1]] = self.PREDATOR
            predator.board_dim = board_dim

        # Preys are written after predators, so a prey wins a shared cell.
        for prey in preys:
            self.board[prey.coords[0], prey.coords[1]] = self.PREY
            prey.board_dim = board_dim

        self.predators = predators
        self.preys = preys
        self.frames = [self.board.tolist()]

    def save_history(self, filepath):
        """Write all recorded frames to ``filepath`` as JSON."""
        with open(filepath, "w") as f:
            json.dump(self.frames, f)

    def simulate(self, steps=5, history_path="history.json"):
        """Advance the simulation and persist the frame history.

        Each step lets every predator choose and take one action, updates the
        board accordingly, appends a snapshot to ``frames`` and rewrites the
        history file, so the file on disk is current after every step.

        Args:
            steps: number of simulation steps to run.
            history_path: JSON file the frame history is written to.
        """
        for _ in range(steps):
            for pred in self.predators:
                old_coords, new_coords = pred.take_action(pred.decide_action(self.board))

                # Clear the old cell first so a move that ends on the same
                # cell still leaves the predator marked.
                self.board[old_coords[0], old_coords[1]] = self.EMPTY
                self.board[new_coords[0], new_coords[1]] = self.PREDATOR

            self.frames.append(self.board.tolist())
            self.save_history(history_path)

In [3]:
class Agent:
    """Grid agent that moves on a wrap-around board and acts from a Q-table.

    An agent observes a square window of the board centred on itself, encodes
    that window (minus its own cell) as a string, and uses the string as the
    key into ``q_table``, which maps each state to one value per action.

    Attributes:
        coords: ``[row, col]`` position on the board.
        board_dim: ``[rows, cols]`` of the board; may be passed to the
            constructor or assigned later by whoever owns the board.
        actions: action name -> ``[d_row, d_col]`` displacement.
        q_table: encoded state -> list of four action values, in the order
            of ``actions``.
    """

    # Half-width of the observation window: the agent sees a square of side
    # ``2 * vision_radius + 1`` centred on its own cell.
    vision_radius = 2

    def __init__(self, coords, board_dim=None):
        self.coords = coords
        self.board_dim = board_dim

        self.actions = {
            "up": [-1, 0],
            "down": [1, 0],
            "left": [0, -1],
            "right": [0, 1]
        }

        # Seed the table with the all-zero state; its length is the window
        # size minus the agent's own cell (24 for radius 2, 8 for radius 1).
        window_cells = (2 * self.vision_radius + 1) ** 2 - 1
        self.q_table = {
            "0" * window_cells: [0, 0, 0, 0]
        }

    def take_action(self, action):
        """
        Take an action (up, down, left, right), update and return the new state

        Args:
            action: key of ``self.actions``.

        Returns:
            Tuple ``(old_coords, new_coords)``.

        Raises:
            RuntimeError: if ``board_dim`` has not been set yet.
            KeyError: if ``action`` is not a known action name.
        """
        if self.board_dim is None:
            raise RuntimeError(
                "board_dim is not set; pass it to the constructor or assign it before moving"
            )

        d_row, d_col = self.actions[action]

        # Python's % is never negative for a positive modulus, so this single
        # expression wraps around both the low and the high edge.
        row = (self.coords[0] + d_row) % self.board_dim[0]
        col = (self.coords[1] + d_col) % self.board_dim[1]

        old_coords = self.coords
        self.coords = [row, col]

        # TODO: update the Q-table here (not implemented yet).

        return old_coords, self.coords

    def encode_state(self, state, n=None):
        """Encode an observation window as a string key for the Q-table.

        The window is flattened row by row and the centre cell (the agent's
        own position) is dropped; the remaining values are concatenated.

        Args:
            state: NumPy array holding a ``(2n+1) x (2n+1)`` window.
            n: window radius; defaults to ``self.vision_radius``.

        Returns:
            String with one character group per remaining cell.
        """
        if n is None:
            n = self.vision_radius

        flat = state.flatten()
        center = (2 * n + 1) ** 2 // 2  # flat index of the window's centre cell
        without_center = np.concatenate([flat[:center], flat[center + 1:]])
        return "".join(str(el) for el in without_center)

    def get_state(self, board):
        """Return the observation window around the agent, wrapping at edges.

        Args:
            board: 2-D NumPy array.

        Returns:
            Array of side ``2 * vision_radius + 1`` whose centre cell is the
            agent's own position; rows and columns that fall off one edge are
            taken from the opposite edge.
        """
        n_rows, n_cols = board.shape
        offsets = np.arange(-self.vision_radius, self.vision_radius + 1)

        # Modular row/column indices make the wrap-around work for any board
        # size, instead of special-casing which edges are crossed.
        rows = (self.coords[0] + offsets) % n_rows
        cols = (self.coords[1] + offsets) % n_cols
        return board[np.ix_(rows, cols)]

    def decide_action(self, board):
        """Pick the action with the highest Q-value for the current state.

        Unseen states are added to the Q-table with all-zero values. Ties are
        broken uniformly at random via ``np.random.choice``.

        Args:
            board: 2-D NumPy array holding the current board.

        Returns:
            Name of the chosen action (a key of ``self.actions``).
        """
        state_key = self.encode_state(self.get_state(board))
        q_values = self.q_table.setdefault(state_key, [0, 0, 0, 0])

        best_value = max(q_values)
        best_indices = [i for i, v in enumerate(q_values) if v == best_value]

        return list(self.actions)[np.random.choice(best_indices)]


class Predator(Agent):
    """Agent that observes a 5x5 window (radius 2) around itself."""

    vision_radius = 2


class Prey(Agent):
    """Agent that observes a 3x3 window (radius 1) around itself."""

    vision_radius = 1

    

In [4]:
# Agents break ties between equally valued actions with np.random.choice, so
# seed NumPy's global RNG to make Restart & Run All reproduce the same history.
SEED = 42
np.random.seed(SEED)

# Board shape as [rows, cols].
board_dim = [10, 10]

# One predator and three preys, each given as [row, col].
pred = Predator([5, 5])
p1 = Prey([1, 1])
p2 = Prey([2, 2])
p3 = Prey([5, 8])

s = Simulator(board_dim, [pred], [p1, p2, p3])

# Runs the simulation and writes the frame history to "history.json".
s.simulate()