In [292]:
import numpy as np
from enum import IntEnum
import gym
from gym import spaces


def pad_with(vector, pad_width, iaxis, kwargs):
    """ For padding the grid """
    pad_value = kwargs.get('padder', 10)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value

class Grid:
    """
    Represent a grid and operations on it
    """

    def __init__(self, width, height):
        assert width >= 3
        assert height >= 3

        self.width = width
        self.height = height

        self.grid = np.zeros([self.height, self.width])

    def copy(self):
        from copy import deepcopy
        return deepcopy(self)

    def set(self, i, j, v):
        assert i >= 0 and i < self.height
        assert j >= 0 and j < self.width
        self.grid[i, j] = v

    def set_random(self, v, p):
        zero_indices = np.argwhere(self.grid == 0)
        if zero_indices.size != 0:
            random_index = np.random.randint(0, zero_indices.shape[0])
            i, j = zero_indices[random_index]
            if np.random.random() < p:
                self.grid[i, j] = v
        return i, j

    def get(self, i, j):
        assert i >= 0 and j < self.width
        assert i >= 0 and j < self.height
        return self.grid[i, j]
    
    def reset(self, v, t):
        self.grid = np.where(self.grid==v, t, self.grid)
    
    def update(self, i, j, v1, k, l, v2):
        self.grid[i, j] = v1
        self.grid[k, l] = v2
    
    def fov(self, i, j, dist):
        fov = np.pad(self.grid, dist, pad_with, padder=-1)
        fov = fov[i:i+(2*dist)+1, j:j+(2*dist)+1]
        return fov

    def slice(self, topX, topY, width, height):
        """
        Get a subset of the grid
        """

        grid = Grid(width, height)

        for j in range(0, height):
            for i in range(0, width):
                x = topX + i
                y = topY + j

                if x >= 0 and x < self.width and \
                   y >= 0 and y < self.height:
                    v = self.get(x, y)
                else:
                    v = Wall()

                grid.set(i, j, v)

        return grid

class Actions(IntEnum):
    # Turn left, turn right, move forward
    up = 0
    right = 1
    down = 2
    left = 3
    
    attack_up = 4
    attack_right = 5
    attack_down = 6
    attack_left = 7
    

class Entities(IntEnum):
    food = 1
    poison = 2
    agent = 3
    
class Agents:
    def __init(self):
        self.agents = {}
        
    def add_agent(self, agent):
        self.agents[agent.coordinates] = agent
        
        self.agents = [Agent(self.grid.set_random(self.entities.agent, p=1), self.entities.agent) 
                       for _ in range(self.nr_agents)]

class Agent:
    def __init__(self, coordinates, value):
        self.x, self.y = coordinates
        self.health = 200
        self.value = value
        self.coordinates = list(coordinates)
        self.target_coordinates = list(coordinates)
        self.dead = False
        self.done = False
        
        self.x_target = None
        self.y_target = None

    def move(self, x, y):
        self.x = x
        self.y = y
        self.coordinates = [x, y]
        
    def target_location(self, x, y):
        self.x_target = x
        self.y_target = y
        self.target_coordinates = [x, y]
        
class GridWorld(gym.Env):
    def __init__(self, width=30, height=30, mode='computer', nr_agents=10):

        # Coordinate information
        self.width = width
        self.height = height
        self.grid = None
        
        # Render info
        self.grid_size = 16
        self.tile_location = np.random.randint(0, 2, (self.height, self.width))

        # Trackers
        self.agents = []
        self.agent_coordinates = []
        self.nr_agents = nr_agents
        self.current_step = 0
        self.max_step = 5
        self.mode = mode
        
        self.actions = Actions
        self.entities = Entities

        # For Gym
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(7, 7),
                                            dtype=np.float)

    def reset(self, is_render=False):
        """ Reset the environment to the beginning """
        self.current_step = 0        
        self.grid = Grid(self.width, self.height)
        
        # Add agents
        self.agents = []
        for _ in range(self.nr_agents):
            coordinates = self.grid.set_random(self.entities.agent, p=1)
            agent = Agent(coordinates, self.entities.agent)
            self.agents.append(agent)
            self.agent_coordinates.append(tuple(agent.coordinates))

        
        # Add food
        for i in range(10):
            self.grid.set_random(self.entities.food, p=1)
            self.grid.set_random(self.entities.poison, p=1) 
        
        obs = [self.grid.fov(agent.x, agent.y, 2) for agent in self.agents]
        return self.grid, obs
    
    def step(self, actions):
        """ move a single step """
        self.current_step += 1
        self._act(actions)
        
        rewards, dones, infos = self._get_rewards()
        
        obs = [self.grid.fov(agent.x, agent.y, 2) for agent in self.agents]
        
        print([agent.dead for agent in self.agents], [agent.health for agent in self.agents], rewards)

        return obs
    
    def _get_rewards(self):
        """ Extract reward and whether the game has finished """
        rewards = [0 for _ in range(len(self.agents))]
        dones = [agent.done for agent in self.agents]
        infos = ["" for _ in range(len(self.agents))]
        
        for index, agent in enumerate(self.agents):
            reward = 0
            done = False
            info = ""
            
            if not dones[index]:
                if agent.dead:
                    reward -= 400
                    agent.done = True
                    info = "Dead"
                elif self.current_step == self.max_step:
                    agent.done = True
                    reward = 400
            else:
                agent.done = True
                
            dones[index] = agent.done
            infos[index] = info
            rewards[index] = reward

        return rewards, dones, infos

    def render(self):
        """ Render the game using pygame """
        self._init_pygame_screen()
        self._step_human()
        self._draw()

        return check_pygame_exit()
    
    def _check_death(self):
        for agent in self.agents:
            if not agent.dead and agent.health <= 0:
                self.grid.set(*agent.coordinates, self.entities.food)
                agent.dead = True

    def _act(self, actions):
        """ Make the agents act and reduce its health with each step """
        for agent in self.agents:
            agent.health -= 10
            
        self._prepare_movement(actions)
        self._execute_movement(actions)
        self._attack(actions)
        self._check_death()
                            
                    
    def _prepare_movement(self, actions):
        """ Store the target coordinates agents want to go to """
        for agent, action in zip(self.agents, actions):
            
            if action <= 3 and not agent.dead:
                
                if action == self.actions.up:
                    if agent.x == 0:
                        agent.target_location(agent.x, agent.y)
                    else:
                        agent.target_location(agent.x - 1, agent.y)

                elif action == self.actions.right:
                    if agent.y == self.width - 1:
                        agent.target_location(agent.x, agent.y)
                    else:
                        agent.target_location(agent.x, agent.y + 1)
                        
                elif action == self.actions.down:
                    if agent.x == self.height - 1:
                        agent.target_location(agent.x, agent.y)
                    else:
                        agent.target_location(agent.x + 1, agent.y)

                elif action == self.actions.left:
                    if agent.y == 0:
                        agent.target_location(agent.x, agent.y)
                    else:
                        agent.target_location(agent.x, agent.y - 1)
            else:
                agent.target_location(agent.x, agent.y)
    
    def _execute_movement(self, actions):       
        """ Move if no agents want to go to the same spot """
        
        env.grid.reset(self.entities.agent, 0)

        
        impossible_coordinates = True
        while impossible_coordinates:
            
            impossible_coordinates = self._get_impossible_coordinates()
            
            for index, (agent, action) in enumerate(zip(self.agents, actions)):
            
                if agent.target_coordinates in impossible_coordinates:
                    agent.target_location(agent.x, agent.y)
                    
            
        for index, (agent, action) in enumerate(zip(self.agents, actions)):
            if not agent.dead:
                
                # Move if only agent that wants to move there
                if action <= 3:
                    self._eat(agent, self.grid.get(*agent.target_coordinates))
                    self._update_agent_position(agent, *agent.target_coordinates, self.entities.agent, index)
                
                # Stay if other agents wants to move there
                else:
                    self._update_agent_position(agent, *agent.coordinates, self.entities.agent, index)
                
                
    def _attack(self, actions):
        for agent, action in zip(self.agents, actions):
            if not agent.dead:
                if action == self.actions.attack_up and (agent.x - 1, agent.y) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_right and (agent.x, agent.y + 1) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_down and (agent.x + 1, agent.y) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_left and (agent.x, agent.y - 1) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                    
    def _eat(self, agent, entity):
        """ Eat food """
        if entity == self.entities.food:
            agent.health += 50
        elif entity == self.entities.poison:
            agent.health -= 50
            
    def _update_agent_position(self, agent, x, y, v, i):
        """ Update position of an agent """
        self.grid.set(x, y, v)
        agent.move(x, y)
        self.agent_coordinates[i] = (x, y)
        
    def _get_impossible_coordinates(self):
        """ Returns coordinates of coordinates where multiple agents want to go """
        target_coordinates = [agent.target_coordinates for agent in self.agents if not agent.dead]
        
        if target_coordinates:
            unq, count = np.unique(target_coordinates, axis=0, return_counts=True)
            impossible_coordinates = [list(coordinate) for coordinate in unq[count>1]]
            return impossible_coordinates
        else:
            return []

In [293]:
env = GridWorld(width=6, height=6, nr_agents=2)
obs = env.reset(); env.grid.grid

array([[1., 2., 1., 1., 0., 0.],
       [1., 2., 1., 3., 0., 2.],
       [2., 2., 0., 1., 1., 1.],
       [3., 1., 1., 0., 2., 0.],
       [0., 2., 2., 0., 2., 0.],
       [0., 0., 0., 0., 0., 2.]])

In [291]:
previous_grid = env.grid.grid
step = np.random.randint(0, 8, 10)
obs = env.step([0, 8])
env.grid.grid

[True, True] [-80, -30] [0, 0]


array([[0., 0., 0., 1., 0., 1.],
       [1., 1., 2., 0., 0., 2.],
       [1., 2., 0., 0., 1., 1.],
       [1., 1., 2., 1., 2., 2.],
       [2., 0., 2., 1., 0., 2.],
       [0., 0., 0., 0., 0., 1.]])

## Pycharm Test

In [81]:
import numpy as np
from enum import IntEnum, Enum
import gym
from gym import spaces
import pygame
from Field.utils import check_pygame_exit


def pad_with(vector, pad_width, iaxis, kwargs):
    """ For padding the grid """
    pad_value = kwargs.get('padder', 10)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value


class Grid:
    """
    Represent a grid and operations on it
    """

    def __init__(self, width, height):
        assert width >= 3
        assert height >= 3

        self.width = width
        self.height = height

        self.grid = np.zeros([self.height, self.width])

    def copy(self):
        from copy import deepcopy
        return deepcopy(self)

    def set(self, i, j, v):
        assert i >= 0 and i < self.height
        assert j >= 0 and j < self.width
        self.grid[i, j] = v

    def set_random(self, v, p):
        zero_indices = np.argwhere(self.grid == 0)
        if zero_indices.size != 0:
            random_index = np.random.randint(0, zero_indices.shape[0])
            i, j = zero_indices[random_index]
            if np.random.random() < p:
                self.grid[i, j] = v
        return i, j

    def get(self, i, j):
        assert i >= 0 and j < self.width
        assert i >= 0 and j < self.height
        return self.grid[i, j]

    def reset(self, v, t):
        self.grid = np.where(self.grid == v, t, self.grid)

    def update(self, i, j, v1, k, l, v2):
        self.grid[i, j] = v1
        self.grid[k, l] = v2

    def fov(self, i, j, dist):
        fov = np.pad(self.grid, dist, pad_with, padder=-1)
        fov = fov[i:i + (2 * dist) + 1, j:j + (2 * dist) + 1]
        return fov

    def fov_new(self, i, j, dist):
        top = self.grid[:dist, :]
        bottom = self.grid[self.height - dist:, :]
        right = self.grid[:, self.width - dist:]
        left = self.grid[:, :dist]

        lower_left = self.grid[self.height - dist:, :dist]
        lower_right = self.grid[self.height - dist:, self.width - dist:]
        upper_left = self.grid[:dist, :dist]
        upper_right = self.grid[:dist, self.width - dist:]

        full_top = np.concatenate((lower_right, bottom, lower_left), axis=1)
        middle = np.concatenate((right, self.grid, left), axis=1)
        full_bottom = np.concatenate((upper_right, top, upper_left), axis=1)

        fov = np.concatenate((full_top, middle, full_bottom), axis=0)
        fov = fov[i:i + (2 * dist) + 1, j:j + (2 * dist) + 1]

        return fov


class Actions(IntEnum):
    # Turn left, turn right, move forward
    up = 0
    right = 1
    down = 2
    left = 3

    attack_up = 4
    attack_right = 5
    attack_down = 6
    attack_left = 7


class Entities(IntEnum):
    food = 1
    poison = 2
    agent = 3


class Agent:
    def __init__(self, coordinates, value):
        self.x, self.y = coordinates
        self.health = 200
        self.value = value
        self.coordinates = list(coordinates)
        self.target_coordinates = list(coordinates)
        self.dead = False
        self.done = False

        self.x_target = None
        self.y_target = None

    def move(self, x, y):
        self.x = x
        self.y = y
        self.coordinates = [x, y]

    def target_location(self, x, y):
        self.x_target = x
        self.y_target = y
        self.target_coordinates = [x, y]


class GridWorld(gym.Env):
    def __init__(self, width=30, height=30, mode='computer', nr_agents=10):

        # Coordinate information
        self.width = width
        self.height = height
        self.grid = None

        # Render info
        self.grid_size = 16
        self.tile_location = np.random.randint(0, 2, (self.height, self.width))

        # Trackers
        self.agents = []
        self.agent_coordinates = []
        self.nr_agents = nr_agents
        self.current_step = 0
        self.max_step = 30
        self.mode = mode

        self.actions = Actions
        self.entities = Entities

        # For Gym
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(7, 7),
                                            dtype=np.float)

    def reset(self, is_render=False):
        """ Reset the environment to the beginning """
        self.current_step = 0
        self.grid = Grid(self.width, self.height)

        # Add agents
        self.agents = []
        for _ in range(self.nr_agents):
            coordinates = self.grid.set_random(self.entities.agent, p=1)
            agent = Agent(coordinates, self.entities.agent)
            self.agents.append(agent)
            self.agent_coordinates.append(tuple(agent.coordinates))

        # Add Good Food
        for i in range(self.width * self.height):
            if np.random.random() < 0.1:
                self.grid.set_random(self.entities.food, p=1)

        # Add Bad Food
        for i in range(self.width * self.height):
            if np.random.random() < 0.1:
                self.grid.set_random(self.entities.poison, p=1)

        #         for i in range(40):
        #             self.grid.set_random(self.entities.food, p=1)
        #             self.grid.set_random(self.entities.poison, p=1)

        # obs = np.array([self.grid.fov(agent.x, agent.y, 2) for agent in self.agents])
        obs = self._get_obs()
        return obs

    def _get_obs(self):
        observations = [self.grid.fov_new(agent.x, agent.y, 3) for agent in self.agents]
        #
        # one_hot_observations = []
        #
        # for observation in observations:
        #     one_hot_observation = []
        #     for i in [1, 2]:
        #         one_hot = np.where(observation != i, np.zeros([5, 5]), 1)
        #         one_hot_observation.append(one_hot)
        #     one_hot_observations.append(np.array(one_hot_observation))

        one_hot_observations = []

        for observation in observations:
            fov = np.zeros([7, 7])

            loc = np.where(observation == 1)
            for i, j in zip(loc[0], loc[1]):
                fov[i, j] = 1

            loc = np.where(observation == 2)
            for i, j in zip(loc[0], loc[1]):
                fov[i, j] = -1

            one_hot_observations.append(fov)

        return observations

    def step(self, actions):
        """ move a single step """
        self.current_step += 1
        self._act(actions)
        rewards, dones, infos = self._get_rewards()

        # Add food
        if np.count_nonzero(self.grid.grid == self.entities.food) <= 20:
            for i in range(3):
                if np.random.random() < 0.2:
                    self.grid.set_random(self.entities.food, p=1)

        # Add poison
        if np.count_nonzero(self.grid.grid == self.entities.poison) <= 20:
            for i in range(3):
                if np.random.random() < 0.2:
                    self.grid.set_random(self.entities.poison, p=1)

        # obs = np.array([self.grid.fov(agent.x, agent.y, 2) for agent in self.agents])
        obs = self._get_obs()

        return obs, rewards, dones, infos

    def _get_rewards(self):
        """ Extract reward and whether the game has finished """
        rewards = [0 for _ in range(len(self.agents))]
        dones = [False for _ in self.agents]
        infos = ["" for _ in self.agents]

        for index, agent in enumerate(self.agents):
            reward = 0
            info = ""
            done = False

            if not agent.dead:
                if agent.health <= 0:
                    agent.dead = True
                    reward -= 400
                    done = True
                    info = "Dead"
                elif self.current_step == self.max_step:
                    done = True
                    reward = 400
            else:
                done = True

            dones[index] = done
            infos[index] = info
            rewards[index] = reward

        return rewards, dones, infos

    def render(self):
        """ Render the game using pygame """
        self._init_pygame_screen()
        self._step_human()
        self._draw()

        return check_pygame_exit()

    def _check_death(self):
        for agent in self.agents:
            if not agent.dead and agent.health <= 0:
                self.grid.set(*agent.coordinates, self.entities.food)
                # agent.dead = True

    def _act(self, actions):
        """ Make the agents act and reduce its health with each step """
        for agent in self.agents:
            agent.health -= 10

        self._prepare_movement(actions)
        self._execute_movement(actions)
        self._attack(actions)
        self._check_death()

        # print([agent.dead for agent in self.agents], [agent.health for agent in self.agents])

    def _prepare_movement(self, actions):
        """ Store the target coordinates agents want to go to """
        for agent, action in zip(self.agents, actions):

            if action <= 3 and not agent.dead:

                if action == self.actions.up:
                    if agent.x == 0:
                        agent.target_location(self.height - 1, agent.y)
                    else:
                        agent.target_location(agent.x - 1, agent.y)

                elif action == self.actions.right:
                    if agent.y == self.width - 1:
                        agent.target_location(agent.x, 0)
                    else:
                        agent.target_location(agent.x, agent.y + 1)

                elif action == self.actions.down:
                    if agent.x == self.height - 1:
                        agent.target_location(0, agent.y)
                    else:
                        agent.target_location(agent.x + 1, agent.y)

                elif action == self.actions.left:
                    if agent.y == 0:
                        agent.target_location(agent.x, self.width - 1)
                    else:
                        agent.target_location(agent.x, agent.y - 1)
            else:
                agent.target_location(agent.x, agent.y)

    def _execute_movement(self, actions):
        """ Move if no agents want to go to the same spot """

        self.grid.reset(self.entities.agent, 0)

        impossible_coordinates = True
        while impossible_coordinates:

            impossible_coordinates = self._get_impossible_coordinates()

            for index, (agent, action) in enumerate(zip(self.agents, actions)):

                if agent.target_coordinates in impossible_coordinates:
                    agent.target_location(agent.x, agent.y)

        for index, (agent, action) in enumerate(zip(self.agents, actions)):
            if not agent.dead:

                # Move if only agent that wants to move there
                if action <= 3:
                    self._eat(agent, self.grid.get(*agent.target_coordinates))
                    self._update_agent_position(agent, *agent.target_coordinates, self.entities.agent, index)

                # Stay if other agents wants to move there
                else:
                    self._update_agent_position(agent, *agent.coordinates, self.entities.agent, index)

    def _attack(self, actions):
        for agent, action in zip(self.agents, actions):
            if not agent.dead:
                if action == self.actions.attack_up and (agent.x - 1, agent.y) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_right and (agent.x, agent.y + 1) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_down and (agent.x + 1, agent.y) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

                elif action == self.actions.attack_left and (agent.x, agent.y - 1) in self.agent_coordinates:
                    index = self.agent_coordinates.index(tuple(agent.coordinates))
                    self.agents[index].health -= 50

    def _eat(self, agent, entity):
        """ Eat food """
        if entity == self.entities.food:
            agent.health += 50
        elif entity == self.entities.poison:
            agent.health -= 50

    def _update_agent_position(self, agent, x, y, v, i):
        """ Update position of an agent """
        self.grid.set(x, y, v)
        agent.move(x, y)
        self.agent_coordinates[i] = (x, y)

    def _get_impossible_coordinates(self):
        """ Returns coordinates of coordinates where multiple agents want to go """
        target_coordinates = [agent.target_coordinates for agent in self.agents if not agent.dead]

        if target_coordinates:
            unq, count = np.unique(target_coordinates, axis=0, return_counts=True)
            impossible_coordinates = [list(coordinate) for coordinate in unq[count > 1]]
            return impossible_coordinates
        else:
            return []

    def _init_pygame_screen(self):
        """ Initialize the pygame screen for rendering """
        pygame.init()
        self.screen = pygame.display.set_mode((round(self.width) * self.grid_size, round(self.height) * self.grid_size))
        clock = pygame.time.Clock()
        clock.tick(3)
        self.screen.fill((255, 255, 255))
        self._draw_tiles()

    def _draw_tiles(self):
        """ Draw tiles on screen """
        tiles = self._get_tiles()
        for i in range(self.width):
            for j in range(self.height):
                self.screen.blit(tiles[self.tile_location[j, i]], (i * 16, j * 16))

    def _draw(self):
        """ Draw all sprites and tiles """
        # Draw agent
        agent_img = pygame.image.load(r'Sprites/agent.png')

        for agent in self.agents:
            if not agent.dead:
                self.screen.blit(agent_img, (agent.x * 16, agent.y * 16))

        # Draw food
        apple = pygame.image.load(r'Sprites/apple.png')
        poison = pygame.image.load(r'Sprites/poison.png')

        # print(self.grid.grid)

        food = np.where(self.grid.grid == self.entities.food)
        for i, j in zip(food[0], food[1]):
            self.screen.blit(apple, (i * 16, j * 16))

        food = np.where(self.grid.grid == self.entities.poison)
        for i, j in zip(food[0], food[1]):
            self.screen.blit(poison, (i * 16, j * 16))

        pygame.display.update()

    def _get_tiles(self):
        """ Load tile images """
        tile_1 = pygame.image.load(r'Sprites/tile_green_dark_grass.png')
        tile_2 = pygame.image.load(r'Sprites/tile_green_light_grass.png')
        return [tile_1, tile_2]

    def _step_human(self):
        """ Execute an action manually """
        if self.mode == 'human':
            events = pygame.event.get()
            action = 4
            actions = [10 for _ in range(20)]
            for event in events:
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_UP:
                        actions[0] = 2
                    if event.key == pygame.K_RIGHT:
                        actions[0] = 1
                    if event.key == pygame.K_DOWN:
                        actions[0] = 0
                    if event.key == pygame.K_LEFT:
                        actions[0] = 3
                    self.step(actions)

                    # fov_food, fov_agents = self._get_fov_matrix(self.entities["Agents"][0])
                    #
                    # print(f"Health : {self.entities['Agents'][0].health}")
                    # print(f"Dead: {str(self.entities['Agents'][0].dead)}")
                    # print(f"Enemies: {fov_agents}")


In [82]:
env = GridWorld(width=5, height=7, nr_agents=1)
obs = env.reset(); env.grid.grid

array([[0., 0., 3., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.]])

In [83]:
previous_grid = env.grid.grid
step = np.random.randint(0, 8, 10)
obs, rewards, dones, infos = env.step([2])
print(rewards, dones, infos, env.agents[0].health, env.agents[0].done, env.agents[0].dead)
print(env.grid.grid)
print(obs[0])

[0] [False] [''] 190 False False
[[0. 0. 0. 0. 0.]
 [0. 0. 3. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 2. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 3. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


## Speed Test

In [576]:
def run():
    s = env.reset()
    env.step(np.random.randint(0, 8, 10))

In [579]:
env = GridWorld(width=67, height=120, nr_agents=50)
obs = env.reset()

In [580]:
%timeit run()

276 ms ± 8.97 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [581]:
from Field.MultiEnvironment import MultiEnvironment as Environment

In [582]:
env = Environment(width=67, height=120, nr_agents=50)

In [583]:
%timeit run()

3.53 s ± 291 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Test where is slow

In [10]:
import cProfile
from Field.MultiEnvironment import MultiEnvironment as Environment
import numpy as np

def run():
    s = env.reset()
    env.step(np.random.randint(0, 8, 50))
    
env = GridWorld(width=67, height=120, nr_agents=50)

## Test output

In [1]:
from Field import GridWorld

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [6]:
env = GridWorld(width=6, height=7, nr_agents=2)
obs = env.reset()

In [13]:
previous_grid = env.grid.grid
obs, rewards, dones, infos = env.step([4, 5])
print(rewards, dones, infos, env.agents[0].health, env.agents[0].done, env.agents[0].dead, env.current_step, env.max_step)
print(rewards, dones, infos, env.agents[1].health, env.agents[1].done, env.agents[1].dead, env.current_step, env.max_step)
print(env.grid.grid)
print(obs[0])

[0, 0] [False, False] ['', ''] 130 False False 7 30
[0, 0] [False, False] ['', ''] 130 False False 7 30
[[1. 0. 0. 2. 2. 2.]
 [0. 0. 0. 3. 0. 0.]
 [0. 1. 0. 2. 0. 0.]
 [1. 1. 2. 1. 0. 1.]
 [0. 0. 0. 3. 0. 0.]
 [0. 1. 0. 0. 2. 2.]
 [0. 1. 2. 2. 0. 1.]]
[  0.   0.   0.   0.   0.   0.   0.   0.   1.   0.  -1.   0.   0.   0.
   1.   1.  -1.   1.   0.   1.   1.   0.   0.   0.   0.   0.   0.   0.
   0.   1.   0.   0.  -1.  -1.   0.   0.   1.  -1.  -1.   0.   1.   0.
   1.   0.   0.  -1.  -1.  -1.   1.   0.   0.   0.   1.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   1.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 130.   4.   3.]


In [96]:
from Field.MultiEnvironment import MultiEnvironment as Environment
env = Environment(width=6, height=10, nr_agents=1)
obs = env.reset()

In [131]:
obs, rewards, dones, infos = env.step([6])
print(obs[0])
print(rewards)

[[ 0. -1.  0.  1. -1. -1.  0.]
 [ 0.  1.  1.  0.  1.  1.  1.]
 [ 0. -1. -1. -1. -1. -1.  1.]
 [ 0.  1.  1.  0.  0. -1.  1.]
 [ 0.  0.  1.  1.  0.  0.  1.]
 [ 0.  0.  0.  1. -1.  0.  0.]
 [ 0. -1.  1.  0. -1.  1.  0.]]
(0,)


In [132]:
rewards[0]

0

In [133]:
dones

(False,)

In [134]:
infos

('',)

In [46]:
import numpy as np

# Gym
import gym
from gym import spaces
import pygame

# Custom
from Field.utils import check_pygame_exit
from Field import Food, Agent


class MultiEnvironment(gym.Env):
    def __init__(self, width=30, height=30, mode='computer', nr_agents=10):

        # Coordinate information
        self.width = width
        self.height = height
        self.grid = np.zeros([self.height, self.width])
        
        # Render info
        self.grid_size = 16
        self.tile_location = np.random.randint(0, 2, (self.height, self.width))

        # Trackers
        self.agents = []
        self.is_render = False

        self.current_step = 0
        self.max_step = 100
        self.scores = []
        self.mode = mode
        self.nr_agents = nr_agents

        # For Gym
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(7, 7),
                                            dtype=np.float)
        
    def reset(self, is_render=False):
        """ Reset the environment to the beginning """
        self.grid = np.zeros([self.height, self.width])
        self.agents = []
        self.current_step = 0
        
        # Add agent
        self.entities["Agents"] = [self._add_entity(random_location=True, value=1, entity_type="Agent")
                                   for _ in range(self.nr_agents)]

        # Add Good Food
        for i in range(self.width*self.height):
            if np.random.random() < 0.05:
                self._add_entity(random_location=True, value=1, entity_type="Food")

        # Add Bad Food
        for i in range(self.width*self.height):
            if np.random.random() < 0.05:
                self._add_entity(random_location=True, value=-1, entity_type="Food")
        
        self._init_objects()
        obs = [self.get_fov(agent) for agent in self.entities["Agents"]]
        return obs

    def step(self, actions):
        """ move a single step """
        self.current_step += 1
        self._act(actions)

        rewards, dones, infos = zip(*[self._get_reward(agent) for agent in self.entities["Agents"]])

        # Add food randomly
        if len([1 for food in self.entities["Food"] if food.value == 1]) <= 20:
            for i in range(3):
                if np.random.random() < 0.2:
                    self._add_entity(random_location=True, value=1, entity_type="Food")

        # Add poison randomly
        if len([1 for food in self.entities["Food"] if food.value == -1]) <= 20:
            for i in range(3):
                if np.random.random() < 0.2:
                    self._add_entity(random_location=True, value=-1, entity_type="Food")

        obs = [self.get_fov(agent) for agent in self.entities["Agents"]]

        return obs, rewards, dones, infos

    def render(self):
        """ Render the game using pygame """
        self._init_pygame_screen()
        self._step_human()
        self._draw()

        return check_pygame_exit()

    def _act(self, actions):
        """ Make the agent act and reduce its health with each step """
        for agent, action in zip(self.entities["Agents"], actions):
            if action > 3:
                self._attack(action, agent)

        for agent, action in zip(self.entities["Agents"], actions):
            if action <= 3:
                self._move(action, agent)

        for agent, action in zip(self.entities["Agents"], actions):
            self._eat(agent)

        for agent, action in zip(self.entities["Agents"], actions):
            agent.health -= 10

    def _eat(self, agent):
        closest_food = self._get_closest_food_pellet(agent)

        if np.array_equal(agent.coordinates, closest_food.coordinates):
            if closest_food.value == 1:
                agent.health += 50
                agent.update_action("Eat Food")
                self.entities["Food"].remove(closest_food)
            elif closest_food.value == -1:
                agent.health -= 20
                agent.update_action("Eat Poison")
                self.entities["Food"].remove(closest_food)

    def _attack(self, action, agent):
        for other_agent in self.entities["Agents"]:

            if action == 4:  # attack right
                if np.array_equal(agent.coordinates, np.array([other_agent.x - 1, other_agent.y])):
                    other_agent.health -= 100

            elif action == 5:  # attack left
                if np.array_equal(agent.coordinates, np.array([other_agent.x + 1, other_agent.y])):
                    other_agent.health -= 100

            elif action == 6:  # attack top
                if np.array_equal(agent.coordinates, np.array([other_agent.x, other_agent.y + 1])):
                    other_agent.health -= 100

            elif action == 7:  # attack bottom
                if np.array_equal(agent.coordinates, np.array([other_agent.x, other_agent.y - 1])):
                    other_agent.health -= 100

    def _init_pygame_screen(self):
        """ Initialize the pygame screen for rendering """
        pygame.init()
        self.screen = pygame.display.set_mode((round(self.width) * self.grid_size, round(self.height) * self.grid_size))
        clock = pygame.time.Clock()
        clock.tick(3)
        self.screen.fill((255, 255, 255))
        self._draw_tiles()

    def get_fov(self, agent):
        """ Update the agent's field of view """
        fov_food = self._get_fov_per_entity_type(agent, entity_type="Food")
        fov_agents = self._get_fov_per_entity_type(agent, entity_type="Agents")
        fov = np.array(list(fov_food.flatten()) + list(fov_agents.flatten()) + [agent.health] + [agent.x] + [agent.y])
        return fov

    def _get_fov_matrix(self, agent):
        """ Update the agent's field of view """
        fov_food = self._get_fov_per_entity_type(agent, entity_type="Food")
        fov_agents = self._get_fov_per_entity_type(agent, entity_type="Agents")
        return fov_food, fov_agents

    def _get_fov_per_entity_type(self, agent, entity_type):
        """ Get the field of view for a single agent and a specific entity type """
        fov = np.zeros([7, 7])
        for entity in self.entities[entity_type]:

            # Make sure that only other agents are selected that are alive
            if (entity_type == "Agents" and agent != entity and not agent.dead) or entity_type != "Agents":

                # Get closest entities
                if abs(entity.x - agent.x) <= 3 and abs(entity.y - agent.y) <= 3:
                    diff_x = entity.x - agent.x
                    diff_y = agent.y - entity.y
                    fov[3 - diff_y, 3 + diff_x] = entity.value

                # Look through wall if it is on the left side
                if agent.x <= 3 and abs(entity.y - agent.y) <= 3 and self.width - (entity.x - agent.x) <= 3:
                    diff_y = agent.y - entity.y
                    diff_x = 3 - (self.width - (entity.x - agent.x))
                    fov[3 - diff_y, diff_x] = entity.value

                # Look through wall if it is on the right side
                if agent.x >= (self.width - 3) and abs(entity.y - agent.y) <= 3 and self.width - (agent.x - entity.x) <= 3:
                    diff_y = agent.y - entity.y
                    diff_x = 3 + (self.width - (agent.x - entity.x))
                    fov[3 - diff_y, diff_x] = entity.value

                # Look through wall if it is on the bottom (y-inverted)
                if agent.y >= (self.height - 3) and abs(entity.x - agent.x) <= 3 and self.height - (agent.y - entity.y) <= 3:
                    diff_y = 3 + (self.height - (agent.y - entity.y))
                    diff_x = 3 + (entity.x - agent.x)
                    fov[diff_y, diff_x] = entity.value

                # Look through wall if it is on top (y-inverted)
                if agent.y <= 3 and abs(entity.x - agent.x) <= 3 and self.height - (entity.y - agent.y) <= 3:
                    diff_y = 3 - (self.height - (entity.y - agent.y))
                    diff_x = 3 + (entity.x - agent.x)
                    fov[diff_y, diff_x] = entity.value

        return fov

    def _init_objects(self):
        """ Initialize the objects """
        # Add agent
        self.entities["Agents"] = [self._add_entity(random_location=True, value=1, entity_type="Agent")
                                   for _ in range(self.nr_agents)]

        # Add Good Food
        for i in range(self.width*self.height):
            if np.random.random() < 0.05:
                self._add_entity(random_location=True, value=1, entity_type="Food")

        # Add Bad Food
        for i in range(self.width*self.height):
            if np.random.random() < 0.05:
                self._add_entity(random_location=True, value=-1, entity_type="Food")

    def _reset_variables(self):
        """ Reset variables back their starting values """
        self.entities = {"Food": [],
                         "Agent": []}
        self.current_step = 0
        self.max_step = 30

    def _add_entity(self, x=None, y=None, value=0, entity_type=None, random_location=False):
        """ Add an entity to a specified (x, y) coordinate. If random_location = True, then
            the entity will be added to a random unoccupied location. """
        if random_location:
            for i in range(20):
                x = np.random.randint(self.width)
                y = np.random.randint(self.height)
                entity = self.object_types[entity_type](x, y, value)

                if not self._coordinate_is_occupied(entity.coordinates):
                    self.entities[entity_type].append(entity)
                    return entity
        else:
            entity = self.object_types[entity_type](x, y, value)
            self.entities[entity_type].append(entity)
            return entity

    def _coordinate_is_occupied(self, coordinates):
        """ Check if coordinate is occupied """
        for entity_type in self.entities.keys():
            for entity in self.entities[entity_type]:
                if np.array_equal(coordinates, entity.coordinates):
                    return True
        return False

    def _get_closest_food_pellet(self, agent):
        """  Get the closest food pellet to the agent """
        distances = [abs(food.x-agent.x) + abs(food.y-agent.y) for food in self.entities["Food"]]
        if distances:
            idx_closest_distance = int(np.argmin(distances))
        else:
            return Food(-1, -1, 0)
        return self.entities["Food"][idx_closest_distance]

    def _move(self, action, agent):
        """ Move the agent to one space adjacent (up, right, down, left) """
        if agent.health > 0:

            # Up
            if action == 0:
                if agent.y == self.height - 1:
                    agent.update(agent.x, 0)
                    self.grid[]
                else:
                    agent.update(agent.x, agent.y + 1)

            # Right
            elif action == 1:
                if agent.x == self.width - 1:
                    agent.update(0, agent.y)
                else:
                    agent.update(agent.x + 1, agent.y)

            # Down
            elif action == 2:
                if agent.y == 0:
                    agent.update(agent.x, self.height - 1)
                else:
                    agent.update(agent.x, agent.y - 1)

            # Left
            elif action == 3:
                if agent.x == 0:
                    agent.update(self.width - 1, agent.y)
                else:
                    agent.update(agent.x - 1, agent.y)

    def _get_reward(self, agent):
        """ Extract reward and whether the game has finished """
        reward = 0
        done = False
        previous_action = agent.get_action()
        info = ""

        if not agent.dead:
            if agent.health <= 0:
                agent.dead = True
                reward -= 400
                done = True
                self._add_entity(x=agent.x, y=agent.y, value=1, entity_type="Food")
                agent.update(-1, -1)
                info = "Dead"
            elif self.current_step == self.max_step:
                done = True
                reward = 400
            elif previous_action == "Eat Food":
                reward = 300
                reward = 0
            elif previous_action == "Eat Poison":
                reward -= 300
                reward = 0

        else:
            done = True

        return reward, done, info

    def _get_tiles(self):
        """ Load tile images """
        tile_1 = pygame.image.load(r'Sprites/tile_green_dark_grass.png')
        tile_2 = pygame.image.load(r'Sprites/tile_green_light_grass.png')
        return [tile_1, tile_2]

    def _draw_tiles(self):
        """ Draw tiles on screen """
        tiles = self._get_tiles()
        for i in range(self.width):
            for j in range(self.height):
                self.screen.blit(tiles[self.tile_location[j, i]], (i*16, j*16))

    def _draw(self):
        """ Draw all sprites and tiles """
        # Draw agent
        agent_img = pygame.image.load(r'Sprites/agent.png')

        for agent in self.entities["Agents"]:
            if not agent.dead:
                self.screen.blit(agent_img, (agent.x * 16, agent.y * 16))

        # Draw food
        apple = pygame.image.load(r'Sprites/apple.png')
        poison = pygame.image.load(r'Sprites/poison.png')

        for food in self.entities["Food"]:
            if food.value == 1:
                self.screen.blit(apple, (food.x * 16, food.y * 16))
            elif food.value == -1:
                self.screen.blit(poison, (food.x * 16, food.y * 16))

        pygame.display.update()

    def _step_human(self):
        """ Execute an action manually """
        if self.mode == 'human':
            events = pygame.event.get()
            action = 4
            actions = [10 for _ in range(20)]
            for event in events:
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_UP:
                        actions[0] = 2
                    if event.key == pygame.K_RIGHT:
                        actions[0] = 1
                    if event.key == pygame.K_DOWN:
                        actions[0] = 0
                    if event.key == pygame.K_LEFT:
                        actions[0] = 3
                    self.step(actions)

                    fov_food, fov_agents = self._get_fov_matrix(self.entities["Agents"][0])

                    print(f"Health : {self.entities['Agents'][0].health}")
                    print(f"Dead: {str(self.entities['Agents'][0].dead)}")
                    print(f"Enemies: {fov_agents}")


SyntaxError: invalid syntax (<ipython-input-46-7e4bf93bb5e1>, line 272)

In [None]:
def pad_with(vector, pad_width, iaxis, kwargs):
    """ For padding the grid """
    pad_value = kwargs.get('padder', 10)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value

class Grid:
    """
    Represent a grid and operations on it
    """

    def __init__(self, width, height):
        assert width >= 3
        assert height >= 3

        self.width = width
        self.height = height

        self.grid = np.zeros([self.height, self.width])

    def copy(self):
        from copy import deepcopy
        return deepcopy(self)

    def set(self, x, y, v):
        assert x >= 0 and x < self.width
        assert y >= 0 and y < self.height
        self.grid[(self.height-1) - y, x] = v
        return x, y
        
    def np_to_coordinates(self, i, j):
        return j, (self.width + 1) - i

    def set_random(self, v, p):
        zero_indices = np.argwhere(self.grid == 0)
        if zero_indices.size != 0:
            random_index = np.random.randint(0, zero_indices.shape[0])
            i, j = zero_indices[random_index]
            if np.random.random() < p:
                self.grid[i, j] = v
                
        return self.np_to_coordinates(i, j)

    def get(self, x, y):
        assert x >= 0 and y < self.width
        assert x >= 0 and y < self.height
        return self.grid[(self.height-1) - y, x]
    
    def fov(self, x, y, dist):
        fov = np.pad(self.grid, dist, pad_with, padder=-1)
        fov = fov[self.height-y-1:self.height+(2*dist)-y, x:x+(2*dist)+1]
        return fov

    def slice(self, topX, topY, width, height):
        """
        Get a subset of the grid
        """

        grid = Grid(width, height)

        for j in range(0, height):
            for i in range(0, width):
                x = topX + i
                y = topY + j

                if x >= 0 and x < self.width and \
                   y >= 0 and y < self.height:
                    v = self.get(x, y)
                else:
                    v = Wall()

                grid.set(i, j, v)

        return grid

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [4]:
import numpy as np

978 µs ± 4.06 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
step = 
obs = env.step([1]); env.grid.grid