# ABD2

Wij gaan tijdens deze opdracht een food-collector simulatie bouwen.
In deze simulatie is het doel van de agents om zo veel mogelijk groen eten op te eten, terwijl ze rood eten vermijden.
We gaan deze simulatie grid-based maken. 

We maken gebruik van de gymnasium API: https://gymnasium.farama.org/

Ter visualisatie gebruiken we pygame.



### Simulation properties:
We beginnen in principe met een grid van 128x128, maar dit kan uitgebreid worden. 
We maken de implementatie zo schaalbaar en aanpasbaar mogelijk door voor alle belangrijke eigenschappen een hyperparameter aan te maken:

- Number of agents
- Grid size (default 128x128)
- Good food to total square ratio
- Good food spawning pattern (maybe)
- Bad food to total square ratio
- Bad food spawning pattern (maybe)
- Episode duration

### Agent properties:
Actions: up, down, left, right, wait

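Perception: the environment exposes full information (coordinates of all agents, good food, and bad food), but each agent only uses what lies within its vision range (3 cells in every direction by default).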

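Agent rules: an agent may not move onto a cell occupied by another agent and may not move off the grid; it picks at random among the remaining actions.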





In [1]:
import numpy as np
import pygame
import gymnasium as gym
from gymnasium import spaces
import random
import time

pygame 2.1.3 (SDL 2.0.22, Python 3.9.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:
class agent():
    """Grid-world agent that picks a random legal action each turn.

    Attributes:
        location: ``[x, y]`` grid coordinates; ``[None, None]`` until the
            environment places the agent.
        points: Score accumulated so far.
        vision_range: Half-width of the square window (in cells) the agent
            can observe around itself.
        memory: One dict per decision with the chosen action, the clipped
            observation and the location at that moment.
    """

    def __init__(self):
        self.location = [None, None]
        self.points = 0
        self.vision_range = 3
        self.memory = []
        # Action id -> (dx, dy) offset; 0 means "wait".
        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4: np.array([0, -1])
        }

    def _destination(self, direction):
        """Return the ``[x, y]`` cell reached by applying *direction*, as plain ints."""
        return (np.asarray(self.location) + np.asarray(direction)).tolist()

    @staticmethod
    def _in_bounds(location, grid_size):
        """Return True when every coordinate lies in ``[0, grid_size)``."""
        return all(0 <= coord < grid_size for coord in location)

    def move(self, direction, grid_size):
        """Apply *direction* to the agent's location.

        Args:
            direction: ``(dx, dy)`` offset to add to the current location.
            grid_size: Side length of the square grid.

        A move that would leave the grid is ignored, so the agent can never
        end up outside ``[0, grid_size)`` on either axis.
        """
        new_location = self._destination(direction)
        if self._in_bounds(new_location, grid_size):
            # Always bind a fresh list: entries in ``memory`` keep a reference
            # to the previous one and must not change retroactively.
            self.location = new_location

    def clip_vision(self, obs):
        """Return a copy of *obs* restricted to the agent's field of view.

        Args:
            obs: Dict mapping a name to a list of ``[x, y]`` locations.

        Returns:
            Dict with the same keys, keeping only the locations that are at
            most ``vision_range`` cells away on both axes.
        """
        own_x, own_y = self.location
        return {
            key: [
                loc for loc in locations
                if abs(own_x - loc[0]) <= self.vision_range
                and abs(own_y - loc[1]) <= self.vision_range
            ]
            for key, locations in obs.items()
        }

    def pick_action(self, obs, grid_size, verbose=False):
        """Choose a random action among those the rules allow.

        Args:
            obs: Observation dict; must contain ``'agent_locs'``.
            grid_size: Side length of the square grid.
            verbose: When True, print the raw and clipped good-food
                observation and the agent's location (debugging aid; this
                also requires ``'good_food_locs'`` in *obs*).

        Returns:
            The chosen action id (a key of ``_action_to_direction``).

        The decision is appended to ``memory`` before returning.
        """
        if verbose:
            print(f"OG obs: {obs['good_food_locs']}")

        # The agent only reasons about what lies inside its vision range.
        obs = self.clip_vision(obs)

        if verbose:
            print(f"clipped obs {obs['good_food_locs']}")
            print(f"location {self.location}")
            print()

        other_agent_locations = [loc for loc in obs['agent_locs'] if loc != self.location]

        viable_actions = []
        for action, direction in self._action_to_direction.items():
            target = self._destination(direction)
            # Rule 1: agents can't move to a space occupied by another agent.
            if target in other_agent_locations:
                continue
            # Rule 2: agents can't move out of bounds.
            if not self._in_bounds(target, grid_size):
                continue
            viable_actions.append(action)

        action = random.choice(viable_actions)
        self.memory.append({'agent_action': action, "agent_obs": obs, "agent_location": self.location})
        return action



In [None]:
class foodCollectorEnv(gym.Env):
    """Grid-based multi-agent food-collector environment.

    Agents walk over a square grid and score points by stepping on good food
    (``good_food_points``) while losing points on bad food
    (``bad_food_points``). Every call to :meth:`step` moves exactly one agent.

    Args:
        render_mode: ``"human"`` to draw a pygame window on every step,
            ``"rgb_array"`` to obtain frames from :meth:`render`, or ``None``.
        grid_size: Side length of the square grid.
        game_duration: Number of moves each agent gets per episode.
        agents: Number of agents.
        good_food_ratio: Fraction of all cells that start with good food.
        bad_food_ratio: Fraction of all cells that start with bad food.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, grid_size=128, game_duration=200, agents=1, good_food_ratio=0.1,
                 bad_food_ratio=0.1):
        self.grid_size = grid_size
        self.window_size = 720
        # One step() call moves a single agent, so the step budget scales with the agent count.
        self.game_duration = game_duration * agents
        self.current_step = 0
        self.agents = [agent() for _ in range(agents)]
        self.good_food_ratio = good_food_ratio
        self.bad_food_ratio = bad_food_ratio
        self.good_food_points = 5
        self.bad_food_points = -1
        self.render_mode = render_mode
        self.window = None

        # Action id -> (dx, dy) offset; 0 means "wait".
        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4: np.array([0, -1])
        }

        # MultiDiscrete([n, n]) covers coordinates 0..n-1, i.e. the whole grid.
        cell = spaces.MultiDiscrete([self.grid_size, self.grid_size])
        self.observation_space = spaces.Dict({
            'agent_locs': spaces.Sequence(cell),
            'good_food_locs': spaces.Sequence(cell),
            'bad_food_locs': spaces.Sequence(cell),
        })

        # A single action is the pair (agent index, movement id), matching step().
        self.action_space = spaces.Tuple((
            spaces.Discrete(len(self.agents)),
            spaces.Discrete(len(self._action_to_direction)),
        ))

        # Populated by reset(); initialised here so _get_obs() works beforehand.
        self.agent_locs = []
        self.good_food_locs = []
        self.bad_food_locs = []

        # Legacy attribute names, unused by this class; kept for backward compatibility.
        self.good_food_locations = []
        self.bad_food_locations = []

    def _get_obs(self):
        """Return the full observation: locations of all agents and all food."""
        return {
            "agent_locs": [collector.location for collector in self.agents],
            "good_food_locs": self.good_food_locs,
            "bad_food_locs": self.bad_food_locs
        }

    def _get_info(self):
        """Return the auxiliary info dict holding every agent's current score."""
        return {"agent_points": [collector.points for collector in self.agents]}

    def reset(self, seed=None, options=None):
        """Start a new episode with freshly placed agents and food.

        Args:
            seed: Seed for the environment's random generator; the same seed
                yields the same layout.
            options: Unused.

        Returns:
            Tuple ``(observation, info)``.

        Raises:
            ValueError: If agents plus food do not fit on the grid.
        """
        super().reset(seed=seed)

        total_cells = self.grid_size * self.grid_size
        n_agents = len(self.agents)
        n_good = max(0, round(self.good_food_ratio * total_cells))
        n_bad = max(0, round(self.bad_food_ratio * total_cells))
        n_needed = n_agents + n_good + n_bad
        if n_needed > total_cells:
            raise ValueError(
                f"Cannot place {n_agents} agents, {n_good} good food and {n_bad} bad food "
                f"on a grid with only {total_cells} cells"
            )

        # Draw distinct cells in one go: no overlaps are possible and placement
        # always finishes, however crowded the grid is.
        cells = self.np_random.permutation(total_cells)[:n_needed]
        locations = [list(divmod(int(cell), self.grid_size)) for cell in cells]

        self.agent_locs = locations[:n_agents]
        self.good_food_locs = locations[n_agents:n_agents + n_good]
        self.bad_food_locs = locations[n_agents + n_good:]

        # Episode-level state must start from scratch as well.
        self.current_step = 0
        for collector, location in zip(self.agents, self.agent_locs):
            collector.location = location
            collector.points = 0

        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        return observation, self._get_info()

    def step(self, action):
        """Move one agent and resolve any food on the cell it lands on.

        Args:
            action: Pair ``(agent_index, movement_id)``.

        Returns:
            Tuple ``(observation, reward, terminated, info)``. Note that this
            is a 4-tuple (no separate ``truncated`` flag), which is what the
            driver loop in this notebook unpacks.
        """
        actor = self.agents[action[0]]
        actor.move(self._action_to_direction[action[1]], self.grid_size)

        if actor.location in self.good_food_locs:
            self.good_food_locs.remove(actor.location)
            actor.points += self.good_food_points

        if actor.location in self.bad_food_locs:
            self.bad_food_locs.remove(actor.location)
            actor.points += self.bad_food_points

        # Count this step before checking the budget so that exactly
        # game_duration steps are played.
        self.current_step += 1
        out_of_time = self.current_step >= self.game_duration
        # An empty board never ends the episode on its very first step.
        food_gone = not self.good_food_locs and self.current_step > 1
        terminated = out_of_time or food_gone

        reward = 0  # not relevant for now
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, self._get_info()

    def _is_empty(self, loc):
        """Return True when no agent currently stands on *loc*."""
        # Compare coordinates in order: [1, 2] and [2, 1] are different cells.
        return all(list(collector.location) != list(loc) for collector in self.agents)

    def render(self):
        """Return the current frame as an RGB array when in ``"rgb_array"`` mode."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def close(self):
        """Close the pygame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            self.window = None

    def _load_sprite(self, filename, size):
        """Load *filename* and scale it to a ``size`` x ``size`` square."""
        image = pygame.image.load(filename)
        if self.window is not None:
            # convert_alpha() needs an active display, which only exists in human mode.
            image = image.convert_alpha()
        side = max(1, int(size))
        return pygame.transform.scale(image, (side, side))

    def _blit_sprites(self, canvas, filename, locations, pix_square_size):
        """Draw the sprite in *filename* on every grid cell in *locations*."""
        if not locations:
            return
        # Load and scale once per frame instead of once per drawn cell.
        sprite = self._load_sprite(filename, pix_square_size)
        for loc in locations:
            canvas.blit(sprite, self._to_pixels(loc, pix_square_size))

    def _to_pixels(self, loc, pix_square_size):
        """Return the top-left pixel of grid cell *loc* (y axis pointing up)."""
        return (loc[0] * pix_square_size, (self.grid_size - 1 - loc[1]) * pix_square_size)

    def _render_frame(self):
        """Draw the grid, agents, food and scoreboard.

        In ``"human"`` mode the frame is shown in the pygame window and
        nothing is returned; otherwise the frame is returned as an array of
        shape (height, width, 3).
        """
        pygame.font.init()
        font = pygame.font.Font(None, 24)
        scoreboard_width = 0.2
        canvas_size = (int(self.window_size + self.window_size * scoreboard_width), self.window_size)

        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(canvas_size)

        canvas = pygame.Surface(canvas_size)
        canvas.fill((204, 255, 229))
        pix_square_size = self.window_size / self.grid_size

        # Agents, each with a translucent square marking its vision range.
        self._blit_sprites(canvas, "deer.png", [c.location for c in self.agents], pix_square_size)
        for collector in self.agents:
            fov_side = int(pix_square_size * (collector.vision_range * 2 + 1))
            fov = pygame.Surface((fov_side, fov_side))
            fov.set_alpha(128)
            fov.fill((137, 137, 137))
            cell_x, cell_y = self._to_pixels(collector.location, pix_square_size)
            offset = pix_square_size * collector.vision_range
            canvas.blit(fov, (cell_x - offset, cell_y - offset))

        # White scoreboard panel; drawn after the vision squares so that any
        # overflow past the right edge of the grid is covered.
        canvas.fill(
            (255, 255, 255),
            pygame.Rect(self.window_size, 0, canvas_size[0] - self.window_size, self.window_size),
        )

        # Agent index labels on top of the sprites.
        for i, collector in enumerate(self.agents):
            score_text = font.render(f"{i}", True, "white")
            canvas.blit(score_text, self._to_pixels(collector.location, pix_square_size))

        self._blit_sprites(canvas, "plant.png", self.good_food_locs, pix_square_size)
        self._blit_sprites(canvas, "evil_plant.png", self.bad_food_locs, pix_square_size)

        # Grid lines.
        for x in range(self.grid_size + 1):
            pygame.draw.line(
                canvas,
                color=0,
                start_pos=(0, pix_square_size * x),
                end_pos=(self.window_size, pix_square_size * x),
                width=1,
            )
            pygame.draw.line(
                canvas,
                color=0,
                start_pos=(pix_square_size * x, 0),
                end_pos=(pix_square_size * x, self.window_size),
                width=1,
            )

        # Scoreboard entries, one row per agent.
        for i, collector in enumerate(self.agents):
            score_text = font.render(f'Agent {i} | {collector.points}', True, "black")
            canvas.blit(score_text, (self.window_size + 10, 10 + (i * 50)))

        if self.render_mode == "human":
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()
        else:
            # pygame stores pixels as (width, height, 3); swap to (height, width, 3).
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )


In [None]:
# Small demo world: a 16x16 grid with 10 agents, 1% good food and 20% bad food,
# drawn in a pygame window.
env = foodCollectorEnv(
    render_mode="human",
    grid_size=16,
    game_duration=50,
    agents=10,
    good_food_ratio=0.01,
    bad_food_ratio=0.2,
)

In [None]:
# Start a new episode: places the agents and the food on the grid and returns
# the tuple (observation, info).
env.reset()

In [None]:
# Play one episode: the agents take turns, each choosing an action from the
# latest observation, until the environment reports termination.
observation = env._get_obs()
terminated = False
try:
    while env.agents and not terminated:
        for i, current_agent in enumerate(env.agents):
            action = [i, current_agent.pick_action(observation, env.grid_size)]
            observation, reward, terminated, info = env.step(action)

            # Stop right away: agents later in the round must not keep moving
            # (and scoring) after the episode has ended.
            if terminated:
                break
finally:
    # Release the pygame window even when the loop is interrupted by an error.
    pygame.quit()

In [None]:
# Report the final scores and the winner (the first agent with the highest score).
print(info)
final_points = info['agent_points']
winner = np.argmax(final_points)
print(f"Agent {winner} wins the game with {final_points[winner]} points")

In [None]:
# Show what the first agent recorded: one entry per decision with the chosen
# action, the clipped observation and the agent's location.
env.agents[0].memory

In [8]:
# Show the first agent's decision log (action, clipped observation, location per step).
env.agents[0].memory

[{'agent_action': 3,
  'agent_obs': {'agent_locs': [[2, 4], [3, 7], [4, 6]],
   'good_food_locs': [[1, 5]],
   'bad_food_locs': [[2, 3],
    [1, 6],
    [2, 1],
    [5, 7],
    [0, 5],
    [1, 1],
    [5, 6],
    [5, 2],
    [4, 1]]},
  'agent_location': [2, 4]},
 {'agent_action': 0,
  'agent_obs': {'agent_locs': [[1, 4], [2, 7], [4, 6]],
   'good_food_locs': [[1, 5]],
   'bad_food_locs': [[2, 3], [1, 6], [2, 1], [0, 5], [1, 1], [4, 1]]},
  'agent_location': [1, 4]},
 {'agent_action': 1,
  'agent_obs': {'agent_locs': [[1, 4], [1, 7], [4, 7]],
   'good_food_locs': [[1, 5]],
   'bad_food_locs': [[2, 3], [1, 6], [2, 1], [0, 5], [1, 1], [4, 1]]},
  'agent_location': [1, 4]},
 {'agent_action': 2,
  'agent_obs': {'agent_locs': [[2, 4], [2, 7], [4, 7]],
   'good_food_locs': [[1, 5]],
   'bad_food_locs': [[2, 3],
    [1, 6],
    [2, 1],
    [5, 7],
    [0, 5],
    [1, 1],
    [5, 6],
    [5, 2],
    [4, 1]]},
  'agent_location': [2, 4]},
 {'agent_action': 2,
  'agent_obs': {'agent_locs': [[2, 