# ABD2

Wij gaan tijdens deze opdracht een food-collector simulatie bouwen.
In deze simulatie is het doel van de agents om zo veel mogelijk groen eten op te eten, terwijl ze rood eten vermijden.
We maken deze simulatie grid-based: de wereld is een rooster van vakjes waarop agents en eten staan.

We maken gebruik van de gymnasium API: https://gymnasium.farama.org/

Ter visualisatie gebruiken we pygame.



### Simulation properties:
We beginnen in principe met een grid van 128x128, maar dit kan uitgebreid worden. 
We maken de implementatie zo schaalbaar en aanpasbaar mogelijk door voor alle belangrijke eigenschappen hyperparameters aan te maken:

- Number of agents
- Grid size (default 128x128)
- Good food to total square ratio
- Good food spawning pattern (maybe)
- Bad food to total square ratio
- Bad food spawning pattern (maybe)
- Episode duration

### Agent properties:
Actions: up, down, left, right, wait

Perception: full information (knows coordinates of other agents, good food, and bad food)

Agent rules: not yet specified





In [1]:
import numpy as np
import pygame
import gymnasium as gym
from gymnasium import spaces
import random
import time

pygame 2.1.3.dev8 (SDL 2.0.22, Python 3.10.9)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
class agent():
    """A food-collecting agent that tracks its grid position and its score."""

    def __init__(self):
        # [x, y] grid coordinates; [None, None] until a position is assigned.
        self.location = [None, None]
        # Running score of this agent.
        self.points = 0

    def move(self, direction, grid_size):
        """Shift the agent by ``direction``, keeping it inside the grid.

        Args:
            direction: Two-element offset ``(dx, dy)``; any sequence or
                NumPy array is accepted.
            grid_size: Side length of the square grid. Each coordinate is
                clipped to ``[0, grid_size - 1]``.
        """
        # Coerce both operands to arrays so the addition is always
        # element-wise, even when ``direction`` is a plain list.
        target = np.asarray(self.location) + np.asarray(direction)
        # ``tolist`` stores plain Python ints rather than NumPy scalars.
        self.location = np.clip(target, 0, grid_size - 1).tolist()

    def pick_action(self, obs):
        """Return a uniformly random action index in ``0..4``.

        Args:
            obs: Current observation; ignored by this random policy.
        """
        return random.randint(0, 4)
        


In [None]:
class foodCollectorEnv(gym.Env):
    """Grid-based, multi-agent food-collector environment.

    Agents move over a square grid of ``grid_size`` x ``grid_size`` cells.
    Stepping onto good food adds ``good_food_points`` to the agent's score and
    stepping onto bad food adds ``bad_food_points``; eaten food is removed.
    An episode lasts ``game_duration`` steps.

    Locations are ``[x, y]`` lists. When drawn, ``y = 0`` is the bottom row.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    # Seconds to pause after drawing a frame in "human" mode.
    _HUMAN_FRAME_DELAY = 0.2
    # Width of the scoreboard panel as a fraction of the board width.
    _SCOREBOARD_RATIO = 0.2

    def __init__(self, render_mode=None, grid_size=128, game_duration=200, agents=1, good_food_ratio=0.1,
                 bad_food_ratio=0.1, window_size=1028):
        """Configure the environment; call ``reset`` before the first ``step``.

        Args:
            render_mode: ``None`` (no rendering), ``"human"`` (pygame window)
                or ``"rgb_array"`` (frames returned by ``render``).
            grid_size: Side length of the square grid, in cells.
            game_duration: Number of steps in one episode.
            agents: Number of agents to create.
            good_food_ratio: Fraction of all cells that start with good food.
            bad_food_ratio: Fraction of all cells that start with bad food.
            window_size: Side length of the drawn board, in pixels.

        Raises:
            ValueError: If ``render_mode`` is not a supported mode.
        """
        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected None or one of {self.metadata['render_modes']}"
            )

        self.grid_size = grid_size  # The size of the square grid
        self.window_size = window_size  # The size of the board in the PyGame window
        self.game_duration = game_duration
        self.current_step = 0
        self.agents = [agent() for _ in range(agents)]
        self.good_food_ratio = good_food_ratio
        self.bad_food_ratio = bad_food_ratio
        self.good_food_points = 5
        self.bad_food_points = -1
        self.render_mode = render_mode
        self.window = None

        def cell_sequence():
            # Each coordinate takes one of ``grid_size`` values (0..grid_size-1).
            return spaces.Sequence(spaces.MultiDiscrete([self.grid_size, self.grid_size]))

        self.observation_space = spaces.Dict({
            'agent_locs': cell_sequence(),
            'good_food_locs': cell_sequence(),
            'bad_food_locs': cell_sequence(),
        })

        self.action_space = spaces.Sequence(spaces.Discrete(5))

        # Action index -> (dx, dy) offset; 0 means "wait".
        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4: np.array([0, -1])
        }

        # Filled in by ``reset``; defined here so ``_get_obs`` never hits a
        # missing attribute.
        self.agent_locs = []
        self.good_food_locs = []
        self.bad_food_locs = []

    def _get_obs(self):
        """Return the current agent, good-food and bad-food locations."""
        return {
            "agent_locs": [actor.location for actor in self.agents],
            "good_food_locs": self.good_food_locs,
            "bad_food_locs": self.bad_food_locs
        }

    def _get_info(self):
        """Return the per-agent scores, in agent order."""
        return {"agent_points": [actor.points for actor in self.agents]}

    def reset(self, seed=None, options=None):
        """Start a new episode with freshly placed agents and food.

        Agents, good food and bad food are placed on distinct random cells.
        The step counter and every agent's score are set back to zero.

        Args:
            seed: Optional seed for the environment's random generator.
            options: Unused.

        Returns:
            ``(observation, info)`` for the initial state.

        Raises:
            ValueError: If the agents and food do not fit on the grid.
        """
        super().reset(seed=seed)

        n_cells = self.grid_size * self.grid_size
        n_agents = len(self.agents)
        n_good = max(0, round(self.good_food_ratio * n_cells))
        n_bad = max(0, round(self.bad_food_ratio * n_cells))
        n_needed = n_agents + n_good + n_bad
        if n_needed > n_cells:
            raise ValueError(
                f"Cannot place {n_agents} agents, {n_good} good and {n_bad} bad food items "
                f"on a grid with only {n_cells} cells"
            )

        # Draw all distinct cells in one go from the seeded generator, then
        # split them over agents, good food and bad food.
        flat_cells = self.np_random.choice(n_cells, size=n_needed, replace=False)
        cells = [[int(c) // self.grid_size, int(c) % self.grid_size] for c in flat_cells]
        self.agent_locs = cells[:n_agents]
        self.good_food_locs = cells[n_agents:n_agents + n_good]
        self.bad_food_locs = cells[n_agents + n_good:]

        self.current_step = 0
        for actor, start in zip(self.agents, self.agent_locs):
            actor.location = list(start)
            actor.points = 0

        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        return observation, self._get_info()

    def step(self, action):
        """Advance the simulation by one step.

        Agents act one after another in list order. A move into a cell that
        currently holds an agent is cancelled, and moves are clipped to the
        grid. Food on the cell the agent ends up on is eaten.

        Args:
            action: One entry per agent; ``action[i][0]`` is the action index
                (0..4) of agent ``i``.

        Returns:
            ``(observation, reward, terminated, info)``. ``reward`` is 1 on
            the final step of the episode and 0 otherwise.
            NOTE: this is a 4-tuple without a ``truncated`` flag, kept as-is
            for existing callers.
        """
        for idx, actor in enumerate(self.agents):
            direction = self._action_to_direction[int(action[idx][0])]
            target = np.clip(np.asarray(actor.location) + direction, 0, self.grid_size - 1)

            if self._is_empty(target):
                actor.move(direction, self.grid_size)

            self._eat_food(actor)

        # Count the step first, so an episode lasts exactly ``game_duration`` steps.
        self.current_step += 1
        terminated = self.current_step >= self.game_duration
        reward = 1 if terminated else 0
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()
            # Slow the loop down only when someone is watching.
            time.sleep(self._HUMAN_FRAME_DELAY)

        return observation, reward, terminated, self._get_info()

    def _eat_food(self, actor):
        """Remove food on the agent's cell and update the agent's score."""
        cell = list(actor.location)

        if cell in self.good_food_locs:
            self.good_food_locs.remove(cell)
            actor.points += self.good_food_points

        if cell in self.bad_food_locs:
            self.bad_food_locs.remove(cell)
            actor.points += self.bad_food_points

    def _is_empty(self, loc):
        """Return True when no agent stands on ``loc``.

        Coordinates are compared in order, so ``(1, 2)`` and ``(2, 1)`` are
        different cells.
        """
        target = tuple(loc)
        return all(tuple(actor.location) != target for actor in self.agents)

    def render(self):
        """Return the current frame as an RGB array in "rgb_array" mode."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def close(self):
        """Close the pygame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            self.window = None

    def _cell_rect(self, loc, pix_square_size):
        """Return the ``[left, top, width, height]`` pixel rect of a cell.

        The y axis is flipped so that ``y = 0`` is drawn at the bottom.
        """
        return [
            loc[0] * pix_square_size,
            (self.grid_size - 1 - loc[1]) * pix_square_size,
            pix_square_size,
            pix_square_size,
        ]

    def _render_frame(self):
        """Draw the board and the scoreboard.

        In "human" mode the frame is shown in the pygame window and nothing
        is returned; otherwise the frame is returned as a NumPy array with
        axes (row, column, channel).
        """
        pygame.font.init()
        font = pygame.font.Font(None, 24)

        # Integer pixel sizes for the window and the off-screen canvas.
        board_px = int(self.window_size)
        canvas_size = (board_px + int(board_px * self._SCOREBOARD_RATIO), board_px)

        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(canvas_size)

        canvas = pygame.Surface(canvas_size)
        canvas.fill((255, 255, 255))
        pix_square_size = self.window_size / self.grid_size

        # Agents: a blue square with the agent index on top.
        for idx, actor in enumerate(self.agents):
            cell_rect = self._cell_rect(actor.location, pix_square_size)
            pygame.draw.rect(canvas, color='blue', rect=cell_rect, width=0)
            label = font.render(f"{idx}", True, "white")
            canvas.blit(label, (cell_rect[0], cell_rect[1]))

        # Food: good food in green, bad food in red.
        for colour, cells in (('green', self.good_food_locs), ('red', self.bad_food_locs)):
            for cell in cells:
                pygame.draw.rect(
                    canvas,
                    color=colour,
                    rect=self._cell_rect(cell, pix_square_size),
                    width=0
                )

        # Grid lines.
        for line_idx in range(self.grid_size + 1):
            offset = pix_square_size * line_idx
            pygame.draw.line(
                canvas,
                color=0,
                start_pos=(0, offset),
                end_pos=(self.window_size, offset),
                width=1,
            )
            pygame.draw.line(
                canvas,
                color=0,
                start_pos=(offset, 0),
                end_pos=(offset, self.window_size),
                width=1,
            )

        # Scoreboard to the right of the board, one line per agent.
        for idx, actor in enumerate(self.agents):
            score_text = font.render(f'Agent {idx} | {actor.points}', True, "black")
            canvas.blit(score_text, (self.window_size + 10, 10 + (idx * 50)))

        if self.render_mode == "human":
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()
        else:
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )


In [None]:
# Demo run: ten agents on a 64x64 grid, shown in a pygame window, with far
# more bad food (20% of the cells) than good food (2%).
env = foodCollectorEnv(
    render_mode="human",
    grid_size=64,
    game_duration=50,
    agents=10,
    good_food_ratio=0.02,
    bad_food_ratio=0.2,
)

In [5]:
# Start a new episode. The call returns the initial (observation, info) pair,
# which the notebook displays as the cell output.
env.reset()

({'agent_locs': [[10, 0],
   [51, 11],
   [36, 55],
   [32, 30],
   [17, 34],
   [7, 56],
   [33, 17],
   [63, 53],
   [19, 3],
   [47, 56]],
  'good_food_locs': [[40, 14],
   [40, 37],
   [12, 3],
   [8, 20],
   [5, 12],
   [41, 10],
   [11, 22],
   [19, 34],
   [25, 0],
   [9, 35],
   [63, 29],
   [3, 47],
   [14, 52],
   [8, 55],
   [43, 54],
   [61, 36],
   [59, 59],
   [40, 19],
   [50, 15],
   [62, 55],
   [44, 29],
   [53, 58],
   [56, 48],
   [59, 26],
   [50, 60],
   [25, 48],
   [8, 49],
   [23, 5],
   [41, 32],
   [30, 3],
   [39, 55],
   [3, 7],
   [8, 56],
   [51, 28],
   [24, 10],
   [47, 13],
   [52, 16],
   [24, 33],
   [16, 45],
   [17, 23],
   [14, 2],
   [33, 20],
   [53, 53],
   [47, 60],
   [63, 0],
   [33, 33],
   [48, 1],
   [38, 36],
   [11, 55],
   [39, 61],
   [47, 28],
   [24, 62],
   [62, 29],
   [4, 56],
   [25, 27],
   [16, 53],
   [13, 29],
   [13, 38],
   [19, 51],
   [37, 33],
   [40, 35],
   [42, 47],
   [23, 31],
   [48, 39],
   [46, 29],
   [19, 13],

In [6]:
# Play one episode: every agent picks an action from the latest observation
# until the environment reports that the episode is over.
observation = env._get_obs()  # call the method; do not store the bound method
try:
    terminated = False
    while not terminated:
        actions = [[player.pick_action(observation)] for player in env.agents]
        observation, reward, terminated, info = env.step(actions)
finally:
    # Shut pygame down even if a step raises.
    pygame.quit()

In [7]:
# Report the final scores and the winner (the first agent with the top score).
print(info)
final_points = info['agent_points']
winner = np.argmax(final_points)
print(f"Agent {winner} wins the game with {max(final_points)} points")

{'agent_points': [-2, 1, -8, -3, 0, 3, -5, -5, -3, -4]}
Agent 5 wins the game with 3 points
