# ABD2

Tijdens deze opdracht bouwen wij een food-collector-simulatie.
In deze simulatie is het doel van de agents om zo veel mogelijk groen eten op te eten, terwijl ze rood eten vermijden.
We gaan deze simulatie grid-based maken. 

We maken gebruik van de gymnasium API: https://gymnasium.farama.org/

Ter visualisatie gebruiken we pygame.



### Simulation properties:
We beginnen in principe met een grid van 128x128, maar dit kan uitgebreid worden.
We maken de implementatie zo schaalbaar en aanpasbaar mogelijk door hyperparameters aan te maken voor alle belangrijke properties:

- Number of agents
- Grid size (default 128x128)
- Good food to total square ratio
- Good food spawning pattern (maybe)
- Bad food to total square ratio
- Bad food spawning pattern (maybe)
- Episode duration

### Agent properties:
Actions: up, down, left, right, wait

Perception: full information (knows coordinates of other agents, good food, and bad food)

Agent rules: not yet specified





In [None]:
import numpy as np
import pygame
import gymnasium as gym
from gymnasium import spaces
import random
import time

In [None]:
class agent():
    """A single agent on the grid, tracking its position and its score.

    Attributes:
        location: Two-element list of grid coordinates; ``[None, None]``
            until a position has been assigned from outside.
        points: Accumulated score, starting at 0.
    """

    def __init__(self):
        self.points = 0
        self.location = [None, None]

    def move(self, direction, grid_size):
        """Shift the agent by ``direction`` while keeping it on the board.

        Args:
            direction: Offset added to the current location. The
                environment in this file passes 2-element numpy arrays,
                which makes the ``+`` below an element-wise addition.
            grid_size: Side length of the square grid; every coordinate is
                clamped to ``0 .. grid_size - 1``.
        """
        highest_index = grid_size - 1
        shifted = list(self.location) + direction
        clamped = np.clip(shifted, 0, highest_index)
        self.location = list(clamped)
        

In [None]:
class foodCollectorEnv(gym.Env):
    """Grid-based multi-agent food-collector environment.

    Agents move on a square grid, gaining ``good_food_points`` for every
    good-food cell they step on and ``bad_food_points`` for every bad-food
    cell. Food is consumed when eaten. An agent cannot enter a cell that is
    occupied by an agent; in that case it stays where it is.

    Action encoding (index into ``_action_to_direction``):
        0 = wait, 1 = +x, 2 = +y, 3 = -x, 4 = -y.

    NOTE: ``step`` returns the 4-tuple ``(observation, reward, terminated,
    info)`` rather than Gymnasium's 5-tuple, because existing callers of
    this class unpack four values.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None, grid_size=128, game_duration=200, agents=1, good_food_ratio=0.1, bad_food_ratio=0.1):
        """Configure the environment; call ``reset()`` before stepping.

        Args:
            render_mode: ``None``, ``"human"`` (pygame window) or
                ``"rgb_array"`` (frames returned by ``render()``).
            grid_size: Side length of the square grid.
            game_duration: Number of steps in one episode.
            agents: Number of agents.
            good_food_ratio: Fraction of all cells that start with good food.
            bad_food_ratio: Fraction of all cells that start with bad food.

        Raises:
            ValueError: If ``render_mode`` is not a supported mode.
        """
        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
            raise ValueError(
                f"Unsupported render_mode {render_mode!r}; "
                f"expected None or one of {self.metadata['render_modes']}"
            )

        self.grid_size = grid_size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window
        self.game_duration = game_duration
        self.current_step = 0
        self.agents = [agent() for _ in range(agents)]
        self.good_food_ratio = good_food_ratio
        self.bad_food_ratio = bad_food_ratio
        self.good_food_points = 1
        self.bad_food_points = -1
        self.render_mode = render_mode
        self.window = None
        self.clock = None

        # MultiDiscrete takes the NUMBER of values per dimension, so a
        # coordinate in 0 .. grid_size - 1 needs nvec == grid_size.
        cell_space = spaces.MultiDiscrete([self.grid_size, self.grid_size])
        self.observation_space = spaces.Dict({
            'agent_locs': spaces.Sequence(cell_space),
            'good_food_locs': spaces.Sequence(cell_space),
            'bad_food_locs': spaces.Sequence(cell_space),
        })

        self.action_space = spaces.Sequence(spaces.Discrete(5))

        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4: np.array([0, -1]),
        }

        # Created here (and filled by reset()) so _get_obs() and rendering
        # never hit a missing attribute.
        self.agent_locs = []
        self.good_food_locs = []
        self.bad_food_locs = []

    def _get_obs(self):
        """Return the full-information observation as a dict of coordinate lists."""
        return {
            "agent_locs": [agent.location for agent in self.agents],
            "good_food_locs": self.good_food_locs,
            "bad_food_locs": self.bad_food_locs,
        }

    def reset(self, seed=None, options=None):
        """Start a new episode with freshly placed agents and food.

        All agents and food items are placed on distinct cells, drawn from
        the environment's seeded ``np_random`` generator so that ``seed``
        makes the layout reproducible. Step counter and agent scores are
        reset as well.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Unused; accepted for API compatibility.

        Returns:
            Tuple ``(observation, info)``.

        Raises:
            ValueError: If agents plus food do not fit on the grid.
        """
        super().reset(seed=seed)

        cell_count = self.grid_size * self.grid_size
        n_agents = len(self.agents)
        n_good = max(0, round(self.good_food_ratio * cell_count))
        n_bad = max(0, round(self.bad_food_ratio * cell_count))
        n_total = n_agents + n_good + n_bad
        if n_total > cell_count:
            raise ValueError(
                f"Cannot place {n_total} agents/food items on a grid with "
                f"only {cell_count} cells; lower the ratios or agent count"
            )

        # Sampling flat cell indices without replacement guarantees unique
        # positions in one shot (no rejection loop that might never end).
        flat_cells = self.np_random.choice(cell_count, size=n_total, replace=False)
        cells = [[int(c) // self.grid_size, int(c) % self.grid_size] for c in flat_cells]

        self.agent_locs = cells[:n_agents]
        self.good_food_locs = cells[n_agents:n_agents + n_good]
        self.bad_food_locs = cells[n_agents + n_good:]

        self.current_step = 0
        for agent, start in zip(self.agents, self.agent_locs):
            agent.location = list(start)
            agent.points = 0

        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        info = {"agent_points": [agent.points for agent in self.agents]}

        return observation, info

    def step(self, action):
        """Advance the simulation by one step.

        Args:
            action: One entry per agent, in agent order. Each entry is an
                action id 0-4, given either as a plain integer or as a
                one-element sequence such as ``[3]``.

        Returns:
            Tuple ``(observation, reward, terminated, info)``. ``reward`` is
            1 on the final step of the episode and 0 otherwise; per-agent
            scores are reported in ``info["agent_points"]``.
        """
        for i, agent in enumerate(self.agents):
            # Accept both a bare int and a one-element sequence per agent.
            action_id = int(np.asarray(action[i]).reshape(-1)[0])
            direction = self._action_to_direction[action_id]

            target = np.clip(np.asarray(agent.location) + direction, 0, self.grid_size - 1)
            # A blocked target (another agent, or the agent's own cell when
            # waiting / pushing against a wall) means the agent stays put.
            if self._is_empty(target):
                agent.move(direction, self.grid_size)

            # Keep coordinates as plain Python ints for clean observations.
            agent.location = [int(c) for c in agent.location]

            if agent.location in self.good_food_locs:
                self.good_food_locs.remove(agent.location)
                agent.points += self.good_food_points

            if agent.location in self.bad_food_locs:
                self.bad_food_locs.remove(agent.location)
                agent.points += self.bad_food_points

        # Count this step before testing for the end, so an episode lasts
        # exactly game_duration steps.
        self.current_step += 1
        terminated = self.current_step >= self.game_duration
        reward = 1 if terminated else 0
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()
            # Extra pause so a human can follow the animation; skipped when
            # nothing is shown so headless runs are not slowed down.
            time.sleep(0.5)

        info = {"agent_points": [agent.points for agent in self.agents]}

        return observation, reward, terminated, info

    def _is_empty(self, loc):
        """Return True when no agent currently occupies cell ``loc``."""
        wanted = (int(loc[0]), int(loc[1]))
        # Compare coordinates in order: (1, 2) and (2, 1) are different cells.
        return all(
            (int(agent.location[0]), int(agent.location[1])) != wanted
            for agent in self.agents
        )

    def render(self):
        """Return an RGB frame in "rgb_array" mode; otherwise return None."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def close(self):
        """Shut down the pygame window, if one was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            self.window = None
            self.clock = None

    def _draw_cells(self, canvas, locations, color, pix_square_size):
        """Fill the grid cells at ``locations`` with ``color`` on ``canvas``."""
        for loc in locations:
            pygame.draw.rect(
                canvas,
                color=color,
                # The vertical coordinate is flipped so y grows upwards on screen.
                rect=[
                    loc[0] * pix_square_size,
                    (self.grid_size - 1 - loc[1]) * pix_square_size,
                    pix_square_size,
                    pix_square_size,
                ],
                width=0,
            )

    def _render_frame(self):
        """Draw the current state.

        In "human" mode the frame is shown in the pygame window and nothing
        is returned; otherwise the frame is returned as a numpy array with
        the first two axes of the pygame pixel array swapped.
        """
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(
                (self.window_size, self.window_size)
            )
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        pix_square_size = self.window_size / self.grid_size

        self._draw_cells(canvas, [agent.location for agent in self.agents], 'blue', pix_square_size)
        self._draw_cells(canvas, self.good_food_locs, 'green', pix_square_size)
        self._draw_cells(canvas, self.bad_food_locs, 'red', pix_square_size)

        # Grid lines: one horizontal and one vertical line per cell boundary.
        for x in range(self.grid_size + 1):
            offset = pix_square_size * x
            pygame.draw.line(
                canvas,
                color=(0, 0, 0),
                start_pos=(0, offset),
                end_pos=(self.window_size, offset),
                width=1,
            )
            pygame.draw.line(
                canvas,
                color=(0, 0, 0),
                start_pos=(offset, 0),
                end_pos=(offset, self.window_size),
                width=1,
            )

        if self.render_mode == "human":
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            self.clock.tick(self.metadata["render_fps"])
        else:
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )


In [None]:
# Small demo environment: 32x32 grid, 3 agents, 25-step episode, shown in a pygame window.
env = foodCollectorEnv(
    render_mode="human",
    grid_size=32,
    game_duration=25,
    agents=3,
    good_food_ratio=0.05,
    bad_food_ratio=0.15,
)

In [None]:
# Start a new episode: places the agents and food and returns (observation, info).
env.reset()

In [None]:
# Drive the environment with uniformly random actions until the episode ends.
while True:
    # One single-element action list per agent (env.step reads action[i][0]),
    # sized from the environment instead of being hard-coded to three agents.
    actions = [[random.randint(0, 4)] for _ in env.agents]
    observation, reward, terminated, info = env.step(actions)

    if terminated:
        break

# Final per-agent scores.
print(info)
    