# ABD2

Wij gaan tijdens deze opdracht een food-collector simulatie bouwen.
In deze simulatie is het doel van de agents om zo veel mogelijk groen eten op te eten, terwijl ze rood eten vermijden.
We gaan deze simulatie grid-based maken. 

We maken gebruik van de gymnasium API: https://gymnasium.farama.org/

Ter visualisatie gebruiken we pygame.



### Simulation properties:
We beginnen in principe met een grid van 128x128, maar dit kan uitgebreid worden. 
We zullen de implementatie zoveel mogelijk scalable en aanpasbaar implementeren, door hyperparameters aan te maken voor alle belangrijke properties.

- Number of agents
- Grid size (default 128x128)
- Good food to total square ratio
- Good food spawning pattern (maybe)
- Bad food to total square ratio
- Bad food spawning pattern (maybe)
- Episode duration

### Agent properties:
Actions: up, down, left, right, wait

Perception: full information (knows coordinates of other agents, good food, and bad food)

Agent rules: not yet specified





In [None]:
import numpy as np
import pygame
import gymnasium as gym
from gymnasium import spaces
import random

In [None]:
env = gym.make("LunarLander-v2", render_mode="human")
observation, info = env.reset()

for _ in range(100):
    action = env.action_space.sample()  # agent policy that uses the observation and info
    observation, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        observation, info = env.reset()

env.close()

In [None]:
class agent():
    def __init__(self):
        self.location = [None,None]
        self.points = 0
        
    def move(self, direction, grid_size):
        
        new_location = np.clip(self.location + direction, 0, grid_size -1)
        self.location = new_location
        

In [None]:
class foodCollectorEnv(gym.Env):
    metadata = {"render_modes": ["human", None], "render_fps": 4}

    def __init__(self, render_mode=None, grid_size=128, game_duration = 200, agents = [agent() for _ in range(1)]):
        self.grid_size = grid_size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window
        self.game_duration = game_duration
        self.current_step = 0
        self.agents = agents
        
        self.observation_space = spaces.Dict({
            'agent_locs': spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
            'good_food_locs':spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
            'bad_food_locs':spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
        })


        self.action_space = spaces.Discrete(5)


        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4:np.array([0, -1])
        }

        self.render_mode = render_mode

        self.window = None
        self.clock = None
        
    def _get_obs(self):
        return {
            "agent_locs": [agent.location for agent in self.agents],
            "good_food_locs": self.good_food_locs,
            "bad_food_locs": self.bad_food_locs
        }

    def reset(self, seed=None, options=None):

        super().reset(seed=seed)

        self.agent_locs = []
        self.good_food_locs = []
        self.bad_food_locs = []
        
        print([len(self.agent_locs), len(self.agents)])
        
            
        while True:
            if len(self.agent_locs) >= len(self.agents):
                break
            loc = [random.randint(0,self.grid_size),random.randint(0,self.grid_size)]
            if loc not in self.agent_locs:
                self.agent_locs.append(loc)
                
        for i, agent in enumerate(self.agents):
            agent.location = self.agent_locs[i]
                
        
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info
    
    def step(self, action):
        for agent in self.agents:
            
        
            direction = self._action_to_direction[action[0]]
            
            if self._is_empty(np.clip(agent.location + direction, 0, self.grid_size -1)):
                
                agent.move(direction, self.grid_size)
            else:
                agent.move(self._action_to_direction[0], self.grid_size)
                
            
        terminated = self.current_step >= self.game_duration
        reward = 1 if terminated else 0  # 
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()
        info = {}
        
        self.current_step += 1
        
        return observation, reward, terminated, info
    
    def _is_empty(self,loc):
        
        for agent in self.agents:
            if sorted(agent.location) == sorted(loc):
                return False
            
        return True


In [None]:
env = foodCollectorEnv(render_mode=None, grid_size=128, game_duration = 200)

In [None]:
env.observation_space.sample()

In [None]:
env.reset()

In [None]:
env.step([1])

In [832]:
class foodCollectorEnv(gym.Env):
    metadata = {"render_modes": ["human", None], "render_fps": 4}

    def __init__(self, render_mode=None, grid_size=128, game_duration = 200, agents = [agent() for _ in range(1)]):
        self.grid_size = grid_size  # The size of the square grid
        self.window_size = 512  # The size of the PyGame window
        self.game_duration = game_duration
        self.current_step = 0
        self.agents = agents
        
        self.observation_space = spaces.Dict({
            'agent_locs': spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
            'good_food_locs':spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
            'bad_food_locs':spaces.Sequence(spaces.MultiDiscrete([self.grid_size-1,self.grid_size-1])),
        })


        self.action_space = spaces.Discrete(5)


        self._action_to_direction = {
            0: np.array([0, 0]),
            1: np.array([1, 0]),
            2: np.array([0, 1]),
            3: np.array([-1, 0]),
            4:np.array([0, -1])
        }

        self.render_mode = render_mode

        self.window = None
        self.clock = None
        
    def _get_obs(self):
        return {
            "agent_locs": [agent.location for agent in self.agents],
            "good_food_locs": self.good_food_locs,
            "bad_food_locs": self.bad_food_locs
        }

    def reset(self, seed=None, options=None):

        super().reset(seed=seed)

        self.agent_locs = []
        self.good_food_locs = []
        self.bad_food_locs = []
        
        print([len(self.agent_locs), len(self.agents)])
        
            
        while True:
            if len(self.agent_locs) >= len(self.agents):
                break
            loc = [random.randint(0,self.grid_size),random.randint(0,self.grid_size)]
            if loc not in self.agent_locs:
                self.agent_locs.append(loc)
                
        for i, agent in enumerate(self.agents):
            agent.location = self.agent_locs[i]
                
        
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info
    
    def step(self, action):
        for agent in self.agents:
            
        
            direction = self._action_to_direction[action[0]]
            
            if self._is_empty(np.clip(agent.location + direction, 0, self.grid_size -1)):
                
                agent.move(direction, self.grid_size)
            else:
                agent.move(self._action_to_direction[0], self.grid_size)
                
            
        terminated = self.current_step >= self.game_duration
        reward = 1 if terminated else 0  # 
        observation = self._get_obs()

        if self.render_mode == "human":
            self._render_frame()
        info = {}
        
        self.current_step += 1
        
        return observation, reward, terminated, info
    
    def _is_empty(self,loc):
        
        for agent in self.agents:
            if sorted(agent.location) == sorted(loc):
                return False
            
        return True


In [833]:
env = foodCollectorEnv(render_mode=None, grid_size=128, game_duration = 200)

In [834]:
env.observation_space.sample()

OrderedDict([('agent_locs',
              (array([19, 70], dtype=int64), array([98, 62], dtype=int64))),
             ('bad_food_locs', (array([24, 70], dtype=int64),)),
             ('good_food_locs',
              (array([37, 64], dtype=int64),
               array([ 77, 125], dtype=int64),
               array([54, 30], dtype=int64),
               array([72, 69], dtype=int64),
               array([76, 99], dtype=int64),
               array([ 4, 12], dtype=int64),
               array([24, 72], dtype=int64),
               array([90, 64], dtype=int64),
               array([ 64, 117], dtype=int64)))])

In [835]:
env.reset()

[0, 1]


({'agent_locs': [[28, 36]], 'good_food_locs': [], 'bad_food_locs': []}, {})

In [1055]:
env.step([1])

({'agent_locs': [array([127,  36])],
  'good_food_locs': [],
  'bad_food_locs': []},
 1,
 True,
 {})