In [1]:
#!pip install lbforaging

In [2]:
import lbforaging
from games.foraging import Foraging 
import numpy as np
import time
from agents.random_agent import RandomAgent

In [3]:
# Build the Foraging game wrapper with a fixed seed of 1.
# NOTE(review): config=None presumably selects the wrapper's built-in defaults — confirm in games.foraging.
game = Foraging(config=None, seed=1)

  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [4]:
# Reset the game, then show what each agent sees before any step has been taken.
game.reset()
for agent in game.agents:
    print(f"Agent {agent}")
    # Per-agent observation returned by the game wrapper.
    print(f"Observed state: {game.observe(agent)}")
    # Action information visible to this agent; the recorded output below shows None right after a reset.
    print(f"Observed actions: {game.observe_action(agent)}")

Agent agent_0
Observed state: [6. 2. 2. 4. 6. 2. 0. 1. 2.]
Observed actions: None
Agent agent_1
Observed state: [6. 2. 2. 0. 1. 2. 4. 6. 2.]
Observed actions: None


In [5]:
def play_episode(game, agents, verbose=False, render=False, render_delay=0.5):
    """Play one episode of ``game`` to completion and return cumulative rewards.

    Parameters
    ----------
    game
        Game wrapper. This function calls ``reset()``, ``done()``,
        ``step(actions)`` and ``reward(agent)`` on it and iterates
        ``game.agents``. When ``verbose`` is true it additionally reads
        ``env.current_step``, ``observe(agent)``, ``observe_action(agent)``
        and ``action_set``; when ``render`` is true it calls ``render()``.
    agents
        Mapping from each id in ``game.agents`` to an object exposing an
        ``action()`` method that returns that agent's next action.
    verbose : bool, default False
        Print the initial observations and, after every step, each agent's
        action, reward, next observation and observed joint action.
    render : bool, default False
        Render the game once after the reset and once after every step.
    render_delay : float, default 0.5
        Seconds to pause after each rendered frame. ``0``, ``None`` or a
        negative value disables the pause. Ignored when ``render`` is false.

    Returns
    -------
    dict
        Maps each agent id to the sum of the rewards it received during the
        episode.
    """

    def show_frame():
        # Draw the current state, then pause so successive frames can be seen.
        game.render()
        # time.sleep rejects negative values, so only sleep for a positive delay.
        if render_delay and render_delay > 0:
            time.sleep(render_delay)

    # Initialize the game
    game.reset()

    # Print initial observations
    if verbose:
        print(f"Step: {game.env.current_step}")
        for agent in game.agents:
            print(f"Agent {agent}: {game.observe(agent)}")

    # Initialize rewards for each agent
    cum_rewards = {agent: 0 for agent in game.agents}

    # render the game if required
    if render:
        show_frame()

    while not game.done():

        # Get actions from each agent
        actions = {agent: agents[agent].action() for agent in game.agents}

        # Perform the actions in the game
        game.step(actions)

        # Read every reward exactly once per step so that the value printed in
        # verbose mode is guaranteed to be the value that was accumulated.
        step_rewards = {agent: game.reward(agent) for agent in game.agents}
        for agent in game.agents:
            cum_rewards[agent] += step_rewards[agent]

        # Print the step details if verbose is enabled
        if verbose:
            print(f"Step: {game.env.current_step}")
            for agent in game.agents:
                print(f"Agent {agent} action: {actions[agent]} ({game.action_set[actions[agent]]})")
                print(f"Agent {agent} reward: {step_rewards[agent]}")
                print(f"Agent {agent} next state: {game.observe(agent)}")
                print(f"Agent {agent} joint action: {game.observe_action(agent)}")

        if render:
            show_frame()

    return cum_rewards

In [6]:
# One RandomAgent per agent id in the game, keyed by that id.
agent_dict = {
    agent_id: RandomAgent(game=game, agent=agent_id)
    for agent_id in game.agents
}

In [7]:
def run(game, agent_dict, n_episodes=100, verbose=False, render=False):
    """Play ``n_episodes`` episodes back to back and sum the rewards per agent.

    Each episode is delegated to ``play_episode`` with the same ``verbose``
    and ``render`` flags. With ``verbose`` enabled, an episode header is
    printed before each episode and the running totals after it.

    Returns a dict mapping every id in ``game.agents`` to its reward summed
    over all episodes played.
    """
    totals = {agent_id: 0 for agent_id in game.agents}

    for episode_idx in range(n_episodes):
        if verbose:
            print(f"-- Episode: {episode_idx}")

        episode_rewards = play_episode(game, agent_dict, verbose=verbose, render=render)

        # Fold this episode's outcome into the running totals.
        for agent_id in game.agents:
            totals[agent_id] += episode_rewards[agent_id]

        if not verbose:
            continue

        # Report the running totals after every episode.
        for agent_id in game.agents:
            print(f"Total rewards {agent_id}: {totals[agent_id]}")
        print("--")

    return totals

In [None]:
# Demo: play a single episode with step-by-step printing and rendering enabled;
# the returned per-agent totals become the cell's output.
run(game=game, agent_dict=agent_dict, n_episodes=1, verbose=True, render=True)

-- Episode: 0
Step: 0
Agent agent_0: [6. 2. 2. 4. 6. 2. 0. 1. 2.]
Agent agent_1: [6. 2. 2. 0. 1. 2. 4. 6. 2.]
Step: 1
Agent agent_0 action: 4 (EAST)
Agent agent_0 reward: 0
Agent agent_0 next state: [6. 2. 2. 4. 7. 2. 0. 1. 2.]
Agent agent_0 joint action: (4, 1)
Agent agent_1 action: 1 (NORTH)
Agent agent_1 reward: 0
Agent agent_1 next state: [6. 2. 2. 0. 1. 2. 4. 7. 2.]
Agent agent_1 joint action: (4, 1)
Step: 2
Agent agent_0 action: 5 (LOAD)
Agent agent_0 reward: 0
Agent agent_0 next state: [6. 2. 2. 4. 7. 2. 0. 1. 2.]
Agent agent_0 joint action: (5, 0)
Agent agent_1 action: 0 (NONE)
Agent agent_1 reward: 0
Agent agent_1 next state: [6. 2. 2. 0. 1. 2. 4. 7. 2.]
Agent agent_1 joint action: (5, 0)
Step: 3
Agent agent_0 action: 1 (NORTH)
Agent agent_0 reward: 0
Agent agent_0 next state: [6. 2. 2. 3. 7. 2. 0. 0. 2.]
Agent agent_0 joint action: (1, 3)
Agent agent_1 action: 3 (WEST)
Agent agent_1 reward: 0
Agent agent_1 next state: [6. 2. 2. 0. 0. 2. 3. 7. 2.]
Agent agent_1 joint action: (

{'agent_0': 0, 'agent_1': 0}

: 