In [1]:
import os
import sys

In [2]:
# Make the sibling "games" directory importable: resolve it to an absolute
# path and put it on sys.path, unless it is already listed there
LIB_DIR = os.path.abspath(os.path.join("..", "games"))

if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

In [3]:
import numpy as np

from a.agents import RandomAgent
from a.pommerman.configs import pommerman_ffa_v0
from a.pommerman.envs.v0 import Pomme
from a.pommerman.characters import Agent
import a.utility

# Random agents

The following code instantiates the environment with four random agents that take actions until the game is finished.

In [4]:
# Build the game configuration, then construct the environment from the
# keyword arguments stored under its "env_kwargs" entry
config = pommerman_ffa_v0()
env_kwargs = config["env_kwargs"]
env = Pomme(**env_kwargs)

In [5]:
# Create four random agents, keyed by agent id, and register them with the
# environment in id order
make_character = config["agent"]
agents = {}
for agent_id in range(4):
    character = make_character(agent_id, config["game_type"])
    agents[agent_id] = RandomAgent(character)
env.set_agents([*agents.values()])

In [6]:
# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the random agents until we're done.
# The loop is wrapped in try/finally so that rendering is shut down and the
# environment is closed even if a step raises or the cell is interrupted.
done = False
try:
    while not done:
        env.render()
        actions = env.act(obs)
        obs, reward, done, info = env.step(actions)
finally:
    env.render(close=True)
    env.close()

# `info` is the value returned by the final step of the episode
print(info)

This board has unreachable passages or agents. Re-making...


  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


# Human Agents

The following code runs the environment with three random agents and one human-controlled agent (use the arrow keys on your keyboard).

In [7]:
# Instantiate the environment
config = pommerman_ffa_v0()
env = Pomme(**config["env_kwargs"])

# Add 3 random agents (ids 0, 1 and 2)
agents = {}
for agent_id in range(3):
    agents[agent_id] = RandomAgent(config["agent"](agent_id, config["game_type"]))

# Add human agent.
# Its id is given explicitly: after the loop above `agent_id` is still 2, so
# reusing the loop variable would create a second character with id 2.
human_agent_id = 3
on_key_press, on_key_release = a.utility.get_key_control("arrows")
agents[human_agent_id] = a.agents.PlayerAgent(
    config["agent"](human_agent_id, config["game_type"]),
    a.utility.KEY_INPUT,
    on_key_press=on_key_press,
    on_key_release=on_key_release)

env.set_agents(list(agents.values()))

In [8]:
# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done.
# The loop is wrapped in try/finally so that rendering is shut down and the
# environment is closed even if a step raises or the player interrupts the
# cell part-way through a game.
done = False
try:
    while not done:
        env.render()
        actions = env.act(obs)
        obs, reward, done, info = env.step(actions)
finally:
    env.render(close=True)
    env.close()

# Print the result
print(info)

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


{'result': <Result.Win: 0>, 'winner': [2]}


# Training an Agent

The following code uses Tensorforce to train a PPO agent.

In [6]:
# Make sure you have tensorforce and TensorFlow installed: pip install tensorforce tensorflow
from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

import gym

ModuleNotFoundError: No module named 'tensorflow'

In [210]:
# Create a Proximal Policy Optimization agent:
# flat float state vector of length 169, 6 discrete actions,
# two dense layers of 64 units each, Adam optimizer
agent = PPOAgent(
    states_spec={'type': 'float', 'shape': (169,)},
    actions_spec={'type': 'int', 'num_actions': 6},
    network_spec=[
        {'type': 'dense', 'size': 64},
        {'type': 'dense', 'size': 64},
    ],
    batch_size=128,
    step_optimizer={'type': 'adam', 'learning_rate': 1e-4},
)

In [211]:
def featurize(obs):
    """Turn an observation into a flat float32 feature vector.

    Args:
        obs: mapping with a "board" entry holding a numpy array.

    Returns:
        The board flattened to one dimension and cast to ``np.float32``.
    """
    board = obs["board"]
    flat_board = board.reshape(-1)
    return flat_board.astype(np.float32)

In [212]:
class TensorforceAgent(a.agents.Agent):
    """Agent that delegates action selection to the notebook-level ``agent``."""
    def act(self, obs, action_space):
        # Forwards `obs` unchanged to the global `agent`; `action_space` is
        # not used here.
        # NOTE(review): `agent` was created with a flat float state of shape
        # (169,), while `obs` is passed through without featurize() -- confirm
        # whether the observation should be featurized first.
        return agent.act(obs)

AttributeError: 'OpenAIGym' object has no attribute 'agents'

In [None]:
# Instantiate the environment
config = pommerman_ffa_v0()
env = Pomme(**config["env_kwargs"])

# Add 3 random agents (ids 0, 1 and 2)
agents = {}
for agent_id in range(3):
    agents[agent_id] = RandomAgent(config["agent"](agent_id, config["game_type"]))

# Add TensorforceAgent.
# Its id is given explicitly: after the loop above `agent_id` is still 2, so
# reusing the loop variable would create a second character with id 2.
training_agent_id = 3
agents[training_agent_id] = TensorforceAgent(
    config["agent"](training_agent_id, config["game_type"]))
env.set_agents(list(agents.values()))

In [None]:
# TODO: How do you wrap the environment to make it compatible with gym?
class WrappedEnv(gym.Env):
    
    def __init__(self, env):
        self._env = env
    
    def step(self, action):
        actions = env.act(env._get_observations())
        obs, reward, done, info = env.step(actions)
        agent_obs = featurize(obs[3])
        return agent_obs, reward[3], done, info
    
    def _reset(self):
        self._env.reset()
        
    def _render(self, mode='human', close=False, record_dir=None):
        self._env.render(mode, close, record_dir)
        
    def _seed(self, seed=None):
        self._env.seed(seed)

In [None]:
# Wrap the multi-agent environment and hand it, together with the PPO agent,
# to a Tensorforce Runner
wrapped_env = WrappedEnv(env)
runner = Runner(agent=agent, environment=wrapped_env)

In [None]:
# Run a single episode capped at 200 timesteps, then close the runner
runner.run(episodes=1, max_episode_timesteps=200)
runner.close()