# Pommerman Demo.

This notebook demonstrates how to train Pommerman agents. Please let us know at support@pommerman.com if you run into any issues.

In [1]:
import os
import sys

In [2]:
# Make the sibling "games" directory importable by placing its absolute path
# on sys.path (only once, so re-running this cell does not add duplicates).
LIB_DIR = os.path.abspath(os.path.join("..", "games"))

if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

In [3]:
import numpy as np

from a.agents import RandomAgent
from a.pommerman.agents import SimpleAgent
from a.pommerman.configs import ffa_v0
from a.pommerman.envs.v0 import Pomme
from a.pommerman.characters import Agent
import a.utility

# Random agents

The following code instantiates the environment with four random agents, which take actions until the game is finished. (This will be a quick game.)

In [None]:
# Load the free-for-all (v0) configuration and build the environment from
# the keyword arguments stored under its "env_kwargs" entry.
config = ffa_v0()
env_kwargs = config["env_kwargs"]
env = Pomme(**env_kwargs)

In [None]:
# Create four random agents, keyed by agent id (0-3), and register them
# with the environment in id order.
agents = {}
for agent_id in range(4):
    # config["agent"] builds the per-agent object that RandomAgent wraps
    character = config["agent"](agent_id, config["game_type"])
    agents[agent_id] = RandomAgent(character)
env.set_agents([agents[key] for key in agents])

In [None]:
# Seed the environment, then reset it to obtain the initial observations
env.seed(0)
obs = env.reset()

# Render, collect every agent's action, and step until the environment
# reports that the episode is done
while True:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
    if done:
        break

# Close the render window and release the environment
env.render(close=True)
env.close()

print(info)

# Human Agents

The following code runs the environment with 3 random agents and one agent with human input (use the arrow keys on your keyboard). This can also be called on the command line with:

`python run_battle.py --agents=player::arrows,random::null,random::null,random::null --config=ffa_v0`

You can also run this with SimpleAgents by executing:

`python run_battle.py --agents=player::arrows,test::a.pommerman.agents.SimpleAgent,test::a.pommerman.agents.SimpleAgent,test::a.pommerman.agents.SimpleAgent --config=ffa_v0`

In [None]:
# Instantiate the environment
config = ffa_v0()
env = Pomme(**config["env_kwargs"])

# Add 3 random agents (ids 0-2)
agents = {}
for agent_id in range(3):
    agents[agent_id] = RandomAgent(config["agent"](agent_id, config["game_type"]))

# Add the human agent in the remaining slot (id 3).
# The id is set explicitly: after the loop above `agent_id` is still 2, so
# reusing it unchanged would give the human player the same id as the last
# random agent.
agent_id = 3
on_key_press, on_key_release = a.utility.get_key_control("arrows")
agents[agent_id] = a.agents.PlayerAgent(
    config["agent"](agent_id, config["game_type"]),
    a.utility.KEY_INPUT,
    on_key_press=on_key_press,
    on_key_release=on_key_release)

env.set_agents(list(agents.values()))

In [None]:
# Seed the environment, then reset it to obtain the initial observations
env.seed(0)
obs = env.reset()

# Play one episode: render the board, gather all agents' actions, and
# advance the environment until it reports the game is over
while True:
    env.render()
    actions = env.act(obs)
    step_result = env.step(actions)
    obs, reward, done, info = step_result
    if done:
        break

# Close the render window and release the environment
env.render(close=True)
env.close()

# Print the result
print(info)

# Training an Agent

The following code uses Tensorforce to train a PPO agent. This is in the train_with_tensorforce.py module as well.

In [4]:
# Make sure you have tensorforce installed: pip install tensorforce
from tensorforce.agents import PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

In [13]:
def make_np_float(feature):
    """Convert ``feature`` to a NumPy array with dtype float32.

    Args:
        feature: Anything ``np.array`` accepts (scalar, list, tuple, array).

    Returns:
        A new ``np.ndarray`` of dtype ``np.float32`` holding the same values.
    """
    as_array = np.array(feature)
    return as_array.astype(np.float32)

def featurize(obs):
    """Flatten one agent's observation dict into a single float32 vector.

    The pieces are concatenated in this order: flattened board, flattened
    bombs, position, ammo, blast strength, can-kick flag, teammate id, and
    three enemy ids.

    Args:
        obs: Mapping with the keys "board", "bombs", "position", "ammo",
            "blast_strength", "can_kick", "teammate" and "enemies".
            "board" and "bombs" must support ``reshape``/``astype``;
            "teammate" (when not None) and each entry of "enemies" must
            expose a ``.value`` attribute.

    Returns:
        A 1-D ``np.ndarray`` of dtype float32.
    """
    flat_board = obs["board"].reshape(-1).astype(np.float32)
    flat_bombs = obs["bombs"].reshape(-1).astype(np.float32)
    position = make_np_float(obs["position"])
    ammo = make_np_float([obs["ammo"]])
    blast_strength = make_np_float([obs["blast_strength"]])
    can_kick = make_np_float([obs["can_kick"]])

    # A missing teammate is encoded as -1
    mate = obs["teammate"]
    teammate = make_np_float([-1 if mate is None else mate.value])

    # Pad the enemy ids with -1 up to three entries; a non-positive
    # multiplier yields an empty list, so longer lists are left untouched
    enemy_ids = [enemy.value for enemy in obs["enemies"]]
    enemy_ids.extend([-1] * (3 - len(enemy_ids)))
    enemies = make_np_float(enemy_ids)

    parts = (flat_board, flat_bombs, position, ammo,
             blast_strength, can_kick, teammate, enemies)
    return np.concatenate(parts)


class TensorforceAgent(a.agents.Agent):
    """Placeholder agent that occupies the training slot in the environment.

    Its ``act`` method performs no work and returns ``None``; in this
    notebook the training agent's action is inserted by
    ``WrappedEnv.execute`` instead.
    """

    def act(self, obs, action_space):
        """Ignore the observation and action space and return ``None``."""
        return None

In [14]:
# Build the free-for-all environment and seed it
config = ffa_v0()
env = Pomme(**config["env_kwargs"])
env.seed(0)

# Create a Proximal Policy Optimization agent: two dense layers of 64 units,
# float states shaped like the environment's observation space, and one
# integer action out of env.action_space.n choices
hidden_layers = [{"type": "dense", "size": 64} for _ in range(2)]
agent = PPOAgent(
    states_spec={"type": "float", "shape": env.observation_space.shape},
    actions_spec={"type": "int", "num_actions": env.action_space.n},
    network_spec=hidden_layers,
    batch_size=128,
    step_optimizer={"type": "adam", "learning_rate": 1e-4},
)

# Add 3 SimpleAgent opponents (ids 0-2)
agents = []
for agent_id in range(3):
    opponent_character = config["agent"](agent_id, config["game_type"])
    agents.append(SimpleAgent(opponent_character))

# Add the TensorforceAgent under the next id (3) and mark it as the
# environment's training agent
agent_id += 1
training_character = config["agent"](agent_id, config["game_type"])
agents.append(TensorforceAgent(training_character))
env.set_agents(agents)
env.set_training_agent(agents[-1].agent_id)

In [15]:
class WrappedEnv(OpenAIGym):
    """Adapter that lets a Tensorforce runner drive an existing environment.

    Only the training agent's featurized observation and reward are handed
    back to the learner; the remaining agents' actions are obtained from
    ``gym.act``.
    """

    def __init__(self, gym, visualize=False):
        """Store the wrapped environment.

        Args:
            gym: An already-constructed environment exposing ``reset``,
                ``step``, ``act``, ``get_observations``, ``render`` and a
                ``training_agent`` index.
            visualize: When true, render the environment on every step.
        """
        # NOTE(review): the parent initializer is not invoked here; presumably
        # it would try to create its own gym environment -- confirm.
        self.gym = gym
        self.visualize = visualize

    def execute(self, actions):
        """Advance the environment by one step.

        Args:
            actions: The action chosen by the learner for the training agent.

        Returns:
            A ``(state, terminal, reward)`` tuple containing the training
            agent's featurized observation, the done flag returned by the
            environment, and the training agent's reward.
        """
        if self.visualize:
            self.gym.render()

        obs = self.gym.get_observations()
        all_actions = self.gym.act(obs)
        # Slot the learner's action in at the training agent's index so the
        # action list lines up with the environment's agent order.
        all_actions.insert(self.gym.training_agent, actions)
        state, reward, terminal, _ = self.gym.step(all_actions)
        agent_state = featurize(state[self.gym.training_agent])
        agent_reward = reward[self.gym.training_agent]
        return agent_state, terminal, agent_reward

    def reset(self):
        """Reset the environment and return the training agent's features."""
        obs = self.gym.reset()
        # Index by the configured training agent (as execute() does) rather
        # than a hard-coded 3, so the two methods always observe the same agent.
        agent_obs = featurize(obs[self.gym.training_agent])
        return agent_obs

In [16]:
# Wrap the environment (with rendering enabled) and train for 5 episodes,
# each capped at 2000 timesteps.
wrapped_env = WrappedEnv(env, visualize=True)
runner = Runner(agent=agent, environment=wrapped_env)
runner.run(episodes=5, max_episode_timesteps=2000)
print("Stats: ", runner.episode_rewards, runner.episode_timesteps, runner.episode_times)

# Best-effort cleanup: an AttributeError raised while closing the runner is
# ignored on purpose.
try:
    runner.close()
except AttributeError:
    pass

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


Stats:  [-1, -1, -1, -1, -1] [25, 26, 25, 28, 216] [3.5424931049346924, 1.5630519390106201, 1.5652358531951904, 1.7198398113250732, 15.060396671295166]
