# Pommerman Demo.

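This notebook demonstrates how to run Pommerman tournaments between agents (a rule-based `SimpleAgent` and several UCB-based MCTS agents) and how to log the result of every game to a CSV file. Please let us know at support@pommerman.com if you run into any issues.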

In [5]:
#from google.colab import drive
#drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
#!pip install jsonmerge

#CONTENT_STORAGE_DIR = "/content/drive/My Drive/Bachelorarbeit"
#sys.path.append(CONTENT_STORAGE_DIR)

Collecting jsonmerge
  Downloading https://files.pythonhosted.org/packages/3f/85/c73f8fd74a2d920935cacfb3aaf99089a22f7291f7906150e63de086a5c2/jsonmerge-1.7.0.tar.gz
Building wheels for collected packages: jsonmerge
  Building wheel for jsonmerge (setup.py) ... [?25l[?25hdone
  Created wheel for jsonmerge: filename=jsonmerge-1.7.0-cp36-none-any.whl size=16739 sha256=0968f308f5e3ead9374ed3d4266185070ee6c705bf51de5e14a2df30c00c4548
  Stored in directory: /root/.cache/pip/wheels/55/1b/1a/29f8d1d20b748b3825a0bd52c6a0defdf44aa0d150b5e6df3a
Successfully built jsonmerge
Installing collected packages: jsonmerge
Successfully installed jsonmerge-1.7.0


In [23]:
import atexit
from datetime import datetime
import os
import random
import sys
import copy
import time
import csv

import argparse
import numpy as np

from pommerman import helpers
from pommerman import make
from pommerman import utility
from pommerman import constants

from pommerman.agents import SimpleAgent
from pommerman.agents import UcbMCTSAgent
from pommerman.agents import UcbLimitMCTSAgent
from pommerman.agents import UcbMRMCTSAgent
from pommerman.agents import UcbMRLimitMCTSAgent
from pommerman.agents.abstract_mcts_skeleton import AbstractMCTSSkeleton

In [24]:
def run_tournament(tournament_name, agent_pool1, agent_pool2, match_count, AllVsAll=True, seed=None,
                   csv_dir='C:/tmp/Results/CSV'):
    '''Play a tournament between two agent pools and log every game to a CSV file.

    Each pool entry is a ``(name, agent_class, kwargs)`` tuple; a fresh agent is
    built with ``agent_class(**kwargs)`` for every environment. Every pairing
    (a "duel") is played twice, once with each agent in the first slot, and each
    of those two line-ups plays ``match_count`` games on the same environment.

    Args:
        tournament_name: Used as the CSV file name (``<csv_dir>/<tournament_name>.csv``).
        agent_pool1: Agent specifications for one side.
        agent_pool2: Agent specifications for the other side.
        match_count: Number of games per line-up (so ``2 * match_count`` per duel).
        AllVsAll: If true, every entry of ``agent_pool1`` meets every entry of
            ``agent_pool2``. If false, entries are paired by index and
            ``agent_pool2`` must be at least as long as ``agent_pool1``.
        seed: Seed applied to ``numpy``, ``random`` and the environment each time
            an environment is created. When ``None`` a new random seed is drawn
            for every environment.
        csv_dir: Directory the result CSV is written to; created if missing.

    Raises:
        ValueError: If ``AllVsAll`` is false and ``agent_pool2`` is shorter than
            ``agent_pool1``.
    '''
    config = 'OneVsOne-v0'
    record_pngs_dir = None  # e.g. 'C:/tmp/Results/PNGS'
    record_json_dir = None  # e.g. 'C:/tmp/Results/JSON'
    game_state_file = None
    render_mode = 'human'
    do_sleep = False
    render = False

    # Fail before any game is played instead of with an IndexError half-way through.
    if not AllVsAll and len(agent_pool2) < len(agent_pool1):
        raise ValueError(
            f'AllVsAll=False pairs the pools by index, but agent_pool2 has only '
            f'{len(agent_pool2)} entries for {len(agent_pool1)} entries in agent_pool1')

    os.makedirs(csv_dir, exist_ok=True)
    csv_path = f'{csv_dir}/{tournament_name}.csv'

    # First row is the CSV header; rows from write_csv_pos onwards are not on disk yet.
    game_details = [['p1', 'p2', 'result', 'winner', 'time', 'steps', 'add_info_p1', 'add_info_p2']]
    write_csv_pos = 0

    if AllVsAll:
        pairings = [(p1_a, p2_a) for p1_a in agent_pool1 for p2_a in agent_pool2]
    else:
        pairings = list(zip(agent_pool1, agent_pool2))

    total_duels = len(pairings)
    total_games = total_duels * match_count * 2
    game_num = 0

    for duel_num, (p1_a, p2_a) in enumerate(pairings, start=1):
        print(f'Duel {duel_num}/{total_duels}')

        try:
            # Play the duel twice so that each agent gets to occupy the first slot.
            for first, second in ((p1_a, p2_a), (p2_a, p1_a)):
                agents = [first[1](**first[2]), second[1](**second[2])]
                agent_names = [first[0], second[0]]

                env = make(config, agents, game_state_file, render_mode=render_mode)
                try:
                    # Keep the caller's seed untouched so that seed=None really
                    # yields a new random seed for every environment.
                    env_seed = seed
                    if env_seed is None:
                        # Pick a random seed between 0 and 2^31 - 1
                        env_seed = random.randint(0, np.iinfo(np.int32).max)
                    np.random.seed(env_seed)
                    random.seed(env_seed)
                    env.seed(env_seed)

                    for i in range(match_count):
                        game_num += 1

                        record_pngs_dir_ = None
                        record_json_dir_ = None
                        if record_pngs_dir:
                            record_pngs_dir_ = f'{record_pngs_dir}/{tournament_name}/{agent_names[0]}_vs_{agent_names[1]}_{i+1}'
                        if record_json_dir:
                            record_json_dir_ = f'{record_json_dir}/{tournament_name}/{agent_names[0]}_vs_{agent_names[1]}_{i+1}'

                        start = time.time()
                        info, steps = run(env, agent_names, config, render, do_sleep, record_pngs_dir_, record_json_dir_)
                        total_time = time.time() - start

                        # -1 marks "no winner"; otherwise the first entry of info['winners'].
                        winner = -1
                        if info['result'] == constants.Result.Win:
                            winner = int(info['winners'][0])

                        # Only MCTS agents expose extra statistics via get_agent_info.
                        agent_info_1 = {}
                        agent_info_2 = {}
                        if isinstance(agents[0], AbstractMCTSSkeleton):
                            agents[0].get_agent_info(agent_info_1)
                        if isinstance(agents[1], AbstractMCTSSkeleton):
                            agents[1].get_agent_info(agent_info_2)

                        game_details.append([agent_names[0], agent_names[1], info['result'], winner, total_time, steps, agent_info_1, agent_info_2])

                        print(f"-- {game_num} / {total_games} Result: ", game_details[-1])
                finally:
                    # Release the environment right away rather than queueing one
                    # atexit handler per environment until the kernel shuts down.
                    env.close()
        finally:
            # Persist everything that finished, even if the duel was interrupted.
            # newline='' stops the csv module from emitting blank rows on Windows.
            with open(csv_path, 'a', newline='') as f:
                writer = csv.writer(f, delimiter=';')
                writer.writerows(game_details[write_csv_pos:])
            write_csv_pos = len(game_details)

In [25]:
def run(env, agent_names, config, render, do_sleep, record_pngs_dir=None, record_json_dir=None):
    '''Play a single game on ``env`` and return ``(info, steps)``.

    ``info`` is the last info object returned by ``env.step`` and ``steps`` is
    the number of ``env.step`` calls made. When ``render`` is truthy each frame
    is rendered (optionally recording PNG/JSON); when ``render`` is ``False``
    and ``record_json_dir`` is set, JSON snapshots are saved without rendering.
    '''
    # Create whichever recording directories were requested but do not exist yet.
    for target_dir in (record_pngs_dir, record_json_dir):
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)

    # JSON-only recording is used strictly when rendering is switched off.
    json_without_render = render is False and bool(record_json_dir)

    def draw_frame():
        env.render(
            record_pngs_dir=record_pngs_dir,
            record_json_dir=record_json_dir,
            do_sleep=do_sleep)

    def dump_json_frame():
        env.save_json(record_json_dir)
        time.sleep(1.0 / env._render_fps)

    observations = env.reset()
    steps = 0
    finished = False

    while not finished:
        if render:
            draw_frame()
        if json_without_render:
            dump_json_frame()
        chosen_actions = env.act(observations)
        steps += 1
        observations, _reward, finished, info = env.step(chosen_actions)

    # Show/record the terminal state as well, then tear the viewer down.
    if render:
        draw_frame()
        if do_sleep:
            time.sleep(5)
        env.render(close=True)

    if json_without_render:
        dump_json_frame()

    if record_json_dir:
        finished_at = datetime.now().isoformat()
        utility.join_json_state(record_json_dir, agent_names, finished_at,
                                config, info)

    return info, steps

In [26]:
def run_simple_vs_ucb():
    '''Run a tournament of SimpleAgent against UcbLimitMCTSAgent at several iteration budgets.

    One UcbLimitMCTSAgent specification is created per iteration budget
    (1, 20, 40, 60, 80, 100); every specification plays 50 matches per line-up
    against the SimpleAgent via ``run_tournament``.
    '''
    name_template = 'AgentUCBLimit_iter{maxIterations}_df{discountFactor}_dl{depthLimit}_ex{expandTreeRollout}_c{C}'

    # Pool 1: the single rule-based baseline.
    agent_pool1 = [('SimpleAgent', SimpleAgent, {})]

    # Pool 2: one MCTS specification per iteration budget, each with its own kwargs dict.
    agent_pool2 = []
    for max_iterations in [1, *range(20, 101, 20)]:
        settings = {'expandTreeRollout': False,
                    'maxIterations': max_iterations,
                    'maxTime': 0.0,
                    'discountFactor': 0.9999,
                    'depthLimit': None,
                    'C': 0.5}
        agent_pool2.append((name_template.format(**settings), UcbLimitMCTSAgent, settings))

    # Tournament Settings
    match_count = 50
    started_at = datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
    tournament_name = 'Simple_Against_UCBLimit_' + started_at

    run_tournament(tournament_name, agent_pool1, agent_pool2, match_count)

In [27]:
def run_ucb_rnd_vs_limit():
    '''Run a tournament of UcbMCTSAgent against UcbLimitMCTSAgent, paired by iteration budget.

    Both pools hold one specification per iteration budget (1, 20, 40, 60, 80,
    100) with otherwise identical settings. ``run_tournament`` is called with
    ``AllVsAll=False``, so the n-th UcbMCTSAgent only meets the n-th
    UcbLimitMCTSAgent, with 10 matches per line-up.
    '''
    def build_pool(name_prefix, agent_cls, iteration_budgets):
        # One (name, class, kwargs) tuple per budget; each gets its own kwargs dict.
        pool = []
        for max_iterations in iteration_budgets:
            kwargs = {'expandTreeRollout': False,
                      'maxIterations': max_iterations,
                      'maxTime': 0.0,
                      'discountFactor': 0.9999,
                      'depthLimit': None,
                      'C': 0.5}
            pool.append((
                f'{name_prefix}_iter{kwargs["maxIterations"]}_df{kwargs["discountFactor"]}_dl{kwargs["depthLimit"]}_ex{kwargs["expandTreeRollout"]}_c{kwargs["C"]}',
                agent_cls, kwargs))
        return pool

    # create agents
    iters = [1] + [(i + 1) * 20 for i in range(5)]
    # The two pools need distinct name prefixes: the names end up in the p1/p2
    # columns of the result CSV and are the only way to tell the agents apart.
    agent_pool1 = build_pool('AgentUCB', UcbMCTSAgent, iters)
    agent_pool2 = build_pool('AgentUCBLimit', UcbLimitMCTSAgent, iters)

    # Tournament Settings
    tournament_name = 'UCB_Depth_Limit_Test' + datetime.now().strftime("%m-%d-%Y_%H-%M-%S")
    match_count = 10

    run_tournament(tournament_name, agent_pool1, agent_pool2, match_count, False)

In [28]:
# Start the SimpleAgent vs. UcbLimitMCTSAgent tournament (1 x 6 duels, 2 line-ups x 50 matches each = 600 games).
run_simple_vs_ucb()

Duel 1/6
-- 1 / 600 Result:  ['SimpleAgent', 'AgentUCBLimit_iter1_df0.9999_dlNone_exFalse_c0.5', <Result.Win: 0>, 1, 0.8527741432189941, 92, {}, {'avgTime': 0.007910065217391302, 'avgRolloutDepth': 23.413043478260885}]
-- 2 / 600 Result:  ['SimpleAgent', 'AgentUCBLimit_iter1_df0.9999_dlNone_exFalse_c0.5', <Result.Tie: 2>, -1, 0.6556901931762695, 35, {}, {'avgTime': 0.01736385714285714, 'avgRolloutDepth': 60.19999999999999}]
-- 3 / 600 Result:  ['SimpleAgent', 'AgentUCBLimit_iter1_df0.9999_dlNone_exFalse_c0.5', <Result.Win: 0>, 0, 1.4069945812225342, 117, {}, {'avgTime': 0.01063133333333333, 'avgRolloutDepth': 32.64102564102562}]
-- 4 / 600 Result:  ['SimpleAgent', 'AgentUCBLimit_iter1_df0.9999_dlNone_exFalse_c0.5', <Result.Win: 0>, 1, 1.0654377937316895, 120, {}, {'avgTime': 0.007539249999999998, 'avgRolloutDepth': 22.341666666666672}]
-- 5 / 600 Result:  ['SimpleAgent', 'AgentUCBLimit_iter1_df0.9999_dlNone_exFalse_c0.5', <Result.Win: 0>, 1, 0.5918221473693848, 43, {}, {'avgTime': 0.01

KeyboardInterrupt: ignored