In [1]:
import os
import time

import keras.backend as K
import numpy as np
import pommerman
import tensorflow as tf
from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.layers import Input, Dense, Flatten, Convolution2D, BatchNormalization, Activation, Add
from keras.models import Model
from pommerman import agents
from pommerman.constants import BOARD_SIZE

Import error! You will not be able to render --> Cannot connect to "None"


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Ask TensorFlow to grow its GPU memory use on demand rather than claiming it all.
# NB! Subprocesses have to apply the same configuration themselves.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
K.set_session(tf.Session(config=config))

In [3]:
class StudentsAgent(agents.BaseAgent):
    """Pommerman agent whose policy is a small residual convolutional network.

    The network maps a featurized observation (see ``featurize``) to a softmax
    over ``actions`` outputs and ``act`` plays the arg-max action.  Weights are
    loaded from ``save_path`` at construction time when that file exists, and
    ``train`` fits the network on arrays of observations and labels.
    """

    def __init__(self, actions=6, seed=0, save_path="./dagger/model/model.h4",
                 log_path='./dagger/logs/', save_best_only=True):
        """Build the network and try to restore previously saved weights.

        Args:
            actions: Number of outputs of the final softmax layer.
            seed: Accepted for interface compatibility; not used by this class.
            save_path: File the weights are loaded from and checkpointed to.
            log_path: Directory used for the CSV log and TensorBoard files.
            save_best_only: Forwarded to ``ModelCheckpoint`` in ``train``.
        """
        super().__init__()
        # NOTE(review): clear_session() resets Keras' global state, so this presumably
        # invalidates models built earlier in the same process and may drop a session
        # installed with K.set_session() -- confirm before creating several agents.
        K.clear_session()
        self.log_path = log_path
        self.save_path = save_path
        self.actions = actions
        self.save_best_only = save_best_only
        self.rewards = []
        self.current_epoch = 0

        self.model = self.create_model(actions)
        if os.path.isfile(self.save_path):
            try:
                print("Trying to load model")
                self.model.load_weights(self.save_path)
                print("Model was loaded successful")
            except Exception as exc:
                # Best effort: keep the freshly initialised network, but say why
                # loading failed instead of swallowing the error silently.
                print("Model load failed:", exc)

    def get_res_block(self, input):
        """Append one residual block (conv-BN-ReLU-conv-BN, skip add, ReLU) to ``input``.

        Both convolutions produce 256 channels, so ``input`` has to carry 256
        channels as well for the skip addition to be valid.
        """
        x = Convolution2D(256, 3, padding='same')(input)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Convolution2D(256, 3, padding='same')(x)
        x = BatchNormalization()(x)
        x = Add()([input, x])
        x = Activation('relu')(x)
        return x

    def create_model(self, actions, input_shape=(11, 11, 17,)):
        """Create and compile the policy network.

        Args:
            actions: Size of the softmax output layer.
            input_shape: Shape of one featurized observation; the default matches
                the 17 planes concatenated by ``featurize`` on an 11x11 board.

        Returns:
            A compiled Keras ``Model`` (Adam optimizer, categorical cross-entropy).
        """
        inp = Input(input_shape)
        x = Convolution2D(256, 3, padding='same')(inp)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # Three residual blocks
        for _ in range(3):
            x = self.get_res_block(x)

        # Output block: 1x1 convolution down to 4 filters, then a dense softmax head
        x = Convolution2D(4, 1, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Flatten()(x)
        out = Dense(actions, activation='softmax')(x)
        model = Model(inputs=inp, outputs=out)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train(self, obs, labels, batch_size=16384, epochs=100, early_stopping = 10, class_weight=None, initial_epoch=0):
        """Fit the network on ``obs``/``labels`` and reload the checkpointed weights.

        Args:
            obs: Training inputs passed to ``Model.fit`` as ``x``.
            labels: Training targets passed to ``Model.fit`` as ``y``.
            batch_size: Batch size for fitting (also forwarded to ``TensorBoard``).
            epochs: Epoch limit passed to ``Model.fit``.
            early_stopping: Patience, in epochs, of the early-stopping callback.
            class_weight: Optional class weighting passed to ``Model.fit``.
            initial_epoch: Epoch index at which ``Model.fit`` starts counting.

        Side effects:
            Writes checkpoints to ``save_path``, appends to ``log.csv`` inside
            ``log_path``, writes TensorBoard files to ``log_path`` and advances
            ``current_epoch`` by the number of epochs that actually ran.
        """
        # The checkpoint and CSV callbacks need their target directories to exist.
        for directory in (os.path.dirname(self.save_path), self.log_path):
            if directory:
                os.makedirs(directory, exist_ok=True)

        # All callbacks watch the training loss, although a validation split is
        # also passed to fit() below.
        callbacks = [
            EarlyStopping(monitor='loss', patience=early_stopping),
            ModelCheckpoint(self.save_path, monitor='loss', save_best_only=self.save_best_only),
            ReduceLROnPlateau(monitor='loss', patience=3, factor=0.8),
            # os.path.join also copes with a log_path given without a trailing slash.
            CSVLogger(os.path.join(self.log_path, 'log.csv'), append=True),
            TensorBoard(self.log_path, batch_size=batch_size),
        ]

        history = self.model.fit(x=obs, y=labels, batch_size=batch_size, epochs=epochs, verbose=1,
                                 callbacks=callbacks,
                                 validation_split=0.15, shuffle=True, class_weight=class_weight,
                                 initial_epoch=initial_epoch)
        # Continue from the weights written by the checkpoint callback.
        self.model.load_weights(self.save_path)
        # One 'loss' entry is recorded per completed epoch; unlike 'lr' it does not
        # depend on which callbacks happen to be installed.
        self.current_epoch += len(history.history['loss'])

    def act(self, obs, action_space):
        """Return the index of the most probable action for observation ``obs``.

        ``action_space`` is part of the agent interface and is not used here.
        """
        feat = StudentsAgent.featurize(obs)
        # Add a leading batch axis: the model predicts on batches.
        probs = self.model.predict(feat[np.newaxis])
        action = np.argmax(probs[0])
        return action

    @staticmethod
    def featurize(obs):
        """Convert an observation dict into a (BOARD_SIZE, BOARD_SIZE, 17) array.

        Reads the keys ``board``, ``position``, ``teammate``, ``enemies``,
        ``bomb_blast_strength``, ``bomb_life``, ``ammo``, ``blast_strength`` and
        ``can_kick``.  The board-like entries are assumed to be numpy arrays that
        can be reshaped to (BOARD_SIZE, BOARD_SIZE, 1) -- TODO confirm against the
        environment.  Planes are stacked along the last axis in the order given
        in the final ``np.concatenate`` call.
        """
        shape = (BOARD_SIZE, BOARD_SIZE, 1)

        def get_matrix(source, key):
            """Return ``source[key]`` reshaped to ``shape`` as float32."""
            return source[key].reshape(shape).astype(np.float32)

        def get_map(board, item):
            """Return a 0/1 plane marking the cells where ``board == item``."""
            plane = np.zeros(shape)
            plane[board == item] = 1
            return plane

        board = get_matrix(obs, 'board')

        # Passage = 0 gets no plane of its own (TODO: probably not needed)
        rigid_map = get_map(board, 1)               # Rigid = 1
        wood_map = get_map(board, 2)                # Wood = 2
        bomb_map = get_map(board, 3)                # Bomb = 3
        flames_map = get_map(board, 4)              # Flames = 4
        fog_map = get_map(board, 5)                 # Fog = 5 (TODO: not used for first two stages)
        extra_bomb_map = get_map(board, 6)          # ExtraBomb = 6
        incr_range_map = get_map(board, 7)          # IncrRange = 7
        kick_map = get_map(board, 8)                # Kick = 8
        skull_map = get_map(board, 9)               # Skull = 9

        # One-hot plane for the agent's own position.
        position = obs["position"]
        my_position = np.zeros(shape)
        my_position[position[0], position[1], 0] = 1

        # TODO: according to the documentation "teammate" should be an array
        team_mates = get_map(board, obs["teammate"].value)

        # Mark every cell occupied by any enemy.
        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        bomb_blast_strength = get_matrix(obs, 'bomb_blast_strength')
        bomb_life = get_matrix(obs, 'bomb_life')

        # Scalar attributes are broadcast to constant planes.
        ammo = np.full(shape, obs["ammo"])
        blast_strength = np.full(shape, obs["blast_strength"])
        can_kick = np.full(shape, int(obs["can_kick"]))

        features = np.concatenate([my_position, enemies, team_mates, rigid_map,
                                   wood_map, bomb_map, flames_map,
                                   fog_map, extra_bomb_map, incr_range_map,
                                   kick_map, skull_map, bomb_blast_strength,
                                   bomb_life, ammo, blast_strength, can_kick], axis=2)
        return features

In [4]:
def eval(env, num_episodes):
    """Play ``num_episodes`` episodes in ``env`` and collect per-agent results.

    Note: the name shadows the builtin ``eval``; it is kept because other cells
    call it under this name.

    Args:
        env: Environment exposing ``reset()``, ``act(state)`` and
            ``step(actions)``; ``step`` must return
            ``(state, reward, done, info)`` with one reward entry per agent.
        num_episodes: Number of episodes to play.

    Returns:
        Tuple ``(rewards, lengths, elapsed)``:
        ``rewards`` holds the reward vector of the last step of each episode,
        ``lengths`` holds, per episode, the 0-based step index at which each
        agent's reward first became non-zero (``None`` if it never did), and
        ``elapsed`` is the wall-clock time in seconds for all episodes.
    """
    rewards = []
    lengths = []
    start_time = time.time()
    # Run the episodes just like OpenAI Gym
    for i_episode in range(num_episodes):
        state = env.reset()
        done = False
        # Sized from the first reward vector, so any number of agents works.
        lens = None
        t = 0
        while not done:
            #env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            if lens is None:
                lens = [None] * len(reward)
            for j, agent_reward in enumerate(reward):
                if lens[j] is None and agent_reward != 0:
                    lens[j] = t
            t += 1
        rewards.append(reward)
        lengths.append(lens)
        print('Episode {} finished'.format(i_episode))
    elapsed = time.time() - start_time
    return rewards, lengths, elapsed

In [5]:
# Number of evaluation episodes played for each seating position of the student agent.
num_episodes = 100

In [6]:
# Four agents: the student sits in seat 0, every other seat gets a SimpleAgent.
agent_list = [
    StudentsAgent(save_path="model.h4") if seat == 0 else agents.SimpleAgent()
    for seat in range(4)
]

# Build the "Free-For-All" competition environment around those agents.
env = pommerman.make('PommeFFACompetition-v0', agent_list)

# Play the evaluation episodes for this seating.
rewards0, lengths0, elapsed0 = eval(env, num_episodes)

Trying to load model
Model was loaded successful
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 fi

In [7]:
# Four agents: the student sits in seat 1, every other seat gets a SimpleAgent.
agent_list = [
    StudentsAgent(save_path="model.h4") if seat == 1 else agents.SimpleAgent()
    for seat in range(4)
]

# Build the "Free-For-All" competition environment around those agents.
env = pommerman.make('PommeFFACompetition-v0', agent_list)

# Play the evaluation episodes for this seating.
rewards1, lengths1, elapsed1 = eval(env, num_episodes)

Trying to load model
Model was loaded successful
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 fi

In [8]:
# Four agents: the student sits in seat 2, every other seat gets a SimpleAgent.
agent_list = [
    StudentsAgent(save_path="model.h4") if seat == 2 else agents.SimpleAgent()
    for seat in range(4)
]

# Build the "Free-For-All" competition environment around those agents.
env = pommerman.make('PommeFFACompetition-v0', agent_list)

# Play the evaluation episodes for this seating.
rewards2, lengths2, elapsed2 = eval(env, num_episodes)

Trying to load model
Model was loaded successful
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 fi

In [9]:
# Four agents: the student sits in seat 3, every other seat gets a SimpleAgent.
agent_list = [
    StudentsAgent(save_path="model.h4") if seat == 3 else agents.SimpleAgent()
    for seat in range(4)
]

# Build the "Free-For-All" competition environment around those agents.
env = pommerman.make('PommeFFACompetition-v0', agent_list)

# Play the evaluation episodes for this seating.
rewards3, lengths3, elapsed3 = eval(env, num_episodes)

Trying to load model
Model was loaded successful
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode 0 finished
Episode 1 finished
Episode 2 finished
Episode 3 finished
Episode 4 finished
Episode 5 finished
Episode 6 finished
Episode 7 finished
Episode 8 finished
Episode 9 finished
Episode 10 finished
Episode 11 finished
Episode 12 finished
Episode 13 finished
Episode 14 finished
Episode 15 finished
Episode 16 finished
Episode 17 finished
Episode 18 finished
Episode 19 finished
Episode 20 finished
Episode 21 finished
Episode 22 finished
Episode 23 finished
Episode 24 finished
Episode 25 finished
Episode 26 finished
Episode 27 finished
Episode 28 finished
Episode 29 finished
Episode 30 finished
Episode 31 finished
Episode 32 finished
Episode 33 finished
Episode 34 finished
Episode 35 finished
Episode 36 finished
Episode 37 finished
Episode 38 finished
Episode 39 finished
Episode 40 finished
Episode 41 finished
Episode 42 fi

In [10]:
# Per episode index, pick the student's final reward out of each run: entry `seat`
# comes from the run in which the student occupied that seat.
rewards = [
    tuple(run[seat] for seat, run in enumerate(runs))
    for runs in zip(rewards0, rewards1, rewards2, rewards3)
]

In [11]:
# Per episode index, pick the student's recorded step index out of each run: entry
# `seat` comes from the run in which the student occupied that seat.
lengths = [
    tuple(run[seat] for seat, run in enumerate(runs))
    for runs in zip(lengths0, lengths1, lengths2, lengths3)
]

In [12]:
# Mean and standard deviation of the student's final reward, one column per seat.
np.mean(rewards, axis=0), np.std(rewards, axis=0)

(array([-0.96, -1.  , -0.94, -1.  ]),
 array([0.28      , 0.        , 0.34117444, 0.        ]))

In [13]:
# Mean and standard deviation, one column per seat, of the step index at which the
# student's reward first became non-zero.
np.mean(lengths, axis=0), np.std(lengths, axis=0)

(array([ 68.48,  33.1 , 110.48,  35.44]),
 array([121.88941546,  43.38928439, 195.58729407,  48.45334251]))

In [14]:
# Mean and standard deviation of the student's final reward pooled over all four seats.
np.mean(rewards), np.std(rewards)

(-0.975, 0.22220486043288973)

In [15]:
# Mean and standard deviation of the student's recorded step index pooled over all four seats.
np.mean(lengths), np.std(lengths)

(61.875, 123.76859203772175)

In [16]:
# Wall-clock time summed over the four runs.
elapsed = elapsed0 + elapsed1 + elapsed2 + elapsed3
# Per episode, take the largest recorded step index over the agents, then add them up.
# NOTE(review): the indices are 0-based, so this is presumably one step short per
# episode -- confirm before quoting the per-step time.
total_timesteps = (
    np.concatenate([lengths0, lengths1, lengths2, lengths3], axis=0)
    .max(axis=1)
    .sum()
)
elapsed, total_timesteps, elapsed / total_timesteps

(1143.3678739070892, 134485, 0.008501824544797481)

In [17]:
# Persist the combined student results together with the raw per-run results in one
# compressed archive; the keyword names become the array names inside the file.
np.savez_compressed("eval_students_%d.npz" % num_episodes, rewards=rewards, lengths=lengths, elapsed=elapsed, total_timesteps=total_timesteps,
                   rewards0=rewards0, rewards1=rewards1, rewards2=rewards2, rewards3=rewards3,
                   lengths0=lengths0, lengths1=lengths1, lengths2=lengths2, lengths3=lengths3,
                   elapsed0=elapsed0, elapsed1=elapsed1, elapsed2=elapsed2, elapsed3=elapsed3)

In [18]:
# Run 0 only (student in seat 0, SimpleAgents in seats 1-3): per-agent mean and
# standard deviation of the final rewards.
np.mean(rewards0, axis=0), np.std(rewards0, axis=0)

(array([-0.96, -0.54, -0.46, -0.46]),
 array([0.28      , 0.84166502, 0.88791892, 0.88791892]))

In [19]:
# Run 0 only (student in seat 0, SimpleAgents in seats 1-3): per-agent mean and
# standard deviation of the step index at which the reward first became non-zero.
np.mean(lengths0, axis=0), np.std(lengths0, axis=0)

(array([ 68.48, 262.56, 227.42, 267.13]),
 array([121.88941546, 250.24916863, 233.49240587, 245.03553436]))

In [20]:
# Same throughput figures restricted to run 0; this rebinds `elapsed` and
# `total_timesteps` from the all-runs cell.
elapsed = elapsed0
total_timesteps = np.asarray(lengths0).max(axis=1).sum()
elapsed, total_timesteps, elapsed / total_timesteps

(278.80872774124146, 32623, 0.008546385303045136)